Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

72
LINES

< > BotCompany Repo | #1033575 // Welford [running average & standard deviation]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (10210L/57K).

// from https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
srecord Welford(long count, dbl mean, dbl m2) {
  // For a new value newValue, compute the new count, new mean, the new M2.
  // mean accumulates the mean of the entire dataset
  // M2 aggregates the squared distance from the mean
  // count aggregates the number of samples seen so far
  void add(double newValue) {
    count++;
    dbl delta = newValue - mean;
    mean += delta / count;
    dbl delta2 = newValue - mean;
    m2 += delta * delta2;
  }
  
  void remove(double valueToRemove) {
    guarantee(count > 0);
    
    /* 
      Let's reverse the formulas from add():
      
        delta = newValue - mean1
        mean2 = mean1 + delta / count2
        
        => mean2 = mean1+(newValue-mean1)/count2
                 = mean1 + newValue/count2 - mean1/count2
        => mean2 = mean1*(1-1/count2) + newValue/count2
                 
      Solve for mean1. (| -newValue/count2)
        
        => mean2-newValue/count2 = mean1*(1-1/count2)  | :(1-1/count2)
        
        => (mean2-newValue/count2)/(1-1/count2) = mean1.
        
      "Count" is easy to reverse: count1 = count2-1, obviously.
        
      Now, about restoring m2.
        
        delta2 = newValue - mean2
        m2_2 = m2_1 + delta * delta2

        => m2_2 - delta * delta2 = m2_1
        
      Well, that was easy!

    */
    
    double newValue = valueToRemove;
    double delta2 = newValue-mean;
    mean = (mean-newValue/count)/(1-1.0/count);
    double delta = newValue-mean;
    m2 -= delta*delta2;
    --count;
  }
 
  bool isEmpty() { ret count == 0; }

  dbl average aka avg aka mean() { ret count == 0 ? Double.NaN : mean; }
  dbl variance() { ret count == 0 ? Double.NaN : m2/count; }
  dbl sampleVariance() { ret count < 2 ? Double.NaN : m2/(count-1); }
  dbl standardDeviation() { ret sqrt(variance()); }
  
  toString {
    S s = nValues(count);
    if (!isEmpty())
      s += ", avg: " + avg() + ", std dev: " + standardDeviation();
    ret s;
  }
  
  AverageAndStandardDeviation get() {
    ret new AverageAndStandardDeviation(avg(), standardDeviation());
  }
}

download  show line numbers  debug dex  old transpilations   

Travelled to 3 computer(s): bhatertpkbcr, mowyntqkapby, mqqgnosmbjvj

No comments. add comment

Snippet ID: #1033575
Snippet name: Welford [running average & standard deviation]
Eternal ID of this version: #1033575/9
Text MD5: a025a00f9c6bf3d5a5b07e090a5abe7b
Transpilation MD5: ab321ab3958147f54e6dc44c3445a673
Author: stefan
Category: javax / maths
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-12-12 02:22:42
Source code size: 2244 bytes / 72 lines
Pitched / IR pitched: No / No
Views / Downloads: 267 / 424
Version history: 8 change(s)
Referenced in: #1003674 - Standard Classes + Interfaces (LIVE continued in #1034167)
#1036458 - HalfWelford [running average - I think this is nonsense. Average does the same]