// from https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm srecord Welford(long count, dbl mean, dbl m2) { // For a new value newValue, compute the new count, new mean, the new M2. // mean accumulates the mean of the entire dataset // M2 aggregates the squared distance from the mean // count aggregates the number of samples seen so far void add(double newValue) { count++; dbl delta = newValue - mean; mean += delta / count; dbl delta2 = newValue - mean; m2 += delta * delta2; } void remove(double valueToRemove) { guarantee(count > 0); /* Let's reverse the formulas from add(): delta = newValue - mean1 mean2 = mean1 + delta / count2 => mean2 = mean1+(newValue-mean1)/count2 = mean1 + newValue/count2 - mean1/count2 => mean2 = mean1*(1-1/count2) + newValue/count2 Solve for mean1. (| -newValue/count2) => mean2-newValue/count2 = mean1*(1-1/count2) | :(1-1/count2) => (mean2-newValue/count2)/(1-1/count2) = mean1. "Count" is easy to reverse: count1 = count2-1, obviously. Now, about restoring m2. delta2 = newValue - mean2 m2_2 = m2_1 + delta * delta2 => m2_2 - delta * delta2 = m2_1 Well, that was easy! */ double newValue = valueToRemove; double delta2 = newValue-mean; mean = (mean-newValue/count)/(1-1.0/count); double delta = newValue-mean; m2 -= delta*delta2; --count; } bool isEmpty() { ret count == 0; } dbl average aka avg aka mean() { ret count == 0 ? Double.NaN : mean; } dbl variance() { ret count == 0 ? Double.NaN : m2/count; } dbl sampleVariance() { ret count < 2 ? Double.NaN : m2/(count-1); } dbl standardDeviation() { ret sqrt(variance()); } toString { S s = nValues(count); if (!isEmpty()) s += ", avg: " + avg() + ", std dev: " + standardDeviation(); ret s; } AverageAndStandardDeviation get() { ret new AverageAndStandardDeviation(avg(), standardDeviation()); } }