// For audio recognition, we need a double-precision (16 bit) // integral image. We simply turn the sample data into pixels and // have ourselves a verybig*1 image. sclass AudioRecognizer { IAudioSample mainSample; interface IAudioSample { int channels(); double length(); // result is in the range -32768*(end-start) to 32767*(end-start) double sampleSum(int channel, double start, double end); // range is -1 to 1 default double getPixel(int channel, double start, double end) { ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768); } default BufferedImage render() { int w = iceil(length()), h = channels(); ret imageFromFunction(w, h, (x, y) -> { int channel = y; double value = sampleSum(channel, x, x+1); // lose lower 8 bits and shift to 0 to 255 int digital = ifloor(value/256)+128; //if (x < 20) printVars(+value, +digital); ret rgbIntFullAlpha(digital, digital, digital); }); } // going pixel-by-pixel double maxAmplitude() { int n = iceil(length()); double max = 0; for i to n: max = max(max, abs(sampleSum(i, i+1))); ret max; } } // the 16 bit per channel 1D integral image // we use to represent audio samples class AudioSample implements IAudioSample { int channels; int length; long[] data; public int channels() { ret channels; } public double length() { ret length; } // result is in the range -32768*(end-start) to 32767*(end-start) public double sampleSum(int channel, double start, double end) { int a = ifloor(start), b = ifloor(end); ret getEntry(channel, b)-getEntry(channel, a-1); } // get an entry of the sum table long getEntry(int channel, int i) { if (i < 0) ret 0; i = min(i, length-1); ret data[i*channels+channel]; } *(short[] samples, int *channels) { length = l(samples)/channels; data = new long[length*channels]; long[] sums = new[channels]; int iSample = 0; for i to length: for c to channels: { data[iSample] = (sums[c] += samples[iSample]); iSample++; } } *(L samples, int *channels) { length = lengthLevel2_shortArrays(samples); data = new long[length*channels]; long[] sums = new[channels]; int iSample = 0, iChunk = 0, iInArray = 0; short[] chunk = null; for i to length: { if (chunk == null || iInArray >= chunk.length) { chunk = samples.get(iChunk++); iInArray = 0; } for c to channels: data[iSample++] = (sums[c] += chunk[iInArray++]); } } } record noeq Gain(double factor, IAudioSample original) implements IAudioSample { public int channels() { ret original.channels(); } public double length() { ret original.length(); } public double sampleSum(int channel, double start, double end) { ret original.sampleSum(channel, start, end)*factor; } } class SpeedUp implements IAudioSample { double factor, invFactor; IAudioSample original; *(double factor, IAudioSample *original) { invFactor = 1/factor; } public int channels() { ret original.channels(); } public double length() { ret original.length()*invFactor; } public double sampleSum(int channel, double start, double end) { ret original.sampleSum(channel, start*factor, end*factor)*invFactor; } } *() {} *(short[] samples, int channels) { this(ll(samples), channels); } *(L samples, int channels) { mainSample = new AudioSample(samples, channels); } void applyGain(double factor) { mainSample = new Gain(factor, mainSample); } void speedUp(double factor) { mainSample = new SpeedUp(factor, mainSample); } }