// For audio recognition we use a 1D integral image (a running-sum
// table) built from 16 bit sample data. Each sample is treated as
// a pixel, which gives us a very wide image of height 1.

sclass AudioRecognizer {
  // Set by the (samples, channels) constructor; stays null when the
  // no-arg constructor is used.
  IAudioSample mainSample;

  // Range-sum access to a multi-channel audio sample.
  interface IAudioSample {
    int channels();

    // length in frames (one frame = one sample per channel)
    double length();

    // sum of the samples of one channel between start and end
    double sampleSum(int channel, double start, double end);

    // Average sample value between start and end: the range sum
    // divided by the range width (end-start), computed by doubleRatio.
    public default double getPixel(int channel, double start, double end) {
      ret doubleRatio(sampleSum(channel, start, end), end-start);
    }
  }

  // The 1D integral image we use to represent audio samples:
  // 16 bit input samples, running sums kept as longs.
  class AudioSample implements IAudioSample {
    int channels;
    int length;  // number of frames
    long[] data; // interleaved sum table: data[frame*channels+channel]

    public int channels() { ret channels; }
    public double length() { ret length; }

    // Sum of the samples of one channel with frame index in
    // [floor(start), floor(end)). The range is half-open so that it
    // covers end-start samples, matching the divisor in getPixel.
    public double sampleSum(int channel, double start, double end) {
      int a = ifloor(start), b = ifloor(end);
      ret getEntry(channel, b-1)-getEntry(channel, a-1);
    }

    // Get an entry of the sum table: the sum of frames 0..i (inclusive)
    // of the given channel. Indices below 0 give 0; indices past the
    // end are clamped to the last frame (i.e. the channel's total).
    long getEntry(int channel, int i) {
      // an empty sample has no table entries to clamp to
      if (i < 0 || length == 0) ret 0;
      i = min(i, length-1);
      ret data[i*channels+channel];
    }

    // Build the sum table from one array of interleaved samples.
    // channels is used as a divisor, so it must not be 0.
    // Trailing samples that do not fill a whole frame are ignored.
    *(short[] samples, int *channels) {
      length = l(samples)/channels;
      int n = length*channels;
      data = new long[n];
      long[] sums = new[channels]; // running sum per channel
      int c = 0;
      for iSample to n: {
        data[iSample] = (sums[c] += samples[iSample]);
        if (++c == channels) c = 0;
      }
    }

    // Build the sum table from interleaved samples delivered as a list
    // of chunks. channels is used as a divisor, so it must not be 0.
    // Trailing samples that do not fill a whole frame are ignored.
    *(L<short[]> samples, int *channels) {
      // lengthLevel2_shortArrays presumably returns the total number
      // of shorts across all chunks, so divide to get frames
      // (same as the short[] constructor does).
      length = lengthLevel2_shortArrays(samples)/channels;
      int n = length*channels;
      data = new long[n];
      long[] sums = new[channels]; // running sum per channel
      int iChunk = 0, iInArray = 0, c = 0;
      short[] chunk = null;
      for iSample to n: {
        // Advance per sample (not per frame) so a frame may straddle
        // two chunks; loop so that empty chunks are skipped.
        while (chunk == null || iInArray >= chunk.length) {
          chunk = samples.get(iChunk++);
          iInArray = 0;
        }
        data[iSample] = (sums[c] += chunk[iInArray++]);
        if (++c == channels) c = 0;
      }
    }
  }

  // Creates a recognizer without a main sample.
  *() {}

  // Creates a recognizer whose main sample is built from the given
  // chunks of interleaved samples.
  *(L<short[]> samples, int channels) {
    mainSample = new AudioSample(samples, channels);
  }
}