// For audio recognition, we need a double-precision (16 bit) // integral image. We simply turn the sample data into pixels and // have ourselves a verybig*1 image. sclass AudioRecognizer { IAudioSample mainSample; double defaultInputSampleRate() { ret 44100; } interface IAudioSample { int channels(); double length(); double sampleRate(); // result is in the range -32768*(end-start) to 32767*(end-start) double sampleSum(int channel, double start, double end); // range is -1 to 1 default double getPixel(int channel, double start, double end) { ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768); } // h = height per channel default BufferedImage stripes(int h default 50) { int w = iceil(length()); int channels = channels(); ret imageFromFunction(w, h*channels, (x, y) -> { int channel = y/h; double value = sampleSum(channel, x, x+1); // lose lower 8 bits and shift to 0 to 255 int digital = ifloor(value/256)+128; //if (x < 20) printVars(+value, +digital); ret rgbIntFullAlpha(digital, digital, digital); }); } // h = height per channel default BufferedImage graph(int h default 100) { int w = iceil(length()); ret mergeBufferedImagesVertically( countIteratorToList(channels(), c -> simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767))); } default BufferedImage render(int h default 100) { ret mergeBufferedImagesVertically(stripes(h/2), graph(h)); } // going pixel-by-pixel default double maxAmplitude() { int n = iceil(length()), channels = channels(); double max = 0; for i to n: for c to channels: max = max(max, abs(sampleSum(c, i, i+1))); ret min(32767, max); } default IAudioSample gain(double factor) { ret factor == 1 ? this : new Gain(factor, this); } default IAudioSample normalize() { ret gain(doubleRatio(32767, maxAmplitude())); } public default IAudioSample speedUp(double factor) { ret factor == 1 ? this : new SpeedUp(factor, this); } public default IAudioSample sampleAt(double freq) { ret speedUp(sampleRate()/freq); } public default IAudioSample timeShift aka shift(double shift) { ret shift == 0 ? this : new TimeShift(shift, this); } // valued from 0 to 1 because why not // first channel only default L firstPixels(int n default 20) { double[] pixels = new[n]; for i to n: pixels[i] = sampleSum(0, i, i+1)/32768; ret wrapDoubleArrayAsList(pixels); } } // end of IAudioSample // the 16 bit per channel 1D integral image // we use to represent audio samples sclass AudioSample implements IAudioSample { int channels; int length; long[] data; double sampleRate; public double sampleRate() { ret sampleRate; } public int channels() { ret channels; } public double length() { ret length; } // result is in the range -32768*(end-start) to 32767*(end-start) public double sampleSum(int channel, double start, double end) { int a = ifloor(start), b = ifloor(end); ret getEntry(channel, b-1)-getEntry(channel, a-1); } // get an entry of the sum table long getEntry(int channel, int i) { if (i < 0) ret 0; i = min(i, length-1); ret data[i*channels+channel]; } *(L samples, int *channels, double *sampleRate) { length = lengthLevel2_shortArrays(samples); data = new long[length*channels]; long[] sums = new[channels]; int iSample = 0, iChunk = 0, iInArray = 0; short[] chunk = null; for i to length: { if (chunk == null || iInArray >= chunk.length) { chunk = samples.get(iChunk++); iInArray = 0; } for c to channels: data[iSample++] = (sums[c] += chunk[iInArray++]); } } } srecord noeq Gain(double factor, IAudioSample original) implements IAudioSample { public double sampleRate() { ret original.sampleRate(); } public int channels() { ret original.channels(); } public double length() { ret original.length(); } public double sampleSum(int channel, double start, double end) { ret original.sampleSum(channel, start, end)*factor; } // optimize double gain public IAudioSample gain(double factor) { ret original.gain(this.factor*factor); } } // moves the input samples to the left (cuts off beginning) // samples can be fractional - we're in integral image (audio) wonderland after all srecord noeq TimeShift(double shift, IAudioSample original) implements IAudioSample { public double sampleRate() { ret original.sampleRate(); } public int channels() { ret original.channels(); } public double length() { ret original.length()-shift; } public double sampleSum(int channel, double start, double end) { ret original.sampleSum(channel, start+shift, end+shift); } // optimize double shift public IAudioSample timeShift(double shift) { ret original.timeShift(this.shift+shift); } } sclass SpeedUp implements IAudioSample { double factor, invFactor; IAudioSample original; *(double *factor, IAudioSample *original) { if (factor < 1) fail("Can't slow down. " + factor); invFactor = 1/factor; } public double sampleRate() { ret original.sampleRate()*invFactor; } public int channels() { ret original.channels(); } public double length() { ret original.length()*invFactor; } public double sampleSum(int channel, double start, double end) { ret original.sampleSum(channel, start*factor, end*factor)*invFactor; } // optimize double speed-up public IAudioSample speedUp(double factor) { ret original.speedUp(this.factor*factor); } } *() {} *(short[] samples, int channels) { this(ll(samples), channels); } *(L samples, int channels) { mainSample = new AudioSample(samples, channels, defaultInputSampleRate()); } *(double seconds, VF1 soundSource, int channels) { this(soundSourceToShortArrays(seconds, soundSource, channels), channels); } // modifiers for mainSample void applyGain(double factor) { mainSample = mainSample.gain(factor); } void normalize { mainSample = mainSample.normalize(); } void speedUp(double factor) { mainSample = mainSample.speedUp(factor); } // analysis functions // now divided by the duration double sumOfVibrations(IAudioSample sample, int channel, double start, double freq, int periods) { double sum = 0, period = sample.sampleRate()/freq; double t = start; for p to periods: { sum += sample.sampleSum(channel, t, t+period/2) - sample.sampleSum(channel, t+period/2, t+period); t += period; } ret doubleRatio(sum, t-start); } // also divided by duration Complex complexSumOfVibrations(IAudioSample sample, int channel, double start, double freq, int periods) { double period = sample.sampleRate()/freq; ret Complex(sumOfVibrations(sample, channel, start, freq, periods), sumOfVibrations(sample, channel, start+period/4, freq, periods)); } }