// It works like this: There is a general interface for accessing an "integrated" audio clip - IAudioSample.

interface IAudioSample {
  int channels(); // 1 for mono, 2 for left+right, 3 for center+left+right... or whatever channel model you prefer
  DoubleRange bounds(); // our bounding box in samples according to sampleRate
  double sampleRate(); // in hertz

  default double start() { ret bounds().start(); }
  default double end() { ret bounds().end(); }
  default double length() { ret l(bounds()); }

  delegate Gain to AudioSampleOps.
  delegate SpeedUp to AudioSampleOps.
  delegate TimeShift to AudioSampleOps.

  // Query the integral.
  // The result is in the range -32768*(end-start) to 32767*(end-start)...
  // unless you applied too much gain (there is no clipping).
  // channel is between 0 and channels()-1 from here on out.
  default double sampleSum(int channel, double start, double end) {
    ret readSumTable(channel, end-1) - readSumTable(channel, start-1);
  }

  // Fractional positions are blended linearly between the two
  // neighboring sum table entries.
  default double readSumTable(int channel, double t) {
    int tFloor = ifloor(t);
    double val = readSumTable(channel, tFloor);
    if (tFloor == t) ret val;
    double next = readSumTable(channel, tFloor+1);
    ret blend(val, next, t-tFloor);
  }

  default double readSumTable(int channel, int position) { throw unimplemented(this); }

  // Here the range is -1 to 1 just to spice things up.
  default double getPixel(int channel, double start, double end) {
    ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768);
  }
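
  // Illustrative worked example of the sum table semantics (values made up,
  // not taken from any real clip): readSumTable(c, i) is the inclusive
  // cumulative sum of channel c up to sample i, presumably with
  // readSumTable(c, -1) = 0 so that queries starting at sample 0 work out.
  // For a mono clip with samples [10, 20, 30, 40] the sum table reads
  // 10, 30, 60, 100, so
  //   sampleSum(0, 1, 3) = readSumTable(0, 2) - readSumTable(0, 0)
  //                      = 60 - 10 = 50 = 20 + 30,
  // i.e. the sum over samples 1 and 2, answered with two table lookups
  // regardless of the interval's length. Fractional positions are handled
  // by the interpolating double overload of readSumTable above.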
  // RENDERING FUNCTIONS (visualize audio as BufferedImage)
  // [also "acoustic" rendering]

  // render audio as black-and-white (grayscale) stripes
  // h = height per channel
  default BufferedImage stripes(int h default 50) {
    int w = iceil(length());
    int channels = channels();
    ret imageFromFunction(w, h*channels, (x, y) -> {
      int channel = y/h;
      double value = sampleSum(channel, x, x+1);
      // drop the lower 8 bits and shift into the range 0 to 255
      int digital = ifloor(value/256)+128;
      ret rgbIntFullAlpha(digital, digital, digital);
    });
  }

  // render audio as graph
  // h = height per channel
  default BufferedImage graph(int h default 100) {
    int w = iceil(length());
    ret mergeBufferedImagesVertically(
      countIteratorToList(channels(), c ->
        simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767)));
  }

  // render audio as stripes + graph (best way to look at it)
  default BufferedImage render(int h default 100) {
    ret mergeBufferedImagesVertically(stripes(h/2), graph(h));
  }

  // convert to 16-bit samples (in bounds, all channels interleaved)
  default short[] toShortArray() {
    DoubleRange r = bounds();
    int i = ifloor(r.start()), j = ifloor(r.end());
    int n = max(0, j-i);
    var channels = channels();
    short[] array = new[n*channels];
    int iArray = 0;
    for (; i < j; i++)
      for c to channels:
        array[iArray++] = clampToShort(iround(sampleSum(c, i, i+1)));
    ret array;
  }

  // END OF RENDERING FUNCTIONS

  // Find the maximum amplitude, going pixel-by-pixel.
  // (Remember: This clip may already have been temporally
  // scaled with speedUp(), so a "pixel" may represent the average
  // of multiple audio samples.)
  default double maxAmplitude() {
    int n = iceil(length()), channels = channels();
    double max = 0;
    for i to n: for c to channels:
      max = max(max, abs(sampleSum(c, i, i+1)));
    ret min(32767, max);
  }

  // There are various non-destructive virtual transformations
  // which you can apply to the audio clip (gain, speed-up and time-shift).
  // All transformations are affine in time and amplitude and thus
  // preserve the "integral image" property.

  default IAudioSample gain(double factor) {
    ret factor == 1 ? this : new Gain(factor, this);
  }

  // gain to the maximum volume possible without clipping
  // (even though clipping isn't even a thing in integral audio wonderland,
  // so we just define "clipping" as exceeding the 32767 value we are used to from real audio)
  default IAudioSample normalize() {
    ret gain(doubleRatio(32767, maxAmplitude()));
  }

  // resample with a factor
  public default IAudioSample speedUp(double factor) {
    ret factor == 1 ? this : new SpeedUp(factor, this);
  }

  // resample to a target frequency
  public default IAudioSample sampleAt(double freq) {
    ret speedUp(sampleRate()/freq);
  }

  public default IAudioSample timeShift aka shift(double shift) {
    ret shift == 0 ? this : new TimeShift(shift, this);
  }

  // For debug-printing. Valued from -1 to 1 this time because why not. First channel only.
  default L firstPixels(int n default 20) {
    double[] pixels = new[n];
    for i to n:
      pixels[i] = sampleSum(0, i, i+1)/32768;
    ret wrapDoubleArrayAsList(pixels);
  }

  // also first channel only
  default double[] toDoubleArray() {
    int iStart = ifloor(start()), iEnd = iceil(end());
    double[] d = new[iEnd-iStart];
    for i over d:
      d[i] = sampleSum(0, i, i+1);
    ret d;
  }
} // end of IAudioSample
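
// Usage sketch (illustrative only, kept as a comment; "loadAudioClip" is a
// hypothetical factory and not part of this interface - any IAudioSample
// implementation works). It shows how the virtual transformations chain:
/*
  IAudioSample clip = loadAudioClip("speech.wav");
  IAudioSample prepared = clip
    .sampleAt(16000)  // resample to 16 kHz
    .normalize()      // virtual gain up to the 32767 "clipping" limit
    .shift(100);      // time-shift by 100 samples
  BufferedImage image = prepared.render(100); // stripes + graph
  short[] pcm = prepared.toShortArray();      // interleaved 16-bit samples
*/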