IAudioSample - integral audio [1032974]

// It works like this: There is a general interface for accessing an "integrated" audio clip - IAudioSample.
// Instead of reading individual samples, callers ask for the SUM of samples over
// a (possibly fractional) time range; see sampleSum().
interface IAudioSample {
  int channels(); // 1 for mono, 2 for left+right, 3 for center+left+right... or whatever channel model you prefer
  
  DoubleRange bounds(); // our bounding box in samples according to sampleRate
  double sampleRate(); // in hertz
  
  // Convenience accessors for bounds()
  default double start() { ret bounds().start(); }
  default double end() { ret bounds().end(); }
  default double length() { ret l(bounds()); }
  
  // Gain, SpeedUp and TimeShift (instantiated by gain(), speedUp() and
  // timeShift() below) come from AudioSampleOps.
  delegate Gain to AudioSampleOps.
  delegate SpeedUp to AudioSampleOps.
  delegate TimeShift to AudioSampleOps.
  
  // Query the integral.
  // Result is in the range -32768*(end-start) to 32767*(end-start)...
  // unless you applied too much gain (there is no clipping).
  // channel is between 0 and channels()-1 from here on out
  //
  // Computed as the difference of two sum table readings
  // (at end-1 and at start-1), so start is inclusive and end is exclusive.
  default double sampleSum(int channel, double start, double end) {
    ret readSumTable(channel, end-1) - readSumTable(channel, start-1);
  }
  
  // Read the sum table at a fractional position.
  // Integral positions are passed straight to the int overload;
  // otherwise the two neighboring entries are combined with blend()
  // (presumably linear interpolation - confirm against blend's definition).
  default double readSumTable(int channel, double t) {
    int tFloor = ifloor(t);
    double val = readSumTable(channel, tFloor);
    if (tFloor == t)
      ret val;
    double next = readSumTable(channel, tFloor+1);
    ret blend(val, next, t-tFloor);
  }
  
  // Read the sum table at an integral position.
  // The default throws, so an implementation has to override this overload
  // (or the double overload above, which is the only caller visible here).
  default double readSumTable(int channel, int position) {
    throw unimplemented(this);
  }
  
  // Here the range is -1 to 1 just to spice things up
  // (sampleSum divided by the range length times 32768)
  default double getPixel(int channel, double start, double end) {
    ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768);
  }
  
  // RENDERING FUNCTIONS (visualize audio as BufferedImage)
  // [also "acoustic" rendering]
  
  // render audio as black-and-white (grayscale) stripes
  // h = height per channel
  // Image width is ceil(length()); one pixel column per unit of time.
  // NOTE(review): x is counted from 0, not from start() - a clip whose
  // bounds do not begin at 0 is rendered from the origin. Confirm intent.
  default BufferedImage stripes(int h default 50) {
    int w = iceil(length());
    int channels = channels();
    ret imageFromFunction(w, h*channels, (x, y) -> {
      // channels are stacked vertically, h rows each
      int channel = y/h;
      double value = sampleSum(channel, x, x+1);
      
      // lose lower 8 bits and shift to 0 to 255
      int digital = ifloor(value/256)+128;
      ret rgbIntFullAlpha(digital, digital, digital);
    });
  }
 
  // render audio as graph
  // h = height per channel
  // One graph per channel (value range -32768 to 32767), merged vertically.
  // NOTE(review): like stripes(), x is counted from 0, not from start().
  default BufferedImage graph(int h default 100) {
    int w = iceil(length());
    ret mergeBufferedImagesVertically(
      countIteratorToList(channels(), c ->
        simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767)));
  }
  
  // render audio as stripes + graph (best way to look at it)
  // stripes get half the per-channel height of the graph
  default BufferedImage render(int h default 100) {
    ret mergeBufferedImagesVertically(stripes(h/2), graph(h));
  }
  
  // in bounds
  // all channels
  // Output is interleaved: for each time step from floor(start) to
  // floor(end) (exclusive), one value per channel, rounded and clamped to short.
  default short[] toShortArray() {
    DoubleRange r = bounds();
    int i = ifloor(r.start()), j = ifloor(r.end());
    int n = max(0, j-i);
    var channels = channels();
    short[] array = new[n*channels];
    int iArray = 0;
    for (; i < j; i++)
      for c to channels:
        array[iArray++] = clampToShort(iround(sampleSum(c, i, i+1)));
    ret array;
  }
  
  // END OF RENDERING FUNCTIONS
 
  // find maximum amplitude, going pixel-by-pixel
  // (remember: This clip may already have been temporally
  // scaled with speedUp(), so a "pixel" may represent the average
  // of multiple audio samples.)
  //
  // Scans the clip's own bounds, floor(start) to ceil(end), over all
  // channels, so clips that do not start at 0 are measured where their
  // content actually is (same convention as toShortArray).
  // The result is capped at 32767.
  default double maxAmplitude() {
    int iStart = ifloor(start()), iEnd = iceil(end());
    int channels = channels();
    double max = 0;
    for (int i = iStart; i < iEnd; i++)
      for c to channels:
        max = max(max, abs(sampleSum(c, i, i+1)));
    ret min(32767, max);
  }
  
  // There are various non-destructive virtual transformations
  // which you can do on the audio clip (gain, speed-up and time-shift).
  // All transformations are affine in time and amplitude and thus
  // preserve the "integral image" property.
  
  // scale amplitude by factor (factor 1 returns this clip unchanged)
  default IAudioSample gain(double factor) {
    ret factor == 1 ? this : new Gain(factor, this);
  }
  
  // gain to maximum volume possible without clipping
  // (even though clipping isn't even a thing in integral audio wonderland,
  // so we just define "clipping" as exceeding the 32767 value we are used to from real audio.)
  default IAudioSample normalize() {
    ret gain(doubleRatio(32767, maxAmplitude()));
  }
  
  // resample with a factor (factor 1 returns this clip unchanged)
  public default IAudioSample speedUp(double factor) {
    ret factor == 1 ? this : new SpeedUp(factor, this);
  }
  
  // resample to a target frequency
  // (speed-up factor is sampleRate()/freq)
  public default IAudioSample sampleAt(double freq) {
    ret speedUp(sampleRate()/freq);
  }
  
  // shift the clip in time (shift 0 returns this clip unchanged)
  public default IAudioSample timeShift aka shift(double shift) {
    ret shift == 0 ? this : new TimeShift(shift, this);
  }
  
  // For debug-printing. Valued from -1 to 1 this time (sample sum divided
  // by 32768) because why not. First channel only
  // NOTE(review): reads positions 0 to n-1 regardless of start().
  default L<Double> firstPixels(int n default 20) {
    double[] pixels = new[n];
    for i to n:
      pixels[i] = sampleSum(0, i, i+1)/32768;
    ret wrapDoubleArrayAsList(pixels);
  }
  
  // also first channel only
  // Covers floor(start) to ceil(end); element i holds the sample sum of
  // the unit range beginning at floor(start)+i.
  default double[] toDoubleArray() {
    int iStart = ifloor(start()), iEnd = iceil(end());
    // never request a negative array length (same guard as toShortArray)
    int n = max(0, iEnd-iStart);
    double[] d = new[n];
    for i over d:
      // offset by iStart so the array reflects the clip's bounds, not [0, n)
      d[i] = sampleSum(0, iStart+i, iStart+i+1);
    ret d;
  }
    
} // end of IAudioSample

Travelled to 4 computer(s): bhatertpkbcr, ekrmjmnbrukm, mowyntqkapby, mqqgnosmbjvj

Snippet ID:	#1032974
Snippet name:	IAudioSample - integral audio
Eternal ID of this version:	#1032974/20
Text MD5:	e5b11daaa09dbd135f3ae19ebd665b8a
Transpilation MD5:	da66213d55c214e841b324a690dd7cd8
Author:	stefan
Category:	javax / audio analysis
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2021-10-18 05:34:16
Source code size:	5266 bytes / 151 lines
Pitched / IR pitched:	No / No
Views / Downloads:	250 / 417
Version history:	19 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1032974 // IAudioSample - integral audio

JavaX fragment (include) [tags: use-pretranspiled]