IAudioSample - integral audio [1032974]

// It works like this: There is a general interface for accessing an "integrated" audio clip - IAudioSample.
// Rather than exposing individual samples, a clip exposes a per-channel sum table
// (readSumTable); the sum over any time range is the difference of two table reads
// (sampleSum). Positions are doubles, so fractional ranges are allowed.
interface IAudioSample {
  int channels(); // 1 for mono, 2 for left+right, 3 for center+left+right... or whatever channel model you prefer
  
  DoubleRange bounds(); // our bounding box in samples according to sampleRate
  double sampleRate(); // in hertz
  
  // Convenience accessors derived from bounds()
  default double start() { ret bounds().start(); }
  default double end() { ret bounds().end(); }
  default double length() { ret l(bounds()); }
  
  // The transformation wrappers used by gain(), speedUp() and timeShift() below
  delegate Gain to AudioSampleOps.
  delegate SpeedUp to AudioSampleOps.
  delegate TimeShift to AudioSampleOps.
  
  // Query the integral.
  // Result is in the range -32768*(end-start) to 32767*(end-start)...
  // unless you applied too much gain (there is no clipping).
  // channel is between 0 and channels()-1 from here on out
  default double sampleSum(int channel, double start, double end) {
    // Difference of two sum-table reads; both positions are shifted by -1
    ret readSumTable(channel, end-1) - readSumTable(channel, start-1);
  }
  
  // Read the sum table at a fractional position.
  // Integral positions are read directly; anything else is blended
  // from the two neighboring integer entries.
  default double readSumTable(int channel, double t) {
    int tFloor = ifloor(t);
    double val = readSumTable(channel, tFloor);
    if (tFloor == t)
      ret val; // exact table hit, no second read needed
    double next = readSumTable(channel, tFloor+1);
    // NOTE(review): blend() is defined elsewhere - presumably linear interpolation
    // with weight t-tFloor; confirm.
    ret blend(val, next, t-tFloor);
  }
  
  // Read the sum table at an integer position.
  // Concrete clips are expected to override this; the default just throws.
  default double readSumTable(int channel, int position) {
    throw unimplemented(this);
  }
  
  // Here the range is -1 to 1 just to spice things up
  // (sum over the range divided by range length * 32768)
  default double getPixel(int channel, double start, double end) {
    ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768);
  }
  
  // RENDERING FUNCTIONS (visualize audio as BufferedImage)
  // [also "acoustic" rendering]
  
  // render audio as black-and-white (grayscale) stripes
  // h = height per channel
  // Image is ceil(length()) pixels wide; channels are stacked vertically.
  // NOTE(review): columns are read from position 0, not from start() -
  // confirm whether shifted clips should be rendered relative to their bounds.
  default BufferedImage stripes(int h default 50) {
    int w = iceil(length());
    int channels = channels();
    ret imageFromFunction(w, h*channels, (x, y) -> {
      int channel = y/h;
      double value = sampleSum(channel, x, x+1);
      
      // lose lower 8 bits and shift to 0 to 255.
      // There is no clipping in the sample data itself (see sampleSum),
      // so clamp here to keep the gray level a valid color component.
      int digital = max(0, min(255, ifloor(value/256)+128));
      ret rgbIntFullAlpha(digital, digital, digital);
    });
  }
 
  // render audio as graph
  // h = height per channel
  // One graph per channel (value axis -32768 to 32767), merged vertically.
  default BufferedImage graph(int h default 100) {
    int w = iceil(length());
    ret mergeBufferedImagesVertically(
      countIteratorToList(channels(), c ->
        simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767)));
  }
  
  // render audio as stripes + graph (best way to look at it)
  // Stripes get half the per-channel height of the graph.
  default BufferedImage render(int h default 100) {
    ret mergeBufferedImagesVertically(stripes(h/2), graph(h));
  }
  
  // in bounds
  // all channels
  // Output is interleaved by channel (all channels of position i, then i+1, ...),
  // covering floor(start) up to (excluding) floor(end). Values are rounded
  // and clamped to the short range.
  default short[] toShortArray() {
    DoubleRange r = bounds();
    int i = ifloor(r.start()), j = ifloor(r.end());
    int n = max(0, j-i); // guard against inverted/empty bounds
    var channels = channels();
    short[] array = new[n*channels];
    int iArray = 0;
    for (; i < j; i++)
      for c to channels:
        array[iArray++] = clampToShort(iround(sampleSum(c, i, i+1)));
    ret array;
  }
  
  // END OF RENDERING FUNCTIONS
 
  // find maximum amplitude, going pixel-by-pixel
  // (remember: This clip may already have been temporally
  // scaled with speedUp(), so a "pixel" may represent the average
  // of multiple audio samples.)
  // Scans all channels over the clip's bounds (floor(start) to ceil(end)),
  // so time-shifted clips are measured where their audio actually is.
  // Result is capped at 32767.
  default double maxAmplitude() {
    int iStart = ifloor(start()), iEnd = iceil(end()), channels = channels();
    double max = 0;
    for (int i = iStart; i < iEnd; i++)
      for c to channels: 
        max = max(max, abs(sampleSum(c, i, i+1)));
    ret min(32767, max);
  }
  
  // There are various non-destructive virtual transformations
  // which you can do on the audio clip (gain, speed-up and time-shift).
  // All transformations are affine in time and amplitude and thus
  // preserve the "integral image" property.
  
  // Scale the amplitude. A factor of 1 returns this clip unchanged (no wrapper).
  default IAudioSample gain(double factor) {
    ret factor == 1 ? this : new Gain(factor, this);
  }
  
  // gain to maximum volume possible without clipping
  // (even though clipping isn't even a thing in integral audio wonderland,
  // so we just define "clipping" as exceeding the 32767 value we are used to from real audio.)
  default IAudioSample normalize() {
    ret gain(doubleRatio(32767, maxAmplitude()));
  }
  
  // resample with a factor
  // A factor of 1 returns this clip unchanged (no wrapper).
  public default IAudioSample speedUp(double factor) {
    ret factor == 1 ? this : new SpeedUp(factor, this);
  }
  
  // resample to a target frequency
  // (expressed as a speed-up by sampleRate()/freq)
  public default IAudioSample sampleAt(double freq) {
    ret speedUp(sampleRate()/freq);
  }
  
  // Shift the clip in time. A shift of 0 returns this clip unchanged (no wrapper).
  public default IAudioSample timeShift aka shift(double shift) {
    ret shift == 0 ? this : new TimeShift(shift, this);
  }
  
  // For debug-printing. Values are scaled by 1/32768, i.e. roughly -1 to 1
  // (samples are signed). First channel only; reads positions 0 to n-1.
  default L<Double> firstPixels(int n default 20) {
    double[] pixels = new[n];
    for i to n:
      pixels[i] = sampleSum(0, i, i+1)/32768;
    ret wrapDoubleArrayAsList(pixels);
  }
  
  // also first channel only
  // Covers floor(start) up to (excluding) ceil(end); element i holds the
  // sum for position iStart+i, so clips not starting at 0 are read in bounds.
  default double[] toDoubleArray() {
    int iStart = ifloor(start()), iEnd = iceil(end());
    double[] d = new[iEnd-iStart];
    for i over d:
      d[i] = sampleSum(0, iStart+i, iStart+i+1);
    ret d;
  }
    
} // end of IAudioSample

Travelled to 4 computer(s): bhatertpkbcr, ekrmjmnbrukm, mowyntqkapby, mqqgnosmbjvj

Snippet ID:	#1032974
Snippet name:	IAudioSample - integral audio
Eternal ID of this version:	#1032974/20
Text MD5:	e5b11daaa09dbd135f3ae19ebd665b8a
Transpilation MD5:	da66213d55c214e841b324a690dd7cd8
Author:	stefan
Category:	javax / audio analysis
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2021-10-18 05:34:16
Source code size:	5266 bytes / 151 lines
Pitched / IR pitched:	No / No
Views / Downloads:	251 / 419
Version history:	19 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1032974 // IAudioSample - integral audio

JavaX fragment (include) [tags: use-pretranspiled]

1	// It works like this: There is a general interface for accessing an "integrated" audio clip - IAudioSample.
2	interface IAudioSample {
3	int channels(); // 1 for mono, 2 for left+right, 3 for center+left+right... or whatever channel model you prefer
4
5	DoubleRange bounds(); // our bounding box in samples according to sampleRate
6	double sampleRate(); // in hertz
7
8	default double start() { ret bounds().start(); }
9	default double end() { ret bounds().end(); }
10	default double length() { ret l(bounds()); }
11
12	delegate Gain to AudioSampleOps.
13	delegate SpeedUp to AudioSampleOps.
14	delegate TimeShift to AudioSampleOps.
15
16	// Query the integral.
17	// Result is in the range -32768*(end-start) to 32767*(end-start)...
18	// unless you applied too much gain (there is no clipping).
19	// channel is between 0 and channels()-1 from here on out
20	default double sampleSum(int channel, double start, double end) {
21	ret readSumTable(channel, end-1) - readSumTable(channel, start-1);
22	}
23
24	default double readSumTable(int channel, double t) {
25	int tFloor = ifloor(t);
26	double val = readSumTable(channel, tFloor);
27	if (tFloor == t)
28	ret val;
29	double next = readSumTable(channel, tFloor+1);
30	ret blend(val, next, t-tFloor);
31	}
32
33	default double readSumTable(int channel, int position) {
34	throw unimplemented(this);
35	}
36
37	// Here the range is -1 to 1 just to spice things up
38	default double getPixel(int channel, double start, double end) {
39	ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768);
40	}
41
42	// RENDERING FUNCTIONS (visualize audio as BufferedImage)
43	// [also "acoustic" rendering]
44
45	// render audio as black-and-white (grayscale) stripes
46	// h = height per channel
47	default BufferedImage stripes(int h default 50) {
48	int w = iceil(length());
49	int channels = channels();
50	ret imageFromFunction(w, h*channels, (x, y) -> {
51	int channel = y/h;
52	double value = sampleSum(channel, x, x+1);
53
54	// lose lower 8 bits and shift to 0 to 255
55	int digital = ifloor(value/256)+128;
56	ret rgbIntFullAlpha(digital, digital, digital);
57	});
58	}
59
60	// render audio as graph
61	// h = height per channel
62	default BufferedImage graph(int h default 100) {
63	int w = iceil(length());
64	ret mergeBufferedImagesVertically(
65	countIteratorToList(channels(), c ->
66	simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767)));
67	}
68
69	// render audio as stripes + graph (best way to look at it)
70	default BufferedImage render(int h default 100) {
71	ret mergeBufferedImagesVertically(stripes(h/2), graph(h));
72	}
73
74	// in bounds
75	// all channels
76	default short[] toShortArray() {
77	DoubleRange r = bounds();
78	int i = ifloor(r.start()), j = ifloor(r.end());
79	int n = max(0, j-i);
80	var channels = channels();
81	short[] array = new[n*channels];
82	int iArray = 0;
83	for (; i < j; i++)
84	for c to channels:
85	array[iArray++] = clampToShort(iround(sampleSum(c, i, i+1)));
86	ret array;
87	}
88
89	// END OF RENDERING FUNCTIONS
90
91	// find maximum amplitude, going pixel-by-pixel
92	// (remember: This clip may already have been temporally
93	// scaled with speedUp(), so a "pixel" may represent the average
94	// of multiple audio samples.)
95	default double maxAmplitude() {
96	int n = iceil(length()), channels = channels();
97	double max = 0;
98	for i to n:
99	for c to channels:
100	max = max(max, abs(sampleSum(c, i, i+1)));
101	ret min(32767, max);
102	}
103
104	// There are various non-destructive virtual transformations
105	// which you can do on the audio clip (gain, speed-up and time-shift).
106	// All transformations are affine in time and amplitude and thus
107	// preserve the "integral image" property.
108
109	default IAudioSample gain(double factor) {
110	ret factor == 1 ? this : new Gain(factor, this);
111	}
112
113	// gain to maximum volume possible without clipping
114	// (even though clipping isn't even a thing in integral audio wonderland,
115	// so we just define "clipping" as exceeding the 32767 value we are used to from real audio.)
116	default IAudioSample normalize() {
117	ret gain(doubleRatio(32767, maxAmplitude()));
118	}
119
120	// resample with a factor
121	public default IAudioSample speedUp(double factor) {
122	ret factor == 1 ? this : new SpeedUp(factor, this);
123	}
124
125	// resample to a target frequency
126	public default IAudioSample sampleAt(double freq) {
127	ret speedUp(sampleRate()/freq);
128	}
129
130	public default IAudioSample timeShift aka shift(double shift) {
131	ret shift == 0 ? this : new TimeShift(shift, this);
132	}
133
134	// For debug-printing. Valued from 0 to 1 this time because why not. First channel only
135	default L<Double> firstPixels(int n default 20) {
136	double[] pixels = new[n];
137	for i to n:
138	pixels[i] = sampleSum(0, i, i+1)/32768;
139	ret wrapDoubleArrayAsList(pixels);
140	}
141
142	// also first channel only
143	default double[] toDoubleArray() {
144	int iStart = ifloor(start()), iEnd = iceil(end());
145	double[] d = new[iEnd-iStart];
146	for i over d:
147	d[i] = sampleSum(0, i, i+1);
148	ret d;
149	}
150
151	} // end of IAudioSample