Libraryless. Click here for Pure Java version (6227L/36K).
// It works like this: There is a general interface for accessing an "integrated" audio clip - IAudioSample.
//
// The clip is queried through a running-sum table (readSumTable); the sum of
// the samples in any range is the difference of two table lookups (sampleSum).
// Everything else in this interface (rendering, export, normalization,
// virtual transformations) is built on top of sampleSum.
interface IAudioSample {
  int channels(); // 1 for mono, 2 for left+right, 3 for center+left+right... or whatever channel model you prefer

  DoubleRange bounds(); // our bounding box in samples according to sampleRate
  double sampleRate(); // in hertz

  // Convenience accessors for the bounding box
  default double start() { ret bounds().start(); }
  default double end() { ret bounds().end(); }
  default double length() { ret l(bounds()); }

  // The transformation classes used by gain(), speedUp() and timeShift() below
  delegate Gain to AudioSampleOps.
  delegate SpeedUp to AudioSampleOps.
  delegate TimeShift to AudioSampleOps.

  // Query the integral.
  // Result is in the range -32768*(end-start) to 32767*(end-start)...
  // unless you applied too much gain (there is no clipping).
  // channel is between 0 and channels()-1 from here on out
  //
  // Computed as the table value at end-1 minus the table value at start-1.
  default double sampleSum(int channel, double start, double end) {
    ret readSumTable(channel, end-1) - readSumTable(channel, start-1);
  }

  // Read the sum table at a fractional position t.
  // Integral positions are read directly; otherwise the two neighboring
  // integer entries are combined with blend() using the fractional part of t
  // (presumably linear interpolation - verify against blend()).
  default double readSumTable(int channel, double t) {
    int tFloor = ifloor(t);
    double val = readSumTable(channel, tFloor);
    if (tFloor == t)
      ret val;
    double next = readSumTable(channel, tFloor+1);
    ret blend(val, next, t-tFloor);
  }

  // Read the sum table at an integer position.
  // The default just throws unimplemented(this), so an implementation has to
  // override either this method or the methods that call it.
  default double readSumTable(int channel, int position) {
    throw unimplemented(this);
  }

  // Here the range is -1 to 1 just to spice things up
  // (sampleSum divided by (end-start)*32768 via doubleRatio)
  default double getPixel(int channel, double start, double end) {
    ret doubleRatio(sampleSum(channel, start, end), (end-start)*32768);
  }

  // RENDERING FUNCTIONS (visualize audio as BufferedImage)
  // [also "acoustic" rendering]

  // render audio as black-and-white (grayscale) stripes
  // h = height per channel
  // Image is iceil(length()) wide and h*channels() high; channels are stacked
  // vertically (channel = y/h).
  // NOTE(review): pixel x reads sampleSum(channel, x, x+1), i.e. positions are
  // not offset by start() - confirm this is intended for time-shifted clips.
  default BufferedImage stripes(int h default 50) {
    int w = iceil(length());
    int channels = channels();
    ret imageFromFunction(w, h*channels, (x, y) -> {
      int channel = y/h;
      double value = sampleSum(channel, x, x+1);

      // lose lower 8 bits and shift to 0 to 255
      int digital = ifloor(value/256)+128;
      ret rgbIntFullAlpha(digital, digital, digital);
    });
  }

  // render audio as graph
  // h = height per channel
  // One simpleGraph per channel (value range -32768 to 32767), merged vertically.
  // NOTE(review): like stripes(), x is not offset by start().
  default BufferedImage graph(int h default 100) {
    int w = iceil(length());
    ret mergeBufferedImagesVertically(
      countIteratorToList(channels(), c ->
        simpleGraph(w, h, x -> sampleSum(c, x, x+1), -32768, 32767)));
  }

  // render audio as stripes + graph (best way to look at it)
  // Stripes get half the per-channel height of the graph.
  default BufferedImage render(int h default 100) {
    ret mergeBufferedImagesVertically(stripes(h/2), graph(h));
  }

  // in bounds
  // all channels
  // Output is interleaved: for each position from ifloor(start) up to (not
  // including) ifloor(end), one value per channel. Each value is the
  // one-sample sum, rounded and clamped to the short range.
  default short[] toShortArray() {
    DoubleRange r = bounds();
    int i = ifloor(r.start()), j = ifloor(r.end());
    int n = max(0, j-i); // empty array if the range is empty or inverted
    var channels = channels();
    short[] array = new[n*channels];
    int iArray = 0;
    for (; i < j; i++)
      for c to channels:
        array[iArray++] = clampToShort(iround(sampleSum(c, i, i+1)));
    ret array;
  }

  // END OF RENDERING FUNCTIONS

  // find maximum amplitude, going pixel-by-pixel
  // (remember: This clip may already have been temporally
  // scaled with speedUp(), so a "pixel" may represent the average
  // of multiple audio samples.)
  // The result is capped at 32767.
  // NOTE(review): scans positions 0 to iceil(length())-1 rather than
  // start() to end() - confirm this is intended for time-shifted clips.
  default double maxAmplitude() {
    int n = iceil(length()), channels = channels();
    double max = 0;
    for i to n:
      for c to channels:
        max = max(max, abs(sampleSum(c, i, i+1)));
    ret min(32767, max);
  }

  // There are various non-destructive virtual transformations
  // which you can do on the audio clip (gain, speed-up and time-shift).
  // All transformations are affine in time and amplitude and thus
  // preserve the "integral image" property.

  // Amplify by a factor. A factor of exactly 1 returns this object unchanged.
  default IAudioSample gain(double factor) {
    ret factor == 1 ? this : new Gain(factor, this);
  }

  // gain to maximum volume possible without clipping
  // (even though clipping isn't even a thing in integral audio wonderland,
  // so we just define "clipping" as exceeding the 32767 value we are used to from real audio.)
  default IAudioSample normalize() {
    ret gain(doubleRatio(32767, maxAmplitude()));
  }

  // resample with a factor
  // A factor of exactly 1 returns this object unchanged.
  public default IAudioSample speedUp(double factor) {
    ret factor == 1 ? this : new SpeedUp(factor, this);
  }

  // resample to a target frequency
  // (speed-up factor is sampleRate()/freq)
  public default IAudioSample sampleAt(double freq) {
    ret speedUp(sampleRate()/freq);
  }

  // Shift the clip in time. A shift of exactly 0 returns this object unchanged.
  public default IAudioSample timeShift aka shift(double shift) {
    ret shift == 0 ? this : new TimeShift(shift, this);
  }

  // For debug-printing. Values are the one-sample sums divided by 32768,
  // so roughly -1 to 1 for signed 16 bit data. First channel only.
  // Reads positions 0 to n-1 (not offset by start()).
  default L<Double> firstPixels(int n default 20) {
    double[] pixels = new[n];
    for i to n:
      pixels[i] = sampleSum(0, i, i+1)/32768;
    ret wrapDoubleArrayAsList(pixels);
  }

  // also first channel only
  // Returns one value (the one-sample sum) per position from ifloor(start())
  // up to (not including) iceil(end()).
  default double[] toDoubleArray() {
    int iStart = ifloor(start()), iEnd = iceil(end());
    int n = max(0, iEnd-iStart); // empty array if the range is empty or inverted
    double[] d = new[n];
    for i over d:
      // d[i] corresponds to absolute position iStart+i, so the read must be
      // offset by iStart (otherwise a clip not starting at 0 yields wrong data)
      d[i] = sampleSum(0, iStart+i, iStart+i+1);
    ret d;
  }

} // end of IAudioSample
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, ekrmjmnbrukm, mowyntqkapby, mqqgnosmbjvj
No comments. add comment
Snippet ID: | #1032974 |
Snippet name: | IAudioSample - integral audio |
Eternal ID of this version: | #1032974/20 |
Text MD5: | e5b11daaa09dbd135f3ae19ebd665b8a |
Transpilation MD5: | da66213d55c214e841b324a690dd7cd8 |
Author: | stefan |
Category: | javax / audio analysis |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-10-18 05:34:16 |
Source code size: | 5266 bytes / 151 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 251 / 419 |
Version history: | 19 change(s) |
Referenced in: | [show references] |