/**
 * vad.js - energy-based Voice Activity Detection built on Web Audio nodes.
 *
 * Publishes a single constructor, `VAD`, on the host global object.
 */
(function(root) {

  /**
   * Voice activity detector.
   *
   * The constructor attaches an analyser and a script processor to
   * `options.source`. On every `onaudioprocess` callback it reads the
   * analyser's frequency data, converts it from dB to linear, computes a
   * filter-weighted energy and compares that energy against an adaptive
   * offset. `options.voice_start` / `options.voice_stop` are invoked when
   * the detector state flips.
   *
   * @constructor
   * @param {Object} options - User options; own properties override the defaults.
   * @param {Object} options.source - Audio source node (required). Its
   *     `context` property is used as the audio context.
   * @param {number} [options.fftSize=512] - Analyser FFT size.
   * @param {number} [options.bufferLen=512] - Script processor buffer size.
   * @param {Function} [options.voice_start] - Called when voice activity starts.
   * @param {Function} [options.voice_stop] - Called when voice activity stops.
   * @param {number} [options.smoothingTimeConstant=0.99] - Assigned to the analyser.
   * @param {number} [options.energy_offset=1e-8] - The initial offset.
   * @param {number} [options.energy_threshold_ratio_pos=2] - Positive threshold
   *     as a multiple of the current offset.
   * @param {number} [options.energy_threshold_ratio_neg=0.5] - Negative threshold
   *     as a multiple of the current offset.
   * @param {number} [options.energy_integration=1] - Size of the offset change
   *     compared to the signal, per second.
   * @param {Array<{f: number, v: number}>} [options.filter] - Filter shape,
   *     see `setFilter`.
   * @param {boolean} [options.debug=true] - When truthy, logs the derived
   *     timing values once at construction.
   * @throws {Error} If no `source` option is supplied.
   */
  var VAD = function(options) {
    // Default options
    this.options = {
      fftSize: 512,
      bufferLen: 512,
      voice_stop: function() {},
      voice_start: function() {},
      smoothingTimeConstant: 0.99,
      energy_offset: 1e-8, // The initial offset.
      energy_threshold_ratio_pos: 2, // Signal must be twice the offset
      energy_threshold_ratio_neg: 0.5, // Signal must be half the offset
      energy_integration: 1, // Size of integration change compared to the signal per second.
      filter: [
        {f: 200, v: 0}, // 0 -> 200 is 0
        {f: 2000, v: 1} // 200 -> 2k is 1
      ],
      source: null,
      context: null,
      debug: true // Previously a hard-coded DEBUG flag; default keeps the old output.
    };

    // User options. hasOwnProperty is borrowed from Object.prototype so that
    // option objects without a prototype are accepted too.
    for(var option in options) {
      if(Object.prototype.hasOwnProperty.call(options, option)) {
        this.options[option] = options[option];
      }
    }

    // Require source
    if(!this.options.source)
      throw new Error("The options must specify a MediaStreamAudioSourceNode.");

    // Set this.options.context (always taken from the source node)
    this.options.context = this.options.source.context;

    // Calculate time relationships
    this.hertzPerBin = this.options.context.sampleRate / this.options.fftSize;
    this.iterationFrequency = this.options.context.sampleRate / this.options.bufferLen;
    this.iterationPeriod = 1 / this.iterationFrequency;

    if(this.options.debug) console.log(
      'Vad' +
      ' | sampleRate: ' + this.options.context.sampleRate +
      ' | hertzPerBin: ' + this.hertzPerBin +
      ' | iterationFrequency: ' + this.iterationFrequency +
      ' | iterationPeriod: ' + this.iterationPeriod
    );

    /**
     * Builds the per-bin weight table `this.filter` from a shape description.
     *
     * Each bin gets the `v` of the first shape entry whose `f` is strictly
     * greater than the bin's frequency (`bin index * hertzPerBin`). Bins at or
     * above every `f` keep the weight 0.
     *
     * @param {Array<{f: number, v: number}>} shape - Entries are tested in
     *     array order.
     */
    this.setFilter = function(shape) {
      this.filter = [];
      for(var i = 0, iLen = this.options.fftSize / 2; i < iLen; i++) {
        this.filter[i] = 0;
        for(var j = 0, jLen = shape.length; j < jLen; j++) {
          if(i * this.hertzPerBin < shape[j].f) {
            this.filter[i] = shape[j].v;
            break; // Exit j loop
          }
        }
      }
    };

    this.setFilter(this.options.filter);

    this.ready = {}; // Per-frame cache flags, reset by update()
    this.vadState = false; // True when Voice Activity Detected

    // Energy detector props
    this.energy_offset = this.options.energy_offset;
    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

    // voiceTrend is a counter clamped to [voiceTrendMin, voiceTrendMax];
    // crossing voiceTrendStart / voiceTrendEnd flags start / end of speech.
    this.voiceTrend = 0;
    this.voiceTrendMax = 10;
    this.voiceTrendMin = -10;
    this.voiceTrendStart = 5;
    this.voiceTrendEnd = -5;

    // Create analyser
    this.analyser = this.options.context.createAnalyser();
    this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant; // 0.99;
    this.analyser.fftSize = this.options.fftSize;

    this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);

    // Setup local storage of the Linear FFT data
    this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);

    // Connect this.analyser
    this.options.source.connect(this.analyser);

    // Create ScriptProcessorNode
    this.scriptProcessorNode = this.options.context.createScriptProcessor(this.options.bufferLen, 1, 1);

    // Connect scriptProcessorNode (Theoretically, not required)
    // NOTE(review): presumably kept so onaudioprocess fires in every browser - confirm.
    this.scriptProcessorNode.connect(this.options.context.destination);

    // Create callback to update/analyze floatFrequencyData
    var self = this;
    this.scriptProcessorNode.onaudioprocess = function(event) {
      self.analyser.getFloatFrequencyData(self.floatFrequencyData);
      self.update();
      self.monitor();
    };

    // Connect scriptProcessorNode
    this.options.source.connect(this.scriptProcessorNode);

    // log stuff
    this.logging = false;
    this.log_i = 0;
    this.log_limit = 100;

    /**
     * Enables `log()` output for the next `limit` messages.
     *
     * @param {number} [limit] - Message budget; a non-number keeps the
     *     current `log_limit`.
     */
    this.triggerLog = function(limit) {
      this.logging = true;
      this.log_i = 0;
      this.log_limit = typeof limit === 'number' ? limit : this.log_limit;
    };

    /**
     * Writes `msg` to the console while logging is enabled and the budget is
     * not used up; otherwise switches logging off.
     *
     * @param {string} msg - Text to print.
     */
    this.log = function(msg) {
      if(this.logging && this.log_i < this.log_limit) {
        this.log_i++;
        console.log(msg);
      } else {
        this.logging = false;
      }
    };

    /**
     * Refreshes `floatFrequencyDataLinear` from `floatFrequencyData` using
     * `10^(value / 10)` per bin, and invalidates the per-frame caches.
     */
    this.update = function() {
      // Update the local version of the Linear FFT
      var fft = this.floatFrequencyData;
      for(var i = 0, iLen = fft.length; i < iLen; i++) {
        this.floatFrequencyDataLinear[i] = Math.pow(10, fft[i] / 10);
      }
      this.ready = {};
    };

    /**
     * Returns the filter-weighted sum of the squared linear bins. The value
     * is cached in `this.energy` until the next `update()`.
     *
     * @returns {number} Energy of the current frame.
     */
    this.getEnergy = function() {
      if(this.ready.energy) {
        return this.energy;
      }

      var energy = 0;
      var fft = this.floatFrequencyDataLinear;

      for(var i = 0, iLen = fft.length; i < iLen; i++) {
        energy += this.filter[i] * fft[i] * fft[i];
      }

      this.energy = energy;
      this.ready.energy = true;

      return energy;
    };

    /**
     * Runs one detection step: moves `voiceTrend` according to the current
     * signal, adapts `energy_offset` and both thresholds, and fires
     * `voice_start` / `voice_stop` when `vadState` changes.
     *
     * @returns {number} The signal, i.e. the frame energy minus the offset
     *     in effect before this step adapted it.
     */
    this.monitor = function() {
      var energy = this.getEnergy();
      var signal = energy - this.energy_offset;

      if(signal > this.energy_threshold_pos) {
        this.voiceTrend = (this.voiceTrend + 1 > this.voiceTrendMax) ? this.voiceTrendMax : this.voiceTrend + 1;
      } else if(signal < -this.energy_threshold_neg) {
        this.voiceTrend = (this.voiceTrend - 1 < this.voiceTrendMin) ? this.voiceTrendMin : this.voiceTrend - 1;
      } else {
        // voiceTrend gets smaller
        if(this.voiceTrend > 0) {
          this.voiceTrend--;
        } else if(this.voiceTrend < 0) {
          this.voiceTrend++;
        }
      }

      var start = false, end = false;
      if(this.voiceTrend > this.voiceTrendStart) {
        // Start of speech detected
        start = true;
      } else if(this.voiceTrend < this.voiceTrendEnd) {
        // End of speech detected
        end = true;
      }

      // Integration brings in the real-time aspect through the relationship
      // with the frequency at which this function is called.
      var integration = signal * this.iterationPeriod * this.options.energy_integration;

      // Idea?: The integration is affected by the voiceTrend magnitude? - Not sure. Not doing atm.

      // The !end limits the offset delta boost till after the end is detected.
      if(integration > 0 || !end) {
        this.energy_offset += integration;
      } else {
        this.energy_offset += integration * 10;
      }
      this.energy_offset = this.energy_offset < 0 ? 0 : this.energy_offset;
      this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
      this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

      // Broadcast the messages
      if(start && !this.vadState) {
        this.vadState = true;
        this.options.voice_start();
      }
      if(end && this.vadState) {
        this.vadState = false;
        this.options.voice_stop();
      }

      this.log(
        'e: ' + energy +
        ' | e_of: ' + this.energy_offset +
        ' | e+_th: ' + this.energy_threshold_pos +
        ' | e-_th: ' + this.energy_threshold_neg +
        ' | signal: ' + signal +
        ' | int: ' + integration +
        ' | voiceTrend: ' + this.voiceTrend +
        ' | start: ' + start +
        ' | end: ' + end
      );

      return signal;
    };

    /**
     * Releases the audio graph set up by the constructor: removes the
     * `onaudioprocess` handler and undoes the connections made above.
     * Calling it again is a no-op.
     */
    this.destroyed = false;
    this.destroy = function() {
      if(this.destroyed) {
        return;
      }
      this.destroyed = true;
      this.scriptProcessorNode.onaudioprocess = null;
      this.options.source.disconnect(this.analyser);
      this.options.source.disconnect(this.scriptProcessorNode);
      this.scriptProcessorNode.disconnect();
    };
  };

  root.VAD = VAD;

// Prefer `window` (unchanged behaviour in browsers); fall back to other
// global objects so that merely loading the file does not throw elsewhere.
})(typeof window !== 'undefined' ? window :
   typeof self !== 'undefined' ? self :
   typeof global !== 'undefined' ? global : this);
from https://github.com/kdavis-mozilla/vad.js/blob/master/lib/vad.js
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1027698 |
Snippet name: | vad.js |
Eternal ID of this version: | #1027698/1 |
Text MD5: | b609cffe2290f3dad32381736c9aea4c |
Author: | stefan |
Category: | javascript |
Type: | Document |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-03-31 20:10:44 |
Source code size: | 7452 bytes / 226 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 283 / 1623 |
Referenced in: | [show references] |