Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

226
LINES

< > BotCompany Repo | #1027698 // vad.js

Document

/**
 * vad.js - energy-based voice activity detection (VAD) on top of the
 * Web Audio API.
 *
 * A `VAD` instance taps the supplied audio source with an AnalyserNode and a
 * ScriptProcessorNode. On every processing callback it reads the current
 * spectrum, computes a band-weighted energy figure, compares it against an
 * adaptive offset, and fires `voice_start` / `voice_stop` callbacks when the
 * smoothed trend crosses its start / end thresholds.
 *
 * The constructor is exported as `VAD` on the global object (`window` in
 * browsers; `globalThis` is used as a fallback when `window` is not defined).
 */
(function (global) {
  'use strict';

  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * Build a fresh set of default options (a new object per instance so that
   * the nested `filter` array is never shared between detectors).
   *
   * @returns {Object} default option bag
   */
  function createDefaultOptions() {
    return {
      fftSize: 512,
      bufferLen: 512,
      voice_stop: function () {},
      voice_start: function () {},
      smoothingTimeConstant: 0.99,
      energy_offset: 1e-8, // The initial offset.
      energy_threshold_ratio_pos: 2, // Positive threshold = 2 x offset
      energy_threshold_ratio_neg: 0.5, // Negative threshold = 0.5 x offset
      energy_integration: 1, // Size of integration change compared to the signal per second.
      filter: [
        { f: 200, v: 0 }, // 0 -> 200 is 0
        { f: 2000, v: 1 } // 200 -> 2k is 1
      ],
      source: null,
      context: null,
      debug: true // Print the timing summary once at construction time.
    };
  }

  /**
   * Voice activity detector.
   *
   * @constructor
   * @param {Object} options - Overrides for the defaults listed in
   *   `createDefaultOptions`. Only own enumerable properties are copied.
   * @param {Object} options.source - Audio source node to analyse (required).
   *   Its `context` property is used as the audio context; any `context`
   *   passed in `options` is overwritten.
   * @param {number} [options.fftSize=512] - FFT size given to the analyser.
   * @param {number} [options.bufferLen=512] - ScriptProcessorNode buffer size.
   * @param {Function} [options.voice_start] - Called when speech starts.
   * @param {Function} [options.voice_stop] - Called when speech stops.
   * @param {number} [options.smoothingTimeConstant=0.99] - Analyser smoothing.
   * @param {number} [options.energy_offset=1e-8] - Initial adaptive offset.
   * @param {number} [options.energy_threshold_ratio_pos=2]
   * @param {number} [options.energy_threshold_ratio_neg=0.5]
   * @param {number} [options.energy_integration=1]
   * @param {Array<{f: number, v: number}>} [options.filter] - Band weights,
   *   see `setFilter`.
   * @param {boolean} [options.debug=true] - When truthy, log the derived
   *   timing values once via `console.log`.
   * @throws {Error} If no `source` is supplied.
   */
  var VAD = function (options) {
    // Default options
    this.options = createDefaultOptions();

    // User options. `hasOwnProperty` is borrowed from Object.prototype so
    // that option bags without a prototype (Object.create(null)) also work.
    for (var option in options) {
      if (hasOwn.call(options, option)) {
        this.options[option] = options[option];
      }
    }

    // Require source
    if (!this.options.source) {
      throw new Error("The options must specify a MediaStreamAudioSourceNode.");
    }

    // Set this.options.context
    this.options.context = this.options.source.context;

    // Calculate time relationships
    var sampleRate = this.options.context.sampleRate;
    this.hertzPerBin = sampleRate / this.options.fftSize;
    this.iterationFrequency = sampleRate / this.options.bufferLen;
    this.iterationPeriod = 1 / this.iterationFrequency;

    if (this.options.debug) {
      console.log(
        'Vad' +
        ' | sampleRate: ' + sampleRate +
        ' | hertzPerBin: ' + this.hertzPerBin +
        ' | iterationFrequency: ' + this.iterationFrequency +
        ' | iterationPeriod: ' + this.iterationPeriod
      );
    }

    this.setFilter(this.options.filter);

    this.ready = {}; // Per-frame cache flags, reset by update()
    this.vadState = false; // True when Voice Activity Detected
    this.destroyed = false; // Set by destroy()

    // Energy detector props
    this.energy_offset = this.options.energy_offset;
    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

    this.voiceTrend = 0;
    this.voiceTrendMax = 10;
    this.voiceTrendMin = -10;
    this.voiceTrendStart = 5;
    this.voiceTrendEnd = -5;

    // Create analyser
    this.analyser = this.options.context.createAnalyser();
    this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant; // 0.99;
    this.analyser.fftSize = this.options.fftSize;

    this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);

    // Setup local storage of the Linear FFT data
    this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);

    // Connect this.analyser
    this.options.source.connect(this.analyser);

    // Create ScriptProcessorNode
    this.scriptProcessorNode = this.options.context.createScriptProcessor(this.options.bufferLen, 1, 1);

    // Connect scriptProcessorNode (Theoretically, not required)
    this.scriptProcessorNode.connect(this.options.context.destination);

    // Create callback to update/analyze floatFrequencyData
    var self = this;
    this.scriptProcessorNode.onaudioprocess = function () {
      self.analyser.getFloatFrequencyData(self.floatFrequencyData);
      self.update();
      self.monitor();
    };

    // Connect scriptProcessorNode
    this.options.source.connect(this.scriptProcessorNode);

    // log stuff
    this.logging = false;
    this.log_i = 0;
    this.log_limit = 100;
  };

  /**
   * Build the per-bin weight table `this.filter` from a piecewise shape.
   *
   * Each FFT bin `i` (0 .. fftSize/2 - 1) receives the `v` of the first shape
   * entry whose frequency `f` is strictly above the bin frequency
   * (`i * hertzPerBin`). Bins at or above every entry's `f` get weight 0.
   *
   * @param {Array<{f: number, v: number}>} shape - Entries in ascending `f`.
   */
  VAD.prototype.setFilter = function (shape) {
    var binCount = this.options.fftSize / 2;
    this.filter = [];

    for (var i = 0; i < binCount; i++) {
      var binHertz = i * this.hertzPerBin;
      var weight = 0;

      for (var j = 0, jLen = shape.length; j < jLen; j++) {
        if (binHertz < shape[j].f) {
          weight = shape[j].v;
          break; // First matching band wins
        }
      }

      this.filter[i] = weight;
    }
  };

  /**
   * Start (or restart) logging of monitor() diagnostics.
   *
   * @param {number} [limit] - Maximum number of messages to print; when not a
   *   number the previous limit is kept.
   */
  VAD.prototype.triggerLog = function (limit) {
    this.logging = true;
    this.log_i = 0;
    this.log_limit = typeof limit === 'number' ? limit : this.log_limit;
  };

  /**
   * Print `msg` while logging is active and the message budget is not used
   * up; otherwise switch logging off.
   *
   * @param {string} msg
   */
  VAD.prototype.log = function (msg) {
    if (this.logging && this.log_i < this.log_limit) {
      this.log_i++;
      console.log(msg);
    } else {
      this.logging = false;
    }
  };

  /**
   * Refresh the linear copy of the spectrum from `floatFrequencyData` and
   * invalidate the per-frame caches.
   *
   * AnalyserNode.getFloatFrequencyData reports decibel values; each bin is
   * converted here with 10^(dB/10).
   * NOTE(review): getEnergy() squares these values again; confirm whether
   * 10^(dB/20) was intended. Left unchanged because every default threshold
   * is tuned against the current scale.
   */
  VAD.prototype.update = function () {
    var decibels = this.floatFrequencyData;
    var linear = this.floatFrequencyDataLinear;

    for (var i = 0, iLen = decibels.length; i < iLen; i++) {
      linear[i] = Math.pow(10, decibels[i] / 10);
    }

    this.ready = {};
  };

  /**
   * Band-weighted energy of the current frame: the sum over all bins of
   * `filter[i] * linear[i]^2`. The result is cached until the next update().
   *
   * @returns {number} energy of the current frame
   */
  VAD.prototype.getEnergy = function () {
    if (this.ready.energy) {
      return this.energy;
    }

    var linear = this.floatFrequencyDataLinear;
    var total = 0;

    for (var i = 0, iLen = linear.length; i < iLen; i++) {
      total += this.filter[i] * linear[i] * linear[i];
    }

    this.energy = total;
    this.ready.energy = true;

    return total;
  };

  /**
   * Run one detection step on the current frame.
   *
   * Moves `voiceTrend` up or down depending on how the frame energy compares
   * to the adaptive offset, adapts the offset and thresholds, and fires
   * `options.voice_start` / `options.voice_stop` on state transitions.
   *
   * @returns {number} the frame energy minus the offset in effect when the
   *   frame was evaluated
   */
  VAD.prototype.monitor = function () {
    var energy = this.getEnergy();
    var signal = energy - this.energy_offset;

    if (signal > this.energy_threshold_pos) {
      this.voiceTrend = Math.min(this.voiceTrend + 1, this.voiceTrendMax);
    } else if (signal < -this.energy_threshold_neg) {
      this.voiceTrend = Math.max(this.voiceTrend - 1, this.voiceTrendMin);
    } else if (this.voiceTrend > 0) {
      // Inconclusive frame: voiceTrend decays towards zero
      this.voiceTrend--;
    } else if (this.voiceTrend < 0) {
      this.voiceTrend++;
    }

    // Start / end of speech detected (mutually exclusive)
    var start = this.voiceTrend > this.voiceTrendStart;
    var end = !start && this.voiceTrend < this.voiceTrendEnd;

    // Integration brings in the real-time aspect through the relationship
    // with the frequency this function is called.
    var integration = signal * this.iterationPeriod * this.options.energy_integration;

    // Idea?: The integration is affected by the voiceTrend magnitude? - Not sure. Not doing atm.

    // The !end limits the offset delta boost till after the end is detected.
    if (integration > 0 || !end) {
      this.energy_offset += integration;
    } else {
      this.energy_offset += integration * 10;
    }
    this.energy_offset = Math.max(0, this.energy_offset);
    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

    // Broadcast the messages
    if (start && !this.vadState) {
      this.vadState = true;
      this.options.voice_start();
    }
    if (end && this.vadState) {
      this.vadState = false;
      this.options.voice_stop();
    }

    // Only build the diagnostic string while logging is switched on; this
    // runs on every audio callback.
    if (this.logging) {
      this.log(
        'e: ' + energy +
        ' | e_of: ' + this.energy_offset +
        ' | e+_th: ' + this.energy_threshold_pos +
        ' | e-_th: ' + this.energy_threshold_neg +
        ' | signal: ' + signal +
        ' | int: ' + integration +
        ' | voiceTrend: ' + this.voiceTrend +
        ' | start: ' + start +
        ' | end: ' + end
      );
    }

    return signal;
  };

  /**
   * Stop the detector: remove the audio callback and undo the connections
   * made by the constructor. Safe to call more than once.
   */
  VAD.prototype.destroy = function () {
    if (this.destroyed) {
      return;
    }
    this.destroyed = true;

    this.scriptProcessorNode.onaudioprocess = null;
    this.options.source.disconnect(this.analyser);
    this.options.source.disconnect(this.scriptProcessorNode);
    this.scriptProcessorNode.disconnect();
  };

  global.VAD = VAD;

})(typeof window !== 'undefined' ? window : globalThis);

Author comment

from https://github.com/kdavis-mozilla/vad.js/blob/master/lib/vad.js

download  show line numbers   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1027698
Snippet name: vad.js
Eternal ID of this version: #1027698/1
Text MD5: b609cffe2290f3dad32381736c9aea4c
Author: stefan
Category: javascript
Type: Document
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-03-31 20:10:44
Source code size: 7452 bytes / 226 lines
Pitched / IR pitched: No / No
Views / Downloads: 283 / 1623
Referenced in: [show references]