vad.js [1027698]

(function(window) {

  /**
   * Energy-based voice activity detector built on the Web Audio API.
   *
   * The constructor wires `options.source` into an AnalyserNode and a
   * ScriptProcessorNode. On every `audioprocess` event it reads the analyser's
   * frequency data, computes a band-filtered energy value and compares it with
   * an adaptive offset. `options.voice_start` / `options.voice_stop` are invoked
   * when the detector switches state.
   *
   * Recognised options (all optional except `source`):
   *   fftSize                    - analyser FFT size (default 512).
   *   bufferLen                  - ScriptProcessorNode buffer size (default 512).
   *   voice_start, voice_stop    - callbacks fired on state changes.
   *   smoothingTimeConstant      - analyser smoothing (default 0.99).
   *   energy_offset              - initial adaptive offset (default 1e-8).
   *   energy_threshold_ratio_pos - upper threshold as a multiple of the offset.
   *   energy_threshold_ratio_neg - lower threshold as a multiple of the offset.
   *   energy_integration         - how fast the offset follows the signal, per second.
   *   filter                     - array of {f, v}: bins below frequency `f`
   *                                (and not matched by an earlier entry) get weight `v`.
   *   source                     - audio source node; REQUIRED.
   *   context                    - always overwritten with `source.context`.
   *   debug                      - when truthy (default), print a one-line timing
   *                                summary to the console on construction.
   *
   * @param {Object} options - see the list above.
   * @throws {Error} when `options.source` is missing.
   */
  var VAD = function(options) {
    // Default options
    this.options = {
      fftSize: 512,
      bufferLen: 512,
      voice_stop: function() {},
      voice_start: function() {},
      smoothingTimeConstant: 0.99,
      energy_offset: 1e-8, // The initial offset.
      energy_threshold_ratio_pos: 2, // Signal must be twice the offset
      energy_threshold_ratio_neg: 0.5, // Signal must be half the offset
      energy_integration: 1, // Size of integration change compared to the signal per second.
      filter: [
        {f: 200, v:0}, // 0 -> 200 is 0
        {f: 2000, v:1} // 200 -> 2k is 1
      ],
      source: null,
      context: null,
      debug: true // Print the timing summary on construction (previously hard-coded on).
    };

    // User options. hasOwnProperty is borrowed from Object.prototype so that
    // option objects without a prototype (Object.create(null)) are accepted too.
    for(var option in options) {
      if(Object.prototype.hasOwnProperty.call(options, option)) {
        this.options[option] = options[option];
      }
    }

    // Require source
    if(!this.options.source) {
      throw new Error("The options must specify a MediaStreamAudioSourceNode.");
    }

    // Set this.options.context (any user-supplied context is replaced)
    this.options.context = this.options.source.context;

    // Calculate time relationships
    this.hertzPerBin = this.options.context.sampleRate / this.options.fftSize;
    this.iterationFrequency = this.options.context.sampleRate / this.options.bufferLen;
    this.iterationPeriod = 1 / this.iterationFrequency;

    if(this.options.debug) {
      console.log(
        'Vad' +
        ' | sampleRate: ' + this.options.context.sampleRate +
        ' | hertzPerBin: ' + this.hertzPerBin +
        ' | iterationFrequency: ' + this.iterationFrequency +
        ' | iterationPeriod: ' + this.iterationPeriod
      );
    }

    /**
     * Build the per-bin weight table `this.filter` (fftSize / 2 entries).
     * Each bin takes the `v` of the first shape entry whose `f` lies above the
     * bin's frequency; bins above every entry keep weight 0.
     * @param {Array<{f: number, v: number}>} shape
     */
    this.setFilter = function(shape) {
      this.filter = [];
      for(var i = 0, iLen = this.options.fftSize / 2; i < iLen; i++) {
        this.filter[i] = 0;
        for(var j = 0, jLen = shape.length; j < jLen; j++) {
          if(i * this.hertzPerBin < shape[j].f) {
            this.filter[i] = shape[j].v;
            break; // Exit j loop
          }
        }
      }
    };

    // log stuff
    this.logging = false;
    this.log_i = 0;
    this.log_limit = 100;

    /**
     * Start printing monitor() diagnostics for the next `limit` iterations.
     * @param {number} [limit] - number of lines to print; keeps the previous
     *   limit when omitted or not a number.
     */
    this.triggerLog = function(limit) {
      this.logging = true;
      this.log_i = 0;
      this.log_limit = typeof limit === 'number' ? limit : this.log_limit;
    };

    /**
     * Print `msg` while logging is active and the limit has not been reached;
     * otherwise switch logging off.
     * @param {string} msg
     */
    this.log = function(msg) {
      if(this.logging && this.log_i < this.log_limit) {
        this.log_i++;
        console.log(msg);
      } else {
        this.logging = false;
      }
    };

    /**
     * Refresh `floatFrequencyDataLinear` from `floatFrequencyData` using
     * 10^(x / 10), and invalidate cached derived values.
     */
    this.update = function() {
      // Update the local version of the Linear FFT
      var fft = this.floatFrequencyData;
      for(var i = 0, iLen = fft.length; i < iLen; i++) {
        this.floatFrequencyDataLinear[i] = Math.pow(10, fft[i] / 10);
      }
      this.ready = {};
    };

    /**
     * Filter-weighted sum of the squared linear spectrum. The result is cached
     * until the next update().
     * @returns {number}
     */
    this.getEnergy = function() {
      if(this.ready.energy) {
        return this.energy;
      }

      var energy = 0;
      var fft = this.floatFrequencyDataLinear;

      for(var i = 0, iLen = fft.length; i < iLen; i++) {
        energy += this.filter[i] * fft[i] * fft[i];
      }

      this.energy = energy;
      this.ready.energy = true;

      return energy;
    };

    /**
     * Run one detection step: update the voice trend counter, adapt the energy
     * offset and thresholds, and fire voice_start / voice_stop on transitions.
     * @returns {number} the current energy minus the offset used for this step.
     */
    this.monitor = function() {
      var energy = this.getEnergy();
      var signal = energy - this.energy_offset;

      if(signal > this.energy_threshold_pos) {
        this.voiceTrend = Math.min(this.voiceTrend + 1, this.voiceTrendMax);
      } else if(signal < -this.energy_threshold_neg) {
        this.voiceTrend = Math.max(this.voiceTrend - 1, this.voiceTrendMin);
      } else {
        // Neither threshold crossed: voiceTrend decays towards zero
        if(this.voiceTrend > 0) {
          this.voiceTrend--;
        } else if(this.voiceTrend < 0) {
          this.voiceTrend++;
        }
      }

      var start = false, end = false;
      if(this.voiceTrend > this.voiceTrendStart) {
        // Start of speech detected
        start = true;
      } else if(this.voiceTrend < this.voiceTrendEnd) {
        // End of speech detected
        end = true;
      }

      // Integration brings in the real-time aspect through the relationship with the frequency this function is called.
      var integration = signal * this.iterationPeriod * this.options.energy_integration;

      // Idea?: The integration is affected by the voiceTrend magnitude? - Not sure. Not doing atm.

      // The !end limits the offset delta boost till after the end is detected.
      if(integration > 0 || !end) {
        this.energy_offset += integration;
      } else {
        this.energy_offset += integration * 10;
      }
      this.energy_offset = this.energy_offset < 0 ? 0 : this.energy_offset;
      this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
      this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

      // Broadcast the messages
      if(start && !this.vadState) {
        this.vadState = true;
        this.options.voice_start();
      }
      if(end && this.vadState) {
        this.vadState = false;
        this.options.voice_stop();
      }

      // Only build the diagnostic string when somebody asked for it; this
      // method runs on every audio callback.
      if(this.logging) {
        this.log(
          'e: ' + energy +
          ' | e_of: ' + this.energy_offset +
          ' | e+_th: ' + this.energy_threshold_pos +
          ' | e-_th: ' + this.energy_threshold_neg +
          ' | signal: ' + signal +
          ' | int: ' + integration +
          ' | voiceTrend: ' + this.voiceTrend +
          ' | start: ' + start +
          ' | end: ' + end
        );
      }

      return signal;
    };

    /**
     * Detach the detector from the audio graph: stop the audio callback and
     * undo the connections made by the constructor. Safe to call repeatedly.
     * No voice_stop callback is fired by this method.
     */
    this.destroy = function() {
      if(this.destroyed) {
        return;
      }
      this.destroyed = true;
      this.scriptProcessorNode.onaudioprocess = null;
      this.options.source.disconnect(this.analyser);
      this.options.source.disconnect(this.scriptProcessorNode);
      this.scriptProcessorNode.disconnect();
    };

    this.setFilter(this.options.filter);

    this.ready = {};
    this.vadState = false; // True when Voice Activity Detected
    this.destroyed = false; // True once destroy() has run

    // Energy detector props
    this.energy_offset = this.options.energy_offset;
    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

    // voiceTrend counts consecutive evidence for (+) or against (-) speech,
    // clamped to [voiceTrendMin, voiceTrendMax]; crossing Start/End flips state.
    this.voiceTrend = 0;
    this.voiceTrendMax = 10;
    this.voiceTrendMin = -10;
    this.voiceTrendStart = 5;
    this.voiceTrendEnd = -5;

    // Create analyser
    this.analyser = this.options.context.createAnalyser();
    this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant; // 0.99;
    this.analyser.fftSize = this.options.fftSize;

    this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);

    // Setup local storage of the Linear FFT data
    this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);

    // Connect this.analyser
    this.options.source.connect(this.analyser);

    // Create ScriptProcessorNode
    this.scriptProcessorNode = this.options.context.createScriptProcessor(this.options.bufferLen, 1, 1);

    // Connect scriptProcessorNode (Theoretically, not required)
    this.scriptProcessorNode.connect(this.options.context.destination);

    // Create callback to update/analyze floatFrequencyData
    var self = this;
    this.scriptProcessorNode.onaudioprocess = function(event) {
      self.analyser.getFloatFrequencyData(self.floatFrequencyData);
      self.update();
      self.monitor();
    };

    // Connect scriptProcessorNode
    this.options.source.connect(this.scriptProcessorNode);
  };

  window.VAD = VAD;

})(window);
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
Snippet ID:	#1027698
Snippet name:	vad.js
Eternal ID of this version:	#1027698/1
Text MD5:	b609cffe2290f3dad32381736c9aea4c
Author:	stefan
Category:	javascript
Type:	Document
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-03-31 20:10:44
Source code size:	7452 bytes / 226 lines
Pitched / IR pitched:	No / No
Views / Downloads:	282 / 1622
Referenced in:	#1027700 - hVAD - voice activity detector in JavaScript #1028684 - hVAD - voice activity detector in JavaScript [backup without volume meter]
< > BotCompany Repo | #1027698 // vad.js

Document

Author comment