!7 // Note: This is dependent on input buffer size which is not optimal. // Tested with buffer size 232 ms (using CD quality) cmodule VAD > DynPrintLogAndEnabled { switchable double windowSize = 1; // milliseconds switchable double threshold = // 10.0; // the magic value for 44 KHz stereo 1; // for 8 KHz mono transient double confidence; transient short[] fromData; // only useful when sendOutClonedData = true start { printLogUpdateInterval = 100; // technically we are treating stereo input as mono samples // but it might not matter dm_addAudioListener(voidfunc(short[] data) enter { if (!enabled) ret; short[] buffer = cloneArray(data); dm_q(r { if (q.size() > 2) ret with print("overload"); short[] buf = buffer; //printStruct(takeFirstOfShortArray(8, buf)); float sampleRate = dm_audioInputSampleRate(); if (sampleRate == 0) ret; float originalSampleRate = sampleRate; int channels = dm_audioInputChannels(); if (channels > 1) buf = mixStereoShortArrayToMono(buf); buf = convertSampleRate_shortArray_simple(buf, sampleRate, 8000); sampleRate = 8000f; long time = sysNow(); int windowLength = msToSamples_int(windowSize, sampleRate); //printWithMilliseconds("Window length (" + sampleRate + "): " + windowLength); new L l; L chunks = intRangeChunks(buf, windowLength); for (IntRange r : chunks) l.add(normalizedAutocorrelationOfAudioSamples(buf, r)); // print one table for reference print(asList(normalizedAutocorrelationTableForAudioSamples(buf, first(chunks)))); double result = doubleAverage(l); confidence = result/threshold*50; fromData = data; int vol = shortAbsMax(buf); print("VAD " + (confidence >= 50 ? "XXX" : " ") + " value: " + iround(confidence) + " % [" + elapsedMS(time) + " ms, " + nWindows(l(buf)/windowLength) + " of size " + windowLength + ", converted from \*originalSampleRate*/ Hz, " + nChannels(channels) + ", volume: " + iround(vol/32767.0*100) + "]"); }); }); } // API Bool hasVoiceActivity(short[] fromData) { ret fromData != this.fromData ? null : confidence >= 50; } }