!7

// Chrome speech recognition bridge.
//
// Starts an HTTP + WebSocket server on a free port above 9999, opens a
// Chrome app window on /popup, and lets the page (which drives
// webkitSpeechRecognition) report transcripts back over the WebSocket as
// "Heard <text>" messages. The server side controls the page with the
// messages "just once", "big on", "big off" and "language <code>".

// ---- Configuration ----

sbool autoClose = true; // close JavaScript window on server lost
sbool autoTurnOn = true; // turn on recognition on program start
static int initialDelay = 0, autoCloseDelay = 5000; // both are substituted into the page's setTimeout calls
static double firstWindowDelay = 0; // passed to sleepSeconds before the first window is opened
sbool infoBox, doLog = true;
sbool blipOnLanguageSwitch = true; // instead of "switching to..."
sbool blipOnActivation;
sbool hideChromeWindow;
sbool usePassiveLog; // use "passive" log (listening in background)

// ---- Runtime state ----

static int port;
static O onUtterance; // voidfunc(S)
sO onFirstWebSocket; // voidfunc()
// Typed list: the code below iterates it as WebSocket and calls send()/close() on its elements.
static L<WebSocket> webSockets = synchroList();
sbool startRecognition; // pending request: send a start command to the next WebSocket that connects
static java.util.Timer stopper; // timer created by "start recognition timeout *"
sS language = "en-US";
sS myURL;
static JButton btn;
sbool hadAnyWebSockets; // Does Chrome work at all?
sbool bigOn = true, justOnce = false;
static long speechRecognizerOpened; // sysNow() value taken when the recognizer window was last opened
static volatile S lastGlobalID;

// Program entry: loads the saved language, starts the HTTP/WebSocket server,
// registers the bot and opens the first recognizer window.
p {
  load('language);
  if (isMainProgram()) {
    if (isMain()) substance();
    infoBox = true;
  }
  print("Logging speech to: " + speechRecognitionLog());
  NanoHTTPD.SOCKET_READ_TIMEOUT = 24*3600*1000; // not long enough - TODO: Fix in NanoHTTPD
  adjustMicrophoneLevel();

  // The function below is invoked with each WebSocket handshake and
  // returns the WebSocket object that will handle that connection.
  port = serveHttpWithWebSockets(findFreePortAbove(9999), func(NanoHTTPD.IHTTPSession handshake) {
    print("New WebSocket.");
    WebSocket ws = new WebSocket(handshake) {
      protected void onPong(WebSocketFrame pongFrame) { print("pong"); }

      // Every message from the page cancels a pending stop timer; "Heard ..."
      // carries a transcript, "big on"/"big off" mirror the page's button.
      protected void onMessage(WebSocketFrame messageFrame) ctex {
        fS s = messageFrame.getTextPayload();
        //print("WebSocket message: " + s);
        cancelTimeout();
        infoBoxOrPrint(/*"User said: " +*/ s, infoBox);
        //send("stop");
        //print("Sending start.");
        send("start"); // note: the page's onmessage handler has no case for "start"
        new Matches m;
        if (swic_trim(s, "Heard ", m))
          handleUtterance(decensor($1), false);
        else if (eqic(s, "big on")) bigOn = true;
        else if (eqic(s, "big off")) bigOn = false;
      }

      protected void onClose(WebSocketFrame.CloseCode code, String reason, boolean initiatedByRemote) {
        print("WebSocket closed.");
        webSockets.remove(this);
      }

      protected void onException(IOException e) { printStackTrace(e); }
    };

    // Deliver a start request that was made while no page was connected.
    if (startRecognition) {
      startRecognition = false;
      pcall { ws.send(justOnce ? "just once" : "big on"); }
      justOnce = false;
    }

    // close any other recognizers
    for (WebSocket ws2 : cloneList(webSockets)) {
      print("Closing websocket.");
      pcall { ws2.close(WebSocketFrame.CloseCode.NormalClosure, ""); }
      webSockets.remove(ws2);
    }

    if (!hadAnyWebSockets) {
      hadAnyWebSockets = true;
      pcallF(onFirstWebSocket);
    }

    ret addAndReturn(webSockets, ws);
  });

  myURL = print("http://localhost:" + port + "/popup");
  makeBot("Chrome Speech.");

  thread {
    sleepSeconds(firstWindowDelay);
    openSpeechRecognizer();
  }

  if (isMain())
    showControls(jcenteredline(btn = jbutton("Open Speech Recognizer", f openSpeechRecognizer)));
  // The button is enabled only while no recognizer page is connected.
  awtEvery(btn, 500, r { setEnabled(btn, empty(webSockets)) });

  /*thread "Chrome Re-Starter" { sleepSeconds(20); repeat with sleep 5 { if (hadAnyWebSockets && empty(webSockets)) { openSpeechRecognizer(); sleepSeconds(15); } } });*/

  //if (autoTurnOn) startRecognition();
}

// HTTP handler. Serves the favicon, the recognizer page on /popup, and for
// any other URI a small page that opens /popup in a popup window and then
// closes itself after 10 seconds.
html {
  if (eqic(uri, "/favicon.ico"))
    ret serveFile(loadLibrary(#1013028), "image/x-icon");

  if (neq(uri, "/popup"))
    ret hbody("Opening popup..." + hjavascript([[
      window.open('/popup', 'speech_recognizer', 'width=300,height=300,location=no');
      setTimeout(function() { window.close(); }, 10000);
    ]]));

  // Serve Popup
  // The #NAME# placeholders (and the bare word autoCloseDelay) in the script
  // are substituted by the replace() chain that follows the raw string.
  ret hhtml(hhead(htitle("Continuous Speech Recognizer"))
    + hbody(div(
      h3("Continuous Speech Recognizer")
      + [[]]
      + loadJQuery()
      + hdiv("Language: " + language, id := 'lang, style := "font-size: 10px")
      + hdiv("Results come here", id := 'results, style := "margin: 10px")
      + hjavascript([[
        var websocket;
        var bigOn = #BIGON#, pause = false, listening = false, language = "#LANGUAGE#", justOnce = #JUSTONCE#;
        //var stopUntil = 0;

        window.onfocus = function(event) {
          //if (event.explicitOriginalTarget === window)
          $("#btn").focus();
        };

        function update() {
          if (bigOn) {
            $("#btn").html("Turn off");
            document.title = (pause ? "Paused" : language.substring(3) /*"On"*/) + " - Speech Recognizer";
          } else {
            $("#btn").html("Turn on");
            document.title = "[OFF] Speech Recognizer";
          }

          var should = bigOn && !pause;
          if (should && !listening) startRecognition();
          else if (!should && listening) stopRecognition();
        }

        function stopRecognition() {
          listening = false;
          recognition.stop();
          update();
        }

        function startRecognition() {
          listening = true;
          //if (Date.now() < stopUntil) return;
          recognition.start();
          update();
        }

        function openWebSocket() {
          websocket = new WebSocket("ws://localhost:#PORT#/");
          websocket.onopen = function(event) {
            $("#btn").prop('disabled', false);
            $("#btn").focus();
            $("#results").html(bigOn ? "Listening." : "Click to turn me on.");
            if (bigOn) startRecognition();
          };

          websocket.onmessage = function(event) {
            if (event.data == 'just once') { justOnce = bigOn = true; update(); }
            if (event.data == 'big on') { bigOn = true; justOnce = false; update(); }
            if (event.data == 'big off') { bigOn = false; update(); }
            if (event.data == 'pause') { pause = true; update(); }
            if (event.data == 'unpause') { pause = false; update(); }
            if (event.data.substring(0, 9) == 'language ') {
              var l = event.data.substring(9);
              recognition.lang = language = l;
              $("#lang").html("Language: " + l);
            }
          };

          websocket.onclose = function(event) {
            $("#results").html("WebSocket closed");
            if (#AUTOCLOSE#) setTimeout(function() { window.close(); }, autoCloseDelay);
          };
        }

        setTimeout(openWebSocket, #INITIALDELAY#);

        var recognition = new webkitSpeechRecognition();
        recognition.lang = "#LANGUAGE#";

        recognition.onerror = function(event) {
          var s = " ";
          if (event.error != "no-speech") s = "Error: " + event.error;
          $("#results").html(s);
          //stopRecognition(); // do we get onEnd later?
          //setTimeout(startRecognition, 1000); // safety delay
        }

        recognition.onresult = function(event) {
          var result = event.results[0];
          var transcript = result[0].transcript;
          var s = "Transcript: " + transcript;
          if (event.results.length > 1) s += " ." + event.results.length;
          if (result.length > 1) s += " #" + result.length;
          $("#results").html(s);
          websocket.send("Heard " + transcript);
          //stopUntil = Date.now()+200;
          //stopRecognition();
          setTimeout(startRecognition, 100);
        }

        recognition.onnomatch = function(event) {
          $("#results").html("-");
          //stopRecognition();
          setTimeout(startRecognition, 100);
        }

        recognition.onend = function(event) {
          //$("#results").html("-end-");
          //stopRecognition();
          if (justOnce) justOnce = bigOn = false;
          listening = false;
          setTimeout(update, 100);
        }

        function startOrStop() {
          bigOn = !bigOn;
          websocket.send(bigOn ? "big on" : "big off");
          update();
        }

        window.resizeTo(300, 300);
      ]]) // end of JavaScript, variables follow
        .replace("#BIGON#", str(autoTurnOn))
        .replace("#JUSTONCE#", str(justOnce))
        .replace("#PORT#", str(port))
        .replace("#AUTOCLOSE#", autoClose ? "true" : "false")
        .replace("#INITIALDELAY#", str(initialDelay))
        .replace("#LANGUAGE#", language)
        .replace("autoCloseDelay", str(autoCloseDelay))
      + tag('button, "...", onclick := "startOrStop()", type := 'button, id := 'btn, disabled := 'disabled)
      + hdiv("", id := 'msgs, style := "margin: 10px; font-size: 10px")
      //+ p(ahref("#", "Popup", onClick := "window.open('/', 'speech_recognizer', 'width=300,height=300,location=no'); return false;"));
      , style := "text-align: center"));
}

// Starts recognition in just-once mode (see startRecognition(bool)).
svoid justOnce() {
  startRecognition(true);
}

// Starts recognition in the normal (not just-once) mode.
svoid startRecognition() {
  startRecognition(false);
}

// Turns recognition on. With a connected page, the command ("just once" or
// "big on") is sent to the first WebSocket right away; otherwise the request
// is remembered in the startRecognition flag and delivered by the handshake
// handler when a page connects.
svoid startRecognition(bool justOnce) {
  main.justOnce = justOnce;
  bigOn = true;
  L<WebSocket> l = cloneList(webSockets);
  if (empty(l))
    startRecognition = true;
  else {
    //print("Starting recognition." + (l(l) > 1 ? "Weird: Have " + l(l) + " websockets" : ""));
    pcall { first(l).send(justOnce ? "just once" : "big on"); }
    // Reset the shared flag, as the handshake handler does after sending.
    // (Assigning the bare name here would only change the parameter, which
    // shadows the field, and the field would stay true for later page
    // renders and handshakes.)
    main.justOnce = false;
  }
}

// Turns recognition off, drops any pending start request, and tells the
// first connected page (if any) to stop.
svoid stopRecognition() {
  bigOn = false;
  if (startRecognition) startRecognition = false;
  if (nempty(webSockets))
    pcall { first(webSockets).send("big off"); }
}

// Handles utterances that are themselves commands: a stop-listening command
// (as judged by ai_isStopListeningCommand) or a language change to English
// or German. Returns the answer, or null when s is not such a command.
sS hotCommands(S s) {
  if (ai_isStopListeningCommand(s)) {
    stopRecognition();
    playBlip();
    ret "OK";
  }
  S language = ai_extractChangeLanguageCommand(s);
  if (eq(language, 'english)) ret answer("language " + quote("en-US"));
  if (eq(language, 'german)) ret switchToGerman();
  null;
}

// Bot command handler. Hot commands are tried first, then the text commands
// below. Note that "start recognition timeout *" is tested before
// "start recognition".
answer {
  try answer hotCommands(s);

  if "start recognition timeout *" {
    final int seconds = parseInt($1);
    startRecognition();
    // Cancel a timer left over from an earlier request. Otherwise its
    // reference would be overwritten, it could no longer be cancelled, and
    // it would still stop recognition at its old deadline.
    cancelTimeout();
    stopper = timerOnce(toMS(seconds), f stopRecognition);
    ret "OK";
  }

  if "is on" ret yesno(bigOn);

  if "has recognizer" ret yesno(nempty(webSockets));

  if "just once" {
    if (nempty(webSockets))
      justOnce();
    else {
      // No page yet: the flag is substituted into the page when it is served.
      justOnce = true;
      openSpeechRecognizerIfNone();
    }
    ret "OK";
  }

  if "start recognition" {
    openSpeechRecognizerIfNone();
    if (nempty(webSockets)) {
      bool on = bigOn;
      startRecognition();
      if (!on && blipOnActivation) blip();
    }
    ret "OK";
  }

  if "language *" {
    if (eq(language, $1)) ret "OK";
    setAndSave('language, $1);
    if (blipOnLanguageSwitch) blip();
    else if (eq(language, "de-DE")) william("Switching to German");
    else william("Switching to English");
    pcall { if (nempty(webSockets)) first(webSockets).send("language " + $1); }
    // Stop and start again after sending the new language to the page.
    stopRecognition();
    sleep(500);
    startRecognition();
    ret "OK";
  }

  if "user typed *" ret "OK" with handleUtterance($1, true);

  if "stop recognition" { stopRecognition(); ret "OK"; }

  if "use passive log" { usePassiveLog = true; ret "OK"; }
  if "use active log" { usePassiveLog = false; ret "OK"; }

  if "log on" { doLog = true; ret "OK"; }
  if "log off" { doLog = false; ret "OK"; }
}

// Cancels the pending stop timer, if there is one.
svoid cancelTimeout {
  if (stopper != null) {
    stopper.cancel();
    stopper = null;
  }
}

// Switches the recognizer language to de-DE through the "language *" command.
sS switchToGerman {
  ret answer("language " + quote("de-DE"));
}

// Processes one utterance (spoken, or typed when `typed` is true): writes the
// logs, publishes it on the VM bus, calls onUtterance in a separate thread,
// and finally runs it through hotCommands.
svoid handleUtterance(fS s, final bool typed) {
  if (isStefanReichsPC()) {
    mechAppendQ_noUniq("Katze Speech Recognition Log With Date", "[" + localDateWithMilliseconds() + (typed ? ", typed" : "") + "] " + s);
    Map map = litorderedmap(
      where := typed ? "typed" : "voice",
      type := 'heard,
      date := localDateWithMilliseconds(),
      globalID := lastGlobalID = aGlobalID(),
      text := s);
    mechAppendQ_noUniq("Voice I/O Log", struct(map));
  }

  S info = typed ? "User typed" : "Chrome Speech";
  vmBus_send googleSpeechRecognized(s, info);
  if (doLog)
    logQuoted(usePassiveLog ? passiveSpeechRecognitionLog() : speechRecognitionLog(), now() + " [" + info + "] " + s);
  thread { pcallF(onUtterance, s); }
  hotCommands(s);
}

// Opens the recognizer page in a Chrome app window (an invisible one if
// hideChromeWindow is set) and records the time of the attempt.
svoid openSpeechRecognizer {
  speechRecognizerOpened = sysNow();
  if (hideChromeWindow)
    startInvisibleChromeAppForSpeech(myURL);
  else
    startChromeAppForSpeech(myURL);
}

// Opens a recognizer window only if no page is connected and at least
// 5000 ms (by sysNow) have passed since the last open attempt.
svoid openSpeechRecognizerIfNone {
  if (empty(webSockets) && sysNow() >= speechRecognizerOpened + 5000)
    openSpeechRecognizer();
}

// Passes the transcript through googleDecensor_static.
sS decensor(S s) {
  ret googleDecensor_static(s);
}