Continuous Recognition Module (just logs, German/English, with "bigOn" switch) [1009816]

!7

// ---- Settings ----

sbool autoClose = true; // close JavaScript window on server lost
// Substituted for #BIGON# in the served popup page, i.e. the popup's initial on/off state.
sbool autoTurnOn = true; // turn on recognition on program start
// initialDelay: delay handed to the popup's setTimeout before it opens its WebSocket.
// autoCloseDelay: delay handed to the popup's setTimeout before window.close() after the socket closed.
static int initialDelay = 0, autoCloseDelay = 5000;
// Passed to sleepSeconds() before the first recognizer window is opened.
static double firstWindowDelay = 0;
// infoBox: forwarded to infoBoxOrPrint() for every incoming WebSocket message.
// doLog: when true, handleUtterance() appends each utterance to the speech log.
sbool infoBox, doLog = true;
sbool blipOnLanguageSwitch = true; // instead of "switching to..."
// When set, "start recognition" plays a blip if recognition was off before.
sbool blipOnActivation;
// Selects startInvisibleChromeAppForSpeech() over startChromeAppForSpeech().
sbool hideChromeWindow;
sbool usePassiveLog; // use "passive" log (listening in background)

// ---- Runtime state ----

// Port returned by serveHttpWithWebSockets(); used in myURL and in the popup's ws:// address.
static int port;
// Called (via pcallF, in its own thread) with the text of every utterance.
static O onUtterance; // voidfunc(S)
// Called once (via pcallF) when the first WebSocket connection arrives.
sO onFirstWebSocket; // voidfunc()
// Currently registered recognizer connections; older ones are closed whenever a new one connects.
static L<WebSocket> webSockets = synchroList();
// Pending-start flag: set when a start was requested while no socket existed;
// consumed by the next connection, which is then sent "just once" or "big on".
sbool startRecognition;
// Timer created by "start recognition timeout *"; cancelled on any incoming WebSocket message.
static java.util.Timer stopper;
// Recognition language tag; loaded via load('language) at startup and saved via setAndSave().
sS language = "en-US";
// URL of the popup page ("http://localhost:<port>/popup").
sS myURL;
// "Open Speech Recognizer" button; only created when isMain() is true.
static JButton btn;
sbool hadAnyWebSockets; // Does Chrome work at all?
// bigOn: server-side view of the on/off switch. justOnce: a single-shot recognition is requested.
sbool bigOn = true, justOnce = false;
// sysNow() timestamp of the last openSpeechRecognizer() call (used to throttle re-opening).
static long speechRecognizerOpened;
// Global ID assigned to the most recent utterance logged in handleUtterance().
static volatile S lastGlobalID;

// Program entry block: restores the saved language, starts the HTTP/WebSocket
// server that the Chrome popup talks to, registers the bot and opens the
// recognizer window.
p {
  load('language);
  if (isMainProgram()) {
    if (isMain()) substance();
    infoBox = true;
  }
  print("Logging speech to: " + speechRecognitionLog());

  NanoHTTPD.SOCKET_READ_TIMEOUT = 24*3600*1000; // not long enough - TODO: Fix in NanoHTTPD
  
  adjustMicrophoneLevel();
  
  // The function below is invoked per WebSocket handshake and returns the
  // WebSocket object that will handle that connection.
  port = serveHttpWithWebSockets(findFreePortAbove(9999), func(NanoHTTPD.IHTTPSession handshake) {
    print("New WebSocket.");
    WebSocket ws = new WebSocket(handshake) {
      protected void onPong(WebSocketFrame pongFrame) { print("pong"); }
      // Messages from the popup: "Heard <transcript>", "big on" or "big off".
      protected void onMessage(WebSocketFrame messageFrame) ctex {
        fS s = messageFrame.getTextPayload();
        //print("WebSocket message: " + s);
        // Any message from the popup cancels a pending "start recognition timeout" timer.
        cancelTimeout();
        infoBoxOrPrint(/*"User said: " +*/ s, infoBox); 
          //send("stop");
        //print("Sending start."); send("start");
        new Matches m;
        if (swic_trim(s, "Heard ", m))
          handleUtterance(decensor($1), false);
        // Keep the server-side switch in sync with the popup's button.
        else if (eqic(s, "big on")) bigOn = true;
        else if (eqic(s, "big off")) bigOn = false;
      }
      protected void onClose(WebSocketFrame.CloseCode code, String reason, boolean initiatedByRemote) {
        print("WebSocket closed.");
        webSockets.remove(this);
      }
      protected void onException(IOException e) { printStackTrace(e); }
    };
    // A start was requested while no recognizer was connected: deliver it now
    // and clear both the pending flag and the one-shot request.
    if (startRecognition) {
      startRecognition = false;
      pcall { ws.send(justOnce ? "just once" : "big on"); }
      justOnce = false;
    }
    
    // close any other recognizers
    // (iterates over a copy because the list is modified inside the loop)
    for (WebSocket ws2 : cloneList(webSockets)) {
      print("Closing websocket.");
      pcall { ws2.close(WebSocketFrame.CloseCode.NormalClosure, ""); }
      webSockets.remove(ws2);
    }
    
    // Fire the first-connection callback exactly once.
    if (!hadAnyWebSockets) {
      hadAnyWebSockets = true;
      pcallF(onFirstWebSocket);
    }
    
    // Register the new socket and hand it back to the server.
    ret addAndReturn(webSockets, ws);
  });
  myURL = print("http://localhost:" + port + "/popup");
  makeBot("Chrome Speech.");
  
  // Open the recognizer window in the background after the configured delay.
  thread { sleepSeconds(firstWindowDelay); openSpeechRecognizer(); }
  
  if (isMain())
    showControls(jcenteredline(btn = jbutton("Open Speech Recognizer", f openSpeechRecognizer)));
  // Every 500 ms: the button is enabled only while no recognizer is connected.
  // NOTE(review): btn stays null when isMain() is false - confirm awtEvery/setEnabled tolerate null.
  awtEvery(btn, 500, r { setEnabled(btn, empty(webSockets)) });
  
  /*thread "Chrome Re-Starter" {
    sleepSeconds(20);
    repeat with sleep 5 {
      if (hadAnyWebSockets && empty(webSockets)) {
        openSpeechRecognizer();
        sleepSeconds(15);
      }
    }
  });*/
  
  //if (autoTurnOn) startRecognition();
}

// HTTP request handler. Serves the favicon, a small "opener" page for any
// URI other than /popup, and the recognizer popup page itself for /popup.
// The variable uri is not declared in this file - presumably supplied by the
// html block's implicit signature.
html {
  if (eqic(uri, "/favicon.ico"))
    ret serveFile(loadLibrary(#1013028), "image/x-icon");
    
  // Anything that is not /popup gets a page that opens /popup in a 300x300
  // window and closes itself after 10 seconds.
  // NOTE(review): the popup below links /favicon.png, which is not handled
  // above and therefore also ends up in this branch.
  if (neq(uri, "/popup"))
    ret hbody("Opening popup..." + hjavascript([[
      window.open('/popup', 'speech_recognizer', 'width=300,height=300,location=no');
      setTimeout(function() { window.close(); }, 10000);
    ]]));

  // Serve Popup
  // The script below is a template: the #...# placeholders (and the bare
  // identifier autoCloseDelay) are substituted by the replace chain after it.
  
  ret hhtml(hhead(htitle("Continuous Speech Recognizer")) + hbody(div(
    h3("Continuous Speech Recognizer")
    + [[<link id="favicon" rel="shortcut icon" type="image/png" href="/favicon.png" />]]
    + loadJQuery()
    + hdiv("Language: " + language, id := 'lang, style := "font-size: 10px")
    + hdiv("Results come here", id := 'results, style := "margin: 10px")
  + hjavascript([[
    var websocket;
    var bigOn = #BIGON#, pause = false, listening = false, language = "#LANGUAGE#", justOnce = #JUSTONCE#;
    //var stopUntil = 0;
    
    window.onfocus = function(event) {
      //if (event.explicitOriginalTarget === window)
        $("#btn").focus();
    };

    function update() {
      if (bigOn) {
        $("#btn").html("Turn off");
        document.title = (pause ? "Paused" : language.substring(3) /*"On"*/) + " - Speech Recognizer";
      } else {
        $("#btn").html("Turn on");
        document.title = "[OFF] Speech Recognizer";
      }

      var should = bigOn && !pause;
      if (should && !listening) startRecognition();
      else if (!should && listening) stopRecognition();
    }
    
    function stopRecognition() {
      listening = false;
      recognition.stop();
      update();
    }
    
    function startRecognition() {
      listening = true;
      //if (Date.now() < stopUntil) return;
      recognition.start();
      update();
    }
    
    function openWebSocket() {
      websocket = new WebSocket("ws://localhost:#PORT#/");
      websocket.onopen = function(event) {
        $("#btn").prop('disabled', false);
        $("#btn").focus();
        $("#results").html(bigOn ? "Listening." : "Click to turn me on.");
        if (bigOn)
          startRecognition();
      };
    
      websocket.onmessage = function(event) {
        if (event.data == 'just once') { justOnce = bigOn = true; update(); }
        if (event.data == 'big on') { bigOn = true; justOnce = false; update(); }
        if (event.data == 'big off') { bigOn = false; update(); }
        if (event.data == 'pause') { pause = true; update(); }
        if (event.data == 'unpause') { pause = false; update(); }
        if (event.data.substring(0, 9) == 'language ') {
          var l = event.data.substring(9);
          recognition.lang = language = l;
          $("#lang").html("Language: " + l);
        }
      };
    
      websocket.onclose = function(event) {
        $("#results").html("WebSocket closed");
        if (#AUTOCLOSE#) setTimeout(function() { window.close(); }, autoCloseDelay);
      };
    }
    
    setTimeout(openWebSocket, #INITIALDELAY#);
      
    var recognition = new webkitSpeechRecognition();
    recognition.lang = "#LANGUAGE#";
    
    recognition.onerror = function(event) { 
      var s = "&nbsp;";
      if (event.error != "no-speech") s = "Error: " + event.error;
      $("#results").html(s);
      //stopRecognition(); // do we get onEnd later?
      //setTimeout(startRecognition, 1000); // safety delay
    }
    
    recognition.onresult = function(event) { 
      var result = event.results[0];
      var transcript = result[0].transcript;
      var s = "Transcript: " + transcript;
      if (event.results.length > 1) s += " ." + event.results.length;
      if (result.length > 1) s += " #" + result.length;
      $("#results").html(s);
      websocket.send("Heard " + transcript);
      //stopUntil = Date.now()+200;
      //stopRecognition(); setTimeout(startRecognition, 100);
    }
    
    recognition.onnomatch = function(event) {
      $("#results").html("-");
      //stopRecognition(); setTimeout(startRecognition, 100);
    }
    
    recognition.onend = function(event) { 
      //$("#results").html("-end-");
      //stopRecognition();
      if (justOnce) justOnce = bigOn = false;
      listening = false; setTimeout(update, 100);
    }
    
    function startOrStop() {
      bigOn = !bigOn;
      websocket.send(bigOn ? "big on" : "big off");
      update();
    }
    
    window.resizeTo(300, 300);
  ]]) // end of JavaScript, variables follow
    // NOTE(review): the initial switch state comes from autoTurnOn, not from bigOn.
    .replace("#BIGON#", str(autoTurnOn))
    .replace("#JUSTONCE#", str(justOnce))
    .replace("#PORT#", str(port))
    .replace("#AUTOCLOSE#", autoClose ? "true" : "false")
    .replace("#INITIALDELAY#", str(initialDelay))
    // The language value is inserted into the script unescaped.
    .replace("#LANGUAGE#", language)
    // No #...# marker here: the bare identifier in the script is replaced by the number.
    .replace("autoCloseDelay", str(autoCloseDelay))
    // The toggle button starts disabled; the script enables it once the WebSocket is open.
    + tag('button, "...", onclick := "startOrStop()", type := 'button, id := 'btn, disabled := 'disabled)
    + hdiv("", id := 'msgs, style := "margin: 10px; font-size: 10px")
    //+ p(ahref("#", "Popup", onClick := "window.open('/', 'speech_recognizer', 'width=300,height=300,location=no'); return false;"));
  , style := "text-align: center"));
}

// Requests a single-shot recognition (delegates to startRecognition(true)).
svoid justOnce() {
  startRecognition(true);
}
// Turns continuous recognition on (delegates to startRecognition(false)).
svoid startRecognition() {
  startRecognition(false);
}

// Turns recognition on.
//
// justOnce = true asks the popup for a single recognition ("just once"),
// false for continuous listening ("big on").
//
// If a recognizer is connected, the command is sent to the first socket
// immediately (send errors are left to pcall). Otherwise the request is
// parked in the startRecognition flag and delivered by the connection
// handler as soon as a popup connects.
svoid startRecognition(bool justOnce) {
  main.justOnce = justOnce;
  bigOn = true;
  // Work on a snapshot so a concurrent connect/disconnect cannot interfere.
  L<WebSocket> l = cloneList(webSockets);
  if (empty(l)) startRecognition = true;
  else {
    //print("Starting recognition." + (l(l) > 1 ? "Weird: Have " + l(l) + " websockets" : ""));
    pcall {
      first(l).send(justOnce ? "just once" : "big on");
    }
    // The request has been handed to the popup, so clear the static one-shot
    // flag (same as the connection handler does after sending). Previously this
    // line assigned the parameter, which had no effect and left main.justOnce
    // stuck at true for the next served popup page.
    main.justOnce = false;
  }
}

// Switches recognition off: clears the server-side switch, drops any start
// request still waiting for a connection, and tells the first connected
// popup (if there is one) to stop.
svoid stopRecognition() {
  bigOn = false;
  startRecognition = false;
  if (empty(webSockets)) return;
  pcall {
    first(webSockets).send("big off");
  }
}

// Checks an utterance for built-in voice commands.
// - "stop listening" style command: stops recognition, plays a blip, returns "OK".
// - change-language command for German or English: returns the result of the
//   corresponding "language ..." request.
// Returns null when the utterance is none of these.
sS hotCommands(S s) {
  if (ai_isStopListeningCommand(s)) {
    stopRecognition();
    playBlip();
    ret "OK";
  }

  S requested = ai_extractChangeLanguageCommand(s);
  if (eq(requested, 'german)) ret switchToGerman();
  if (eq(requested, 'english)) ret answer("language " + quote("en-US"));
  ret null;
}

// Bot command handler. Hot commands (stop listening / change language) are
// tried first; after that the textual commands below are matched in order.
// Every handled command answers "OK" except the two yes/no queries.
answer {
  try answer hotCommands(s);

  // Start recognition and stop it again automatically after the given number of seconds.
  if "start recognition timeout *" {
    final int seconds = parseInt($1);
    // Cancel a timer left over from an earlier timeout request; otherwise it
    // would still fire and stop recognition before the new timeout is over.
    cancelTimeout();
    startRecognition();
    stopper = timerOnce(toMS(seconds), f stopRecognition);
    ret "OK";
  }

  // State queries
  if "is on" ret yesno(bigOn);
  if "has recognizer" ret yesno(nempty(webSockets));

  // Single-shot recognition: send right away if a popup is connected,
  // otherwise remember the request and open a recognizer window.
  if "just once" {
    if (nempty(webSockets)) justOnce();
    else {
      justOnce = true;
      openSpeechRecognizerIfNone();
    }
    ret "OK";
  }
  
  // Continuous recognition; optionally blips when this actually switched it on.
  if "start recognition" {
    openSpeechRecognizerIfNone();
    if (nempty(webSockets)) {
      bool on = bigOn;
      startRecognition();
      if (!on && blipOnActivation) blip();
    }
    ret "OK";
  }
  
  // Change the recognition language: persist it, notify the user, forward it
  // to the popup and restart recognition so the new language takes effect.
  if "language *" {
    if (eq(language, $1)) ret "OK";
    setAndSave('language, $1);
    if (blipOnLanguageSwitch) blip(); else
      if (eq(language, "de-DE")) william("Switching to German");
        else william("Switching to English");
    pcall { if (nempty(webSockets)) first(webSockets).send("language " + $1); }
    stopRecognition();
    sleep(500);
    startRecognition();
    ret "OK";
  }
  
  // Typed input goes through the same pipeline as spoken input.
  if "user typed *" ret "OK" with handleUtterance($1, true);
  
  if "stop recognition" { stopRecognition(); ret "OK"; }
  
  // Log selection and on/off
  if "use passive log" { usePassiveLog = true; ret "OK"; }
  if "use active log" { usePassiveLog = false; ret "OK"; }
  
  if "log on" { doLog = true; ret "OK"; }
  if "log off" { doLog = false; ret "OK"; }
}

// Cancels the pending auto-stop timer, if there is one, and forgets it.
svoid cancelTimeout {
  if (stopper == null) return;
  stopper.cancel();
  stopper = null;
}

sS switchToGerman {
  ret answer("language " + quote("de-DE"));
}

// Central sink for every utterance, spoken (typed = false) or typed (typed = true).
// Steps: optional mech-list logging (only when isStefanReichsPC() is true),
// VM bus notification, optional log file entry, asynchronous onUtterance
// callback, and finally the hot command check (its result is ignored).
svoid handleUtterance(fS s, final bool typed) {
  if (isStefanReichsPC()) {
    S typedTag = typed ? ", typed" : "";
    mechAppendQ_noUniq("Katze Speech Recognition Log With Date",
      "[" + localDateWithMilliseconds() + typedTag + "] " + s);

    // Structured record; also remembers the freshly generated global ID.
    Map entry = litorderedmap(
      where := typed ? "typed" : "voice",
      type := 'heard,
      date := localDateWithMilliseconds(),
      globalID := lastGlobalID = aGlobalID(),
      text := s);
    mechAppendQ_noUniq("Voice I/O Log", struct(entry));
  }

  S source = typed ? "User typed" : "Chrome Speech";
  vmBus_send googleSpeechRecognized(s, source);

  if (doLog) {
    logQuoted(usePassiveLog ? passiveSpeechRecognitionLog() : speechRecognitionLog(), now() + " [" + source + "] " + s);
  }

  // The listener runs in its own thread.
  thread {
    pcallF(onUtterance, s);
  }

  hotCommands(s);
}

// Opens the recognizer page (myURL) in Chrome, visible or invisible depending
// on hideChromeWindow, and records the time of the attempt.
svoid openSpeechRecognizer {
  speechRecognizerOpened = sysNow();
  if (!hideChromeWindow) {
    startChromeAppForSpeech(myURL);
    return;
  }
  startInvisibleChromeAppForSpeech(myURL);
}

// Opens a recognizer window unless one is already connected or the last
// attempt to open one was less than 5000 ms ago.
svoid openSpeechRecognizerIfNone {
  if (nempty(webSockets)) return;
  if (sysNow() < speechRecognizerOpened + 5000) return;
  openSpeechRecognizer();
}

// Passes a recognized transcript through googleDecensor_static().
sS decensor(S s) {
  ret googleDecensor_static(s);
}

Travelled to 18 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, triorysbatvj, tslmcundralx, tvejysmllsmz, unoaxrwscvea, vouqrxazstgt, xrpafgyirdlv

Snippet ID:	#1009816
Snippet name:	Continuous Recognition Module (just logs, German/English, with "bigOn" switch)
Eternal ID of this version:	#1009816/136
Text MD5:	208b2053dce2ff58ef2ebf8adf14ef81
Transpilation MD5:	a3736bd4369a787a35e97399206d0cce
Author:	stefan
Category:	javax
Type:	JavaX source code (desktop)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-07-21 21:44:01
Source code size:	12356 bytes / 378 lines
Pitched / IR pitched:	No / No
Views / Downloads:	1290 / 7349
Version history:	135 change(s)
Referenced in:	#1009884 - Hot Word Detection using Continuous Recognition [kinda works] #1010315 - Speech Training [just listens] #1013871 - Katze v1 [OK] #1013936 - Katze/Computer [Invisible VM w/speech recognition module] #1014485 - speechRecognizerLanguage #1018551 - Speech Recognizer Using Chrome [Dyn Module]

< > BotCompany Repo | #1009816 // Continuous Recognition Module (just logs, German/English, with "bigOn" switch)

JavaX source code (desktop) [tags: use-pretranspiled] - run with: x30.jar

Author comment