Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

156
LINES

< > BotCompany Repo | #1002700 // Translating English to Simplified NL (with first dialog!)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (4164L/27K/90K).

!752

// Translation table consulted first by answer(): key is the pre-simplified input line, value is its SNL form.
static PersistentMap<S, S> map;
// Two-token phrases ("first second") that processJoiners() merges into a single token.
// Extended by the "add joiner * *" command in answer().
static new HashSet<S> joiners;
// Token -> replacement text; processSplitters() swaps a matching token for the tokens of its replacement.
// Extended by the "add splitter * = * *" command in answer().
static PersistentMap<S, S> splitters;

// Main block (JavaX `p`): initialises the three state containers used by answer() and naiveSNL().
p {
  // NOTE(review): the string arguments are presumably the storage names of the maps - confirm against PersistentMap.
  map = new PersistentMap("map");
  splitters = new PersistentMap("splitters");
  // NOTE(review): presumably restores the `joiners` field written by save("joiners") in answer() - confirm.
  load("joiners");
}

// Chooses the bot's reply for one SNL line of the form "<user> < says < ...".
//   line - the full SNL line; it is parsed with snlToTree and matched against the patterns below
//   snl  - the SNL of the bare user input; currently not used here
// Returns a fixed reply for "i love *", a question built from the match for "i hate *",
// and otherwise hands `line` back unchanged.
static S dialog(S snl, S line) {
  Lisp tree = snlToTree(line);
  new Map<S, Lisp> bindings;

  // Default: echo the line when no pattern applies.
  S reply = line;

  if (snlMatch("A < says < i < love < *", tree, bindings)) {
    reply = "oh < that < is < nice";
  } else if (snlMatch("A < says < i < hate < *", tree, bindings)) {
    // Re-use whatever the match bound to fill in the reply template.
    reply = snlApply("? < why < do < you < hate < *", bindings);
  }

  ret reply;
}

// Bot entry point: computes the reply to the chat line `s`, or null when the bot stays silent.
//
// Processing order:
//   1. Logs the user name and the attn()/dedicated() flags.
//   2. Returns null unless attn() is true.
//   3. Handles the commands "are we dedicated", "add joiner * *" and "add splitter * = * *".
//   4. Returns null unless dedicated() is true.
//   5. Otherwise turns `s` into SNL (lookup in `map`, falling back to naiveSNL),
//      wraps it as "<user> < says < [<snl>]" and returns whatever dialog() answers.
//
// The method is synchronized, so calls are serialised on the class lock.
synchronized static S answer(S s) {
  // Match object whose captures are read via m.unq(n) inside the `if "..."` command branches.
  new Matches m;
  
  print("1002700 user=" + getUserName() + " attn=" + attn() + ", dedicated=" + dedicated());
  
  if (!attn()) ret null;
  
  if "are we dedicated"
    ret yn(dedicated());

  if "add joiner * *" {
    // A joiner is stored as the two captured words separated by one space, after pre-simplification.
    S j = preSimplify(m.unq(0) + " " + m.unq(1));
    if (joiners.contains(j))
      ret format("Joiner * exists", j);
    joiners.add(j);
    save("joiners");
    ret format("OK, joiner * added", j);
  }
    
  if "add splitter * = * *" {
    // a: the token to replace; b: its two-word replacement.
    S a = preSimplify(m.unq(0));
    S b = preSimplify(m.unq(1) + " " + m.unq(2));
    S old = splitters.get(a);
    if (eq(old, b))
      ret format("Splitter * => * exists", a, b);
    // NOTE(review): no explicit save here, unlike joiners - presumably PersistentMap persists on put; confirm.
    splitters.put(a, b);
    ret format("OK, splitter * => * added" + (old != null ? " (old: " + structure(old) + ")" : ""), a, b);
  }
    
  // Everything below is the actual translation/dialog path and only runs when dedicated() is true.
  if (!dedicated()) ret null;
  
  // NOTE(review): exceptionToUser presumably turns an exception in the body into a reply - confirm.
  exceptionToUser {
    /*if (isSNL(s))
      ret "SNL";*/
      
    s = preSimplify(s);
    // Prefer a stored translation; fall back to the naive token-based translation.
    S snl = map.get(s);
    if (snl == null)
      snl = naiveSNL(s);
    
    print("1002700: got snl");
    
    // Speaker name used in the SNL line; falls back to "master"/"user" when no user name is available.
    S user = getUserName();
    if (user == null)
      user = master() ? "master" : "user";
    //user = user.replaceAll("[0-9]", "");
    S line = snlSimplifyBrackets(user + " < says < [" + snl + "]");
    
    print("1002700: calling dialog, line = " + line + ", user=" + user + ", snl=" + snl);
    S answer = dialog(snl, line);
    print("1002700: answer=" + answer);
    ret answer;
  }
}

//static L<S> keepPunctuation = litlist("*", "<", ">", "[", "]", "?", "!", ":", "@");

// Tokenizes `s` with nlTok and keeps only the code tokens.
// A variant that filtered punctuation (dropPunctuationExcept with a keepPunctuation list)
// is currently disabled.
static L<S> tokensForSNL(S s) {
  ret codeTokensOnly(nlTok(s));
}

// Fallback translation of a natural-language line into SNL:
// pre-simplify, tokenize, merge joiners, expand splitters, insert "<" between adjacent words,
// then glue the tokens back together with single spaces.
// Prints the raw input and the token list along the way.
static synchronized S naiveSNL(S s) {
  print("Input: " + s);

  L<S> tokens = tokensForSNL(preSimplify(s));
  print("Tokens: " + structure(tokens));

  // Each step edits the token list in place; the order matters.
  processJoiners(tokens);
  processSplitters(tokens);
  addArrows(tokens);

  ret join(" ", tokens);
}

// just a preprocessing for NL (does not generate SNL)
static S preSimplify(S s) {
  ret join(" ", codeTokensOnly(/*wordTokensToLowerCase*/(nlTok(s))));
}

// Merges neighbouring tokens in place: whenever "l[i] l[i+1]" is a known joiner,
// the two tokens are replaced by that single combined token.
// After a merge the same position is examined again, so a merged token can
// take part in a further merge with its new right-hand neighbour.
static void processJoiners(L<S> l) {
  int i = 0;
  while (i + 1 < l(l)) {
    S pair = l.get(i) + " " + l.get(i+1);

    if (joiners.contains(pair)) {
      l.set(i, pair);
      l.remove(i+1);
      // deliberately no advance: re-check position i against its new neighbour
    } else {
      i++;
    }
  }
}

// Expands splitter tokens in place: a token that has an entry in `splitters`
// is replaced by the tokens of that entry's text (tokenized with tokensForSNL).
// The freshly inserted tokens are skipped, so they are not expanded again.
static void processSplitters(L<S> l) {
  int pos = 0;
  while (pos < l(l)) {
    S replacement = splitters.get(l.get(pos));
    if (replacement == null) {
      pos++;
      continue;
    }

    L<S> parts = tokensForSNL(replacement);
    l.remove(pos);
    l.addAll(pos, parts);
    // Continue right behind the inserted tokens.
    pos += l(parts);
  }
}

// Inserts a "<" token between every two neighbouring tokens that are both words (see isWord).
// The list is modified in place.
static void addArrows(L<S> l) {
  int i = 0;
  while (i + 1 < l(l)) {
    boolean adjacentWords = isWord(l.get(i)) && isWord(l.get(i+1));
    if (adjacentWords) {
      l.add(i+1, "<");
      // Skip the arrow just inserted; continue with the right-hand word.
      i += 2;
    } else {
      i++;
    }
  }
}

// Decides whether a token counts as a word for arrow insertion.
// Empty tokens are never words; quoted tokens always are; any other token is a word
// when its first character is a letter or an apostrophe.
static boolean isWord(S s) {
  if (empty(s))
    ret false;

  if (!isQuoted(s)) {
    char first = s.charAt(0);
    ret Character.isLetter(first) || first == '\'';
  }

  ret true;
}

// Heuristic SNL check: true when the javaTok tokenization of `s`
// contains a "<" token or a "[" token.
static boolean isSNL(S s) {
  for (S token : javaTok(s)) {
    if ("<".equals(token) || "[".equals(token)) {
      ret true;
    }
  }
  ret false;
}

static S snlSimplifyBrackets(S s) {
  ret snlFromTree(snlToTree(s));
}

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002700
Snippet name: Translating English to Simplified NL (with first dialog!)
Eternal ID of this version: #1002700/1
Text MD5: 52cd59ceffa8113bb2323a105bd1f1d4
Transpilation MD5: eda7753c746b281cfcd4b3e35b39c3a6
Author: stefan
Category: nl bots
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-02-29 20:23:45
Source code size: 3764 bytes / 156 lines
Pitched / IR pitched: No / No
Views / Downloads: 820 / 1798
Referenced in: [show references]