Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

175
LINES

< > BotCompany Repo | #1002289 // An NL Parser, attempt 2 (developing)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (1895L/13K/42K).

// Idea: For every position, store the productions recognized to start there, then infer up to higher classes

!752

// One entry of the recognition table: a production that was successfully
// matched starting at some token position (the start position is the key
// under which the entry is stored in `recog`, not a field of this class).
static class Recognized {
  S className; // grammar class the matched production belongs to
  Pos endPos;  // cursor position reached after the match (as left by matchProd)
  L<S> prod;   // token list of the production that matched
  
  // JavaX constructor shorthand: starred parameters initialise the fields of the same name
  *(S *className, Pos *endPos, L<S> *prod) {}
  // no-arg constructor (NOTE(review): presumably kept for structure()/deserialisation - confirm)
  *() {}
}

// Recognitions found for the input currently being parsed, keyed by the token
// index at which the match starts. Re-created on every call to parseTop().
static MultiMap<Integer, Recognized> recog;

// The grammar: class name -> all productions (token lists) registered for that class.
static MultiMap<S, L<S>> productionMap = new MultiMap;

// When true, parseTop() and debug() print extra trace output.
static boolean debug = false;

// Main program: loads the grammar rules and the input lines from snippets,
// fills productionMap from the rules, then tries to parse every input line
// as the top-level class "line" and prints the outcome.
p {
  S rulesText = loadSnippet("#1002281");
  // input is the concatenation of two snippets, one line per sentence
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
  S mainProd = "line"; // class every input line has to be recognized as
  
  // Each rule has the form "<production> = <className>".
  // pcall: an exception in one rule is contained and does not stop the remaining rules
  // (NOTE(review): relies on JavaX pcall semantics - confirm).
  for (S rule : toLinesFullTrim(rulesText)) pcall {
    printF("Processing rule: *", rule);
    L<S> lr = splitAtJavaToken(rule, "=");
    if (l(lr) != 2) {
      // not exactly one "=" in the rule: report and skip it
      print("Weird rule: " + rule);
      continue;
    }
    S l = lr.get(0), r = lr.get(1);
    L<S> tokr = javaTok(r);
    // right-hand side must tokenize to exactly 3 entries
    // (presumably whitespace, one code token, whitespace - verify against javaTok)
    assertEquals(structure(tokr), 3, l(tokr));
    S className = assertIdentifier(get(tokr, 1));
    L<S> tok = javaTok(l);
    // presumably fuses "<", "name", ">" into single bracketed tokens - TODO confirm
    tok = mergeBracketThingies(tok);
    printStructure(tok);
    productionMap.put(className, tok);
  }
  
  print(n(productionMap.size(), "production") + ".");
  print();
  
  // Parse every input line independently; parseTop resets the recognition table each time.
  for (S line : toLinesFullTrim(inputText)) {
    print(line);
    L<S> tok = javaTok(line);
    printStructure(tok);
    Pos pos = new Pos(tok);
    O x = parseTop(pos, mainProd);
    if (x != null)
      print("  parsed: " + structure(x));
    else
      // show what was recognized so the missing production can be spotted
      print("  not parsed, stuff found: " + recogToString());
  }
}

// Bottom-up parse of pos.tok.
//
// Repeatedly sweeps over all token positions and all grammar classes, recording
// in `recog` every class that can be matched at a position, until a full sweep
// adds nothing new. Classes built from other classes become matchable in later
// sweeps, once their parts have been recorded.
//
// Returns the production matched for `mainProd` at `pos` (and moves `pos` to
// the end of that match), or null if `mainProd` was not recognized there.
static O parseTop(Pos pos, S mainProd) {
  // fresh recognition table for this input
  recog = new MultiMap;

  boolean progress = true;
  while (progress) {
    progress = false;
    // code tokens sit at the odd indices of the token list
    for (int start = 1; start < l(pos.tok); start += 2) {
      Pos from = new Pos(pos.tok, start);
      for (S className : productionMap.keySet()) {
        // at most one recognition per class and start position
        if (getRecognition(from, className) == null)
          for (L<S> prod : productionMap.get(className)) {
            Pos cursor = from.clone();
            if (matchProd(cursor, prod, className) == null) continue;
            recog.put(from.i, new Recognized(className, cursor, prod));
            if (debug)
              print("new stuff at " + from.i + ": " + className);
            progress = true;
            break; // first matching production wins; move on to the next class
          }
      }
    }
  }
  
  Recognized rec = getRecognition(pos, mainProd);
  if (debug)
    print("rec: " + structure(rec));
  if (rec == null)
    ret null;
  // advance the caller's cursor past the recognized top-level production
  copy(rec.endPos, pos);
  ret rec.prod;
}

// Looks up the recognition of class `className` that starts at token index
// pos.i. Returns the first matching entry of `recog`, or null if there is none.
static Recognized getRecognition(Pos pos, S className) {
  for (Recognized candidate : recog.get(pos.i)) {
    if (!eq(candidate.className, className)) continue;
    ret candidate;
  }
  ret null;
}

// A cursor into a token list: the list itself plus the index of the current
// token. Starts at index 1; the parser advances it in steps of 2.
static class Pos {
  L<S> tok;  // the token list being parsed
  int i = 1; // index of the current token
  
  // JavaX constructor shorthand: starred parameters initialise the fields of the same name
  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}
  
  // true when no further token can be consumed (i has reached the last list index or beyond)
  boolean end() { ret i >= l(tok)-1; }
  
  // independent cursor over the same (shared, not copied) token list
  public Pos clone() { ret new Pos(tok, i); }
  
  // Two positions are equal when they point at the same index of the very same list object.
  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }
  
  // Must agree with equals(): equals compares `tok` by identity, so the hash
  // uses the identity hash of `tok` rather than the list's content hash.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }
  
  // the remaining input from the current index on, joined back into one string
  S rest() {
    ret join(subList(tok, i));
  }
}

// Overwrites position b with the state of position a (source first, target
// second). The token list is shared, not duplicated.
static void copy(Pos a, Pos b) {
  b.i = a.i;
  b.tok = a.tok;
}

static void debug(S bla, Pos pos) {
  if (debug)
    print(bla + " on " + quote(pos.rest()));
}

// Tries to match production `prod` against the input starting at `pos`.
//
// Elements of the production are either
//  - bracketed IDs (references to another class): satisfied by a recognition
//    of that class already recorded in `recog` at the current position, or
//  - literals: "<quoted>" matches any quoted token, "*" matches any token,
//    everything else is compared case-insensitively with the input token.
//
// pos      - cursor; advanced as elements match. On failure it is left wherever
//            matching stopped, so callers pass a clone.
// prod     - production as a token list (elements at the odd indices)
// forClass - class the production belongs to (used for trace output only)
//
// Returns true if the whole production matched, null otherwise.
static O matchProd(Pos pos, L<S> prod, S forClass) {
  for (int i = 1; i < l(prod); i += 2) {
    S p = prod.get(i);
    // After the last token has been consumed, pos.i lies past the end of the
    // list; reading it unconditionally would throw instead of reporting
    // "no match". t is null in that case and is only used where a token exists.
    S t = pos.i < l(pos.tok) ? pos.tok.get(pos.i) : null;
    if (isBracketedID(p)) {
      Recognized rec = getRecognition(pos, unbracket(p));
      // trace for the top-level class, now tied to the debug flag like all other trace output
      if (debug && eq(forClass, "line"))
        print("p=" + quote(p) + ", t=" + quote(t) + ", i=" + pos.i + ", rec= " + structure(rec));
      if (rec == null)
        ret null; // referenced class not (yet) recognized here
      copy(rec.endPos, pos); // jump behind the sub-match
      // keep parsing production
    } else {
      // it's a literal
      if (pos.end()) ret null; // need a token to match
      if (eq(p, "<quoted>")) {
        if (!isQuoted(t)) ret null;
      } else if (!(eq(p, "*") || eqic(p, t)))
        ret null; // token mismatch
      pos.i += 2; // consume & keep parsing
    }
  }
  
  ret true; // production succeeded
}

// Renders the recognition table as a comma-separated list of
// "<start index>/<class name>" entries, for diagnostic output.
static S recogToString() {
  new L<S> entries;
  for (int start : recog.keySet())
    for (Recognized found : recog.get(start))
      entries.add(start + "/" + found.className);
  ret join(", ", entries);
}

Author comment

Began life as a copy of #1002282

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002289
Snippet name: An NL Parser, attempt 2 (developing)
Eternal ID of this version: #1002289/1
Text MD5: 5ab08edd3201474b13957a4f70d88f79
Transpilation MD5: 6c57e5edca5371aa927302860bf61e2e
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-01-03 01:49:16
Source code size: 4516 bytes / 175 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 586 / 620
Referenced in: [show references]