Not logged in.  Login/Logout/Register | List snippets | Create snippet | Upload image | Upload data

175
LINES

< > BotCompany Repo | #1002289 // An NL Parser, attempt 2 (developing)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (1895L/13K/42K).

1  
// Idea: For every position, store the productions recognized to start there, then infer up to higher classes
2  
3  
!752
4  
5  
/** One successful match: a production of some class that was recognized at a start position. */
static class Recognized {
  S className; // class whose production matched
  Pos endPos;  // position reached once the production had been matched
  L<S> prod;   // the production (token list) that matched

  /** No-arg constructor; leaves all fields unset. */
  Recognized() {}

  /** Creates a recognition record with all three fields filled in. */
  Recognized(S className, Pos endPos, L<S> prod) {
    this.className = className;
    this.endPos = endPos;
    this.prod = prod;
  }
}
13  
14  
// Recognitions found for the line currently being parsed, keyed by the token
// index at which each one starts. Re-created on every parseTop call.
static MultiMap<Integer, Recognized> recog;
15  
16  
// Grammar: maps a class name to every production (token list) registered for it.
static MultiMap<S, L<S>> productionMap = new MultiMap;
17  
18  
// When true, the diagnostic print calls guarded by this flag are enabled.
static boolean debug = false;
19  
20  
// Entry point: loads the rule text and the input text from snippets, registers
// every well-formed rule in productionMap, then tries to parse each input line
// as a "line" production and prints the outcome.
p {
  S rulesText = loadSnippet("#1002281");
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
  S mainProd = "line";

  // A rule reads "<production tokens> = <class name>"
  for (S rule : toLinesFullTrim(rulesText)) pcall {
    printF("Processing rule: *", rule);
    L<S> sides = splitAtJavaToken(rule, "=");
    if (l(sides) == 2) {
      S lhs = sides.get(0), rhs = sides.get(1);

      // the right-hand side must tokenize to exactly one token (list length 3)
      L<S> rhsTok = javaTok(rhs);
      assertEquals(structure(rhsTok), 3, l(rhsTok));
      S className = assertIdentifier(get(rhsTok, 1));

      L<S> prodTok = mergeBracketThingies(javaTok(lhs));
      printStructure(prodTok);
      productionMap.put(className, prodTok);
    } else
      print("Weird rule: " + rule);
  }

  print(n(productionMap.size(), "production") + ".");
  print();

  for (S line : toLinesFullTrim(inputText)) {
    print(line);
    L<S> lineTok = javaTok(line);
    printStructure(lineTok);
    O result = parseTop(new Pos(lineTok), mainProd);
    print(result != null
      ? "  parsed: " + structure(result)
      : "  not parsed, stuff found: " + recogToString());
  }
}
57  
58  
// Bottom-up recognition over pos.tok: repeats scanning passes until a pass adds
// no new recognition, then looks up mainProd at pos.
// Returns the matched production (and moves pos to the recognition's end
// position), or null if mainProd was not recognized at pos.
static O parseTop(Pos pos, S mainProd) {
  recog = new MultiMap; // start from a clean slate for this token list

  // iterate to a fixpoint; a pass may enable matches that earlier passes missed
  while (recognitionPass(pos.tok)) {}

  Recognized rec = getRecognition(pos, mainProd);
  if (debug)
    print("rec: " + structure(rec));
  if (rec == null)
    ret null;
  copy(rec.endPos, pos);
  ret rec.prod;
}

// One scanning pass: for every token index and every class not yet recognized
// there, tries the class's productions in order and records the first that matches.
// Returns true if at least one new recognition was recorded.
static boolean recognitionPass(L<S> tok) {
  boolean anyChange = false;
  for (int start = 1; start < l(tok); start += 2) {
    Pos startPos = new Pos(tok, start);
    for (S className : productionMap.keySet()) {
      if (getRecognition(startPos, className) != null) continue; // already known here

      for (L<S> prod : productionMap.get(className)) {
        Pos cursor = startPos.clone(); // matchProd advances this copy
        if (matchProd(cursor, prod, className) == null) continue;

        recog.put(start, new Recognized(className, cursor, prod));
        if (debug)
          print("new stuff at " + start + ": " + className);
        anyChange = true;
        break; // first matching production wins; move on to the next class
      }
    }
  }
  ret anyChange;
}
98  
99  
// Looks up the first recognition of className recorded at token index pos.i.
// Returns null when there is none.
static Recognized getRecognition(Pos pos, S className) {
  Recognized found = null;
  for (Recognized candidate : recog.get(pos.i)) {
    if (eq(candidate.className, className)) {
      found = candidate;
      break;
    }
  }
  ret found;
}
105  
106  
/** A cursor into a token list: the list itself plus the index of the current token. */
static class Pos {
  L<S> tok;  // the token list this cursor points into
  int i = 1; // index of the current token; the code in this file advances it in steps of 2

  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  /** True when i has reached l(tok)-1 or beyond, i.e. no token is left to consume. */
  boolean end() { ret i >= l(tok)-1; }

  /** Returns an independent cursor over the same token list at the same index. */
  public Pos clone() { ret new Pos(tok, i); }

  /** Two positions are equal when they refer to the very same list object and index. */
  @Override
  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }

  /**
   * Hash code consistent with equals: equals compares tok by identity, so the
   * identity hash of tok is used here rather than the list's content hash.
   */
  @Override
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  /** Joins everything from the current index to the end of the token list into one string. */
  S rest() {
    ret join(subList(tok, i));
  }
}
126  
127  
// Makes b point to the same token list and index as a (a is left untouched).
static void copy(Pos a, Pos b) {
  b.i = a.i;
  b.tok = a.tok;
}
131  
132  
// Prints a diagnostic message together with the remaining input at pos,
// but only while the debug flag is set.
static void debug(S bla, Pos pos) {
  if (!debug) ret;
  S remaining = quote(pos.rest());
  print(bla + " on " + remaining);
}
136  
137  
// Tries to match the production prod starting at pos.
//
// Elements of prod (odd indices) are handled as follows:
//  - a bracketed ID must already be recognized at the current position
//    (see getRecognition); the cursor then jumps to that recognition's end;
//  - "<quoted>" matches any token for which isQuoted is true;
//  - "*" matches any single token;
//  - anything else must equal the current token ignoring case.
//
// pos is advanced as elements match; on failure it may be left partially advanced.
// forClass is only used to decide whether to print the trace line below.
// Returns true (as O) when the whole production matched, null otherwise.
static O matchProd(Pos pos, L<S> prod, S forClass) {
  for (int i = 1; i < l(prod); i += 2) {
    S p = prod.get(i);

    // pos.i may already be past the last index (all input consumed while the
    // production still has elements left). Reading the token unguarded would
    // throw IndexOutOfBoundsException instead of reporting "no match".
    S t = pos.i < l(pos.tok) ? pos.tok.get(pos.i) : null;

    if (isBracketedID(p)) {
      Recognized rec = getRecognition(pos, unbracket(p));
      // NOTE(review): this trace is printed regardless of the debug flag and is
      // tied to the hard-coded class name "line" - looks like a leftover; confirm.
      if (eq(forClass, "line"))
        print("p=" + quote(p) + ", t=" + (t == null ? "null" : quote(t)) + ", i=" + pos.i + ", rec= " + structure(rec));
      if (rec == null)
        ret null;
      copy(rec.endPos, pos);
      // keep parsing production
    } else {
      // it's a literal
      if (pos.end()) ret null; // need a token to match
      if (eq(p, "<quoted>")) {
        if (!isQuoted(t)) ret null;
      } else if (!(eq(p, "*") || eqic(p, t)))
        ret null; // token mismatch
      pos.i += 2; // consume & keep parsing
    }
  }

  ret true; // production succeeded
}
167  
168  
// Renders all current recognitions as a comma-separated list of
// "<start index>/<class name>" entries.
static S recogToString() {
  L<S> parts = new ArrayList<S>();
  for (Integer startIndex : recog.keySet())
    for (Recognized r : recog.get(startIndex))
      parts.add(startIndex + "/" + r.className);
  ret join(", ", parts);
}

Author comment

Began life as a copy of #1002282

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002289
Snippet name: An NL Parser, attempt 2 (developing)
Eternal ID of this version: #1002289/1
Text MD5: 5ab08edd3201474b13957a4f70d88f79
Transpilation MD5: 6c57e5edca5371aa927302860bf61e2e
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-01-03 01:49:16
Source code size: 4516 bytes / 175 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 674 / 729
Referenced in: [show references]