Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

198
LINES

< > BotCompany Repo | #1002302 // An NL Parser, attempt 4, with analysis (developing)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (1953L/13K/43K).

1  
// Idea: For every position, store the productions recognized to start there, then infer up to higher classes
2  
3  
!752
4  
5  
// Recognition table. One recognition is the triple (startPos, className, endPos):
// outer key = start position, inner key = class name, values = end positions.
// Rebuilt from scratch by every parseTop() call.
static Map<Integer, MultiMap<S, Integer>> recog;

// Grammar: class name -> every production (token list) registered for it.
static MultiMap<S, L<S>> productionMap = new MultiMap();

// Switches the trace output of debug(S, Pos) on.
static boolean debug;
14  
15  
p {
  // Grammar rules come from one snippet, the input lines from two others.
  S rulesText = loadSnippet("#1002281");
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
  S mainProd = "line"; // class that every input line is parsed as
  
  // Each rule is split at "=" into a production (left) and a class name (right).
  // pcall keeps one failing rule from aborting the whole load.
  for (S rule : toLinesFullTrim(rulesText)) pcall {
    L<S> lr = splitAtJavaToken(rule, "=");
    if (l(lr) != 2) {
      print("Weird rule: " + rule);
      continue;
    }
    S l = lr.get(0), r = lr.get(1);
    L<S> tokr = javaTok(r);
    // the right-hand side must tokenize to exactly 3 list elements;
    // element 1 is taken as the class name
    assertEquals(structure(tokr), 3, l(tokr));
    S className = assertIdentifier(get(tokr, 1));
    L<S> tok = javaTok(l);
    tok = mergeBracketThingies(tok);
    productionMap.put(className, tok);
  }
  
  print(n(productionMap.size(), "production") + ".");
  print();
  
  for (S line : toLinesFullTrim(inputText)) {
    print();
    print(line);
    L<S> tok = javaTok(line);
    Pos pos = new Pos(tok);
    L<Integer> x = parseTop(pos, mainProd);
    
    // an end position equal to l(tok) means the whole line was consumed
    boolean fullMatch = x.contains(l(tok));
    if (fullMatch)
      print("  parsed");
    else if (!empty(x))
      print("  beginning matches");
    else
      print("  not parsed");
    print("  " + structure(recog));
    
    if (!empty(x)) {
      // Explain the complete parse when there is one. x.get(0) is merely the
      // first end position found and may belong to a shorter partial match.
      int endPos = fullMatch ? l(tok) : x.get(0);
      L out = explainMatch(new Pos(tok), endPos, mainProd);
      print("Analysis: " + structure(out));
    }
  }
}
60  
61  
// Fills the global recognition table for pos.tok and returns the end positions
// at which mainProd was recognized starting from pos.i.
// Returns an empty list when there is no token at pos.i.
static L<Integer> parseTop(Pos pos, S mainProd) {
  // init structures: one (initially empty) entry per token position.
  // Positions advance in steps of 2, starting at pos.i.
  recog = new TreeMap;
  for (int i = pos.i; i < l(pos.tok); i += 2)
    recog.put(i, new MultiMap);

  // Re-run all productions at all positions until a full pass
  // adds no new recognition.
  boolean anyChange;
  do {
    anyChange = false;
    for (int i = pos.i; i < l(pos.tok); i += 2) {
      Pos pos2 = new Pos(pos.tok, i);
      MultiMap<S, Integer> rr = recog.get(i); // same for every class at position i
      for (S className : productionMap.keySet()) {
        L<Integer> recs = rr.getActual(className);
        for (L<S> prod : productionMap.get(className)) {
          int n = l(recs);
          matchProd(pos2, new Pos(prod), className, recs);
          if (l(recs) > n) anyChange = true;
        }
        rr.clean(className);
      }
    }
  } while (anyChange);
  
  // No table entry exists for pos.i when the token list has no token there
  // (the loops above never ran); report "nothing recognized" instead of
  // failing with a NullPointerException.
  MultiMap<S, Integer> top = recog.get(pos.i);
  if (top == null) ret new ArrayList<Integer>();
  ret top.get(mainProd);
}
88  
89  
// A cursor into a token list: the list itself plus the index of the current token.
static class Pos {
  L<S> tok;
  int i = 1; // default start index
  
  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}
  
  // true when no token is left at or after index i
  boolean end() { ret i >= l(tok)-1; }
  
  // the token at the current index
  S get() { ret tok.get(i); }
  
  public Pos clone() { ret new Pos(tok, i); }
  
  // Two positions are equal when they point at the same index of the very
  // same list object (identity comparison, not list contents).
  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }
  
  // Must accompany equals(): built from the list's identity hash because
  // equals() compares tok by identity.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }
  
  // everything from the current index to the end of the list, joined into one string
  S rest() {
    ret join(subList(tok, i));
  }

  // a new position on the same list, moved by x list elements
  Pos plus(int x) { ret new Pos(tok, i + x); }
}
112  
113  
// Makes b point at the same token list and index as a.
static void copy(Pos a, Pos b) {
  b.i = a.i;
  b.tok = a.tok;
}
117  
118  
// Prints bla together with the remaining input at pos, but only while
// the static debug flag is set.
static void debug(S bla, Pos pos) {
  if (!debug) ret;
  print(bla + " on " + quote(pos.rest()));
}
122  
123  
// Matches the rest of a production (prod) against the input at pos and adds
// every end position of a complete match to out (without duplicates).
//   pos      - current input position
//   prod     - current position within the production's token list
//   forClass - class the production belongs to (only passed along)
//   out      - collects the end positions
static void matchProd(Pos pos, Pos prod, S forClass, L<Integer> out) {
  if (prod.end()) {
    // production fully consumed: the current input position is an end position
    setAdd(out, pos.i);
    ret;
  }
  
  S p = prod.get();
  
  if (isBracketedID(p) && neq(p, "<quoted>")) {
    // Reference to another class: continue after every end position already
    // recognized for that class at this start position.
    MultiMap<S, Integer> here = recog.get(pos.i);
    
    // The table only has entries for positions holding a token. After the last
    // token was consumed, pos.i lies past them and there is no entry; nothing
    // can be recognized there, so stop instead of dereferencing null.
    if (here == null) ret;
    
    // Iterate over a copy. NOTE(review): presumably because the list may be
    // the same one that out is appending to; confirm against MultiMap.get.
    for (int i : cloneList(here.get(unbracket(p))))
      matchProd(new Pos(pos.tok, i), prod.plus(2), forClass, out);
  } else {
    // it's a literal
    if (pos.end()) ret; // need a token to match
    if (!matchToken(p, pos.get())) ret;
    
    matchProd(pos.plus(2), prod.plus(2), forClass, out);
  }
}
148  
149  
// Tests a single input token t against a literal production element p:
//   "<quoted>" accepts any token for which isQuoted holds,
//   "*"        accepts every token,
//   otherwise p and t are compared with eqic.
static boolean matchToken(S p, S t) {
  if (eq(p, "<quoted>"))
    ret isQuoted(t);
  ret eq(p, "*") || eqic(p, t);
}
156  
157  
// Assumes that the recognition (pos, forClass, endPos) exists and collects,
// over all productions of forClass, the ways it was probably matched.
// Each element of the result is one list of match steps.
static L explainMatch(Pos pos, int endPos, S forClass) {
  new L out;
  for (L<S> prod : productionMap.get(forClass))
    explainMatch2(pos, new Pos(prod), endPos, forClass, litlist(), out);
  ret out;
}
166  
167  
// Explains how one fixed production can match the input from pos up to endPos.
//   pos      - current input position
//   prod     - current position within the production's token list
//   endPos   - input position at which the match has to end
//   forClass - class being explained (only passed along)
//   match    - steps taken so far; extended and restored during recursion
//   out      - receives a copy of match for every way that ends at endPos
// Step format: (start, end, className) for a class reference,
//              (start, productionToken, end) for a literal.
static void explainMatch2(Pos pos, Pos prod, int endPos, S forClass, L match, L out) {
  if (prod.end()) {
    // production consumed; only an explanation if it ends exactly at endPos
    if (pos.i == endPos)
      out.add(cloneList(match));
    ret;
  }
  
  S p = prod.get();
  
  if (isBracketedID(p) && neq(p, "<quoted>")) {
    S className = unbracket(p);
    MultiMap<S, Integer> here = recog.get(pos.i);
    
    // The table has no entry for the position behind the last token; a class
    // reference cannot match there, so stop instead of dereferencing null.
    if (here == null) ret;

    // keep parsing for every option
    for (int i : cloneList(here.get(className))) {
      match.add(litlist(pos.i, i, className));
      explainMatch2(new Pos(pos.tok, i), prod.plus(2), endPos, forClass, match, out);
      removeLast(match); // backtrack
    }
  } else {
    // it's a literal
    if (pos.end()) ret; // need a token to match
    if (!matchToken(p, pos.get())) ret;
    
    match.add(litlist(pos.i, p, pos.i+2));
    explainMatch2(pos.plus(2), prod.plus(2), endPos, forClass, match, out);
    removeLast(match); // backtrack
  }
}

Author comment

Began life as a copy of #1002297

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002302
Snippet name: An NL Parser, attempt 4, with analysis (developing)
Eternal ID of this version: #1002302/1
Text MD5: 840669e0033495e85d17458ef245b4b7
Transpilation MD5: f416d0536e0d52a0c12cc9bf443ba40a
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-01-04 00:23:15
Source code size: 5391 bytes / 198 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 589 / 615
Referenced in: [show references]