Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

149
LINES

< > BotCompany Repo | #1002297 // An NL Parser, attempt 3 (works!)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (1893L/13K/42K).

1  
// Idea: For every position, store the productions recognized to start there, then infer up to higher classes
2  
3  
!752
4  
5  
// a recognition is identified by (startPos, className, endPos)
6  
7  
8  
// key 1 = start position, key 2 = class name, value = end position
9  
// All recognitions for the input currently being parsed; parseTop replaces this map on every call.
static Map<Integer, MultiMap<S, Integer>> recog;
10  
11  
// Grammar: class name -> every production (token list) yielding that class. Filled by the main program from the rules text.
static MultiMap<S, L<S>> productionMap = new MultiMap;
12  
13  
// When true, debug(S, Pos) prints its trace messages; when false it prints nothing.
static boolean debug = false;
14  
15  
p {
16  
  S rulesText = loadSnippet("#1002281");
17  
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
18  
  S mainProd = "line";
19  
  
20  
  for (S rule : toLinesFullTrim(rulesText)) pcall {
21  
    //printF("Processing rule: *", rule);
22  
    L<S> lr = splitAtJavaToken(rule, "=");
23  
    if (l(lr) != 2) {
24  
      print("Weird rule: " + rule);
25  
      continue;
26  
    }
27  
    S l = lr.get(0), r = lr.get(1);
28  
    L<S> tokr = javaTok(r);
29  
    assertEquals(structure(tokr), 3, l(tokr));
30  
    S className = assertIdentifier(get(tokr, 1));
31  
    L<S> tok = javaTok(l);
32  
    tok = mergeBracketThingies(tok);
33  
    //printStructure(tok);
34  
    productionMap.put(className, tok);
35  
  }
36  
  
37  
  print(n(productionMap.size(), "production") + ".");
38  
  print();
39  
  
40  
  for (S line : toLinesFullTrim(inputText)) {
41  
    print();
42  
    print(line);
43  
    L<S> tok = javaTok(line);
44  
    //printStructure(tok);
45  
    Pos pos = new Pos(tok);
46  
    L<Integer> x = parseTop(pos, mainProd);
47  
    if (x.contains(l(tok)))
48  
      print("  parsed");
49  
    else if (!empty(x))
50  
      print("  beginning matches");
51  
    else
52  
      print("  not parsed");
53  
    print("  " + structure(recog));
54  
  }
55  
}
56  
57  
// Parses the tokens of pos (starting at pos.i) bottom-up and returns every
// position at which a recognition of class mainProd that starts at pos.i ends.
//
// Side effect: replaces the global recog map with the recognitions for this input.
// Returns an empty list when there is no token at pos.i (previously this case
// threw a NullPointerException because recog had no entry for pos.i).
static L<Integer> parseTop(Pos pos, S mainProd) {
  // init structures: one (initially empty) recognition map per visited position
  recog = new TreeMap;
  int len = l(pos.tok);
  for (int i = pos.i; i < len; i += 2)
    recog.put(i, new MultiMap);

  // Nothing to parse (e.g. the line tokenized to no code tokens at all).
  if (recog.get(pos.i) == null)
    ret new ArrayList<Integer>();

  // Re-run all productions at all positions until a full pass adds no new
  // recognition, so classes built from other classes are eventually found.
  boolean anyChange;
  do {
    anyChange = false;
    for (int i = pos.i; i < len; i += 2) {
      Pos pos2 = new Pos(pos.tok, i);
      MultiMap<S, Integer> rr = recog.get(i);
      for (S className : productionMap.keySet()) {
        // presumably getActual returns the stored (mutable) list, creating it if absent - verify in MultiMap
        L<Integer> recs = rr.getActual(className);
        for (L<S> prod : productionMap.get(className)) {
          int n = l(recs);
          matchProd(pos2, new Pos(prod), className, recs);
          if (l(recs) > n) anyChange = true;
        }
        // presumably drops the entry again if nothing was recognized - verify in MultiMap
        rr.clean(className);
      }
    }
  } while (anyChange);

  ret recog.get(pos.i).get(mainProd);
}
84  
85  
// A cursor into a token list: the list itself plus the index of the current token.
// The index starts at 1 and callers advance it in steps of 2
// (presumably the javaTok layout with code tokens at odd indices - confirm).
static class Pos {
  L<S> tok;
  int i = 1;

  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  // true when no further token can be read at i
  boolean end() { ret i >= l(tok)-1; }

  // the token at the current index
  S get() { ret tok.get(i); }

  // copy sharing the same token list
  public Pos clone() { ret new Pos(tok, i); }

  // Two positions are equal when they refer to the very same list object and the same index.
  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }

  // Added to match equals: identity of the list plus the index, so equal
  // positions hash alike in hash-based collections.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  // remaining tokens from the current index on, joined into one string
  S rest() {
    ret join(subList(tok, i));
  }

  // new position x entries further along the same list
  Pos plus(int x) { ret new Pos(tok, i + x); }
}
108  
109  
static void copy(Pos a, Pos b) {
110  
  b.tok = a.tok;
111  
  b.i = a.i;
112  
}
113  
114  
// Prints bla together with the quoted rest of the input at pos - only while the debug flag is set.
static void debug(S bla, Pos pos) {
  if (!debug) ret;
  print(bla + " on " + quote(pos.rest()));
}
118  
119  
// Matches the rest of production prod against the input at pos and appends to out
// every input position at which a complete match ends (no duplicates).
//
// pos      - current input position
// prod     - current position inside the production's token list
// forClass - class the production belongs to (only passed along to recursive calls)
// out      - collects the end positions
//
// Production elements: "<name>" refers to another class and is resolved through
// recog; "<quoted>" matches any quoted token; "*" matches any token; anything
// else is a literal compared case-insensitively.
static void matchProd(Pos pos, Pos prod, S forClass, L<Integer> out) {
    if (prod.end()) {
      // whole production consumed: pos.i is where this match ends
      if (!out.contains(pos.i))
        out.add(pos.i);
      ret;
    }

    S p = prod.get();

    if (isBracketedID(p) && neq(p, "<quoted>")) {
      // it's a reference to another class
      MultiMap<S, Integer> rr = recog.get(pos.i);

      // recog has no entry past the last token. Input ended before this
      // element, so nothing can match (previously a NullPointerException).
      if (rr == null) ret;

      L<Integer> r = rr.get(unbracket(p));

      // keep parsing for every option
      // (iterating a copy - presumably because out may be the list being read; confirm)
      for (int i : cloneList(r))
        matchProd(new Pos(pos.tok, i), prod.plus(2), forClass, out);

    } else {
      // it's a literal
      if (pos.end()) ret; // need a token to match
      S t = pos.get();
      if (eq(p, "<quoted>")) {
        if (!isQuoted(t)) ret;
      } else if (!(eq(p, "*") || eqic(p, t)))
        ret; // token mismatch

      matchProd(pos.plus(2), prod.plus(2), forClass, out);
    }
}

Author comment

Began life as a copy of #1002289

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002297
Snippet name: An NL Parser, attempt 3 (works!)
Eternal ID of this version: #1002297/1
Text MD5: 68aea122efd42b0997f652f7524a1e0c
Transpilation MD5: de2bbceffa659d7ef84d9adfa6c0617d
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-01-03 20:38:35
Source code size: 3910 bytes / 149 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 588 / 699
Referenced in: [show references]