Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

159
LINES

< > BotCompany Repo | #1002282 // An NL Parser (developing)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (1862L/12K/42K).

1  
!752
2  
3  
// Grammar table: maps a class name (right-hand side of a rule) to every production
// registered for it. Each production is the rule's left-hand side as a token list
// after mergeBracketThingies has been applied (filled in by the rule loop in main).
static MultiMap<S, L<S>> productionMap = new MultiMap;
4  
5  
// Trace switch: when true, parseClass/parseProd/checkHalt print diagnostic output.
static boolean debug = false;
6  
7  
p {
8  
  S rulesText = loadSnippet("#1002281");
9  
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
10  
  S mainProd = "line";
11  
  
12  
  for (S rule : toLinesFullTrim(rulesText)) pcall {
13  
    printF("Processing rule: *", rule);
14  
    L<S> lr = splitAtJavaToken(rule, "=");
15  
    if (l(lr) != 2) {
16  
      print("Weird rule: " + rule);
17  
      continue;
18  
    }
19  
    S l = lr.get(0), r = lr.get(1);
20  
    L<S> tokr = javaTok(r);
21  
    assertEquals(structure(tokr), 3, l(tokr));
22  
    S className = assertIdentifier(get(tokr, 1));
23  
    L<S> tok = javaTok(l);
24  
    tok = mergeBracketThingies(tok);
25  
    //printStructure(tok);
26  
    productionMap.put(className, tok);
27  
  }
28  
  
29  
  print(n(productionMap.size(), "production") + ".");
30  
  print();
31  
  
32  
  for (S line : toLinesFullTrim(inputText)) {
33  
    print(line);
34  
    L<S> tok = javaTok(line);
35  
    Pos pos = new Pos(tok);
36  
    if (parseClass(pos, mainProd) != null)
37  
      print("  parsed");
38  
    else
39  
      print("  not parsed");
40  
  }
41  
}
42  
43  
// A cursor into a token list: the list itself plus the index of the current token.
// Two cursors are equal when they point into the very same list object at the same index.
static class Pos {
  // Token list being parsed (main passes the result of javaTok).
  // Presumably code tokens sit at odd indices - the parser only reads odd indices; verify against javaTok.
  L<S> tok;
  // Index of the current token; starts at 1 and is advanced in steps of 2 by parseProd.
  int i = 1;

  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  // True when no further token is available at or after index i.
  boolean end() { ret i >= l(tok)-1; }

  // Independent cursor sharing the same token list.
  public Pos clone() { ret new Pos(tok, i); }

  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    // Identity comparison on the list is deliberate: same list object, same index.
    ret tok == pos.tok && i == pos.i;
  }

  // Kept consistent with equals(): list identity plus index.
  // Without this, equal cursors could land in different hash buckets.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  // The remaining input from the current index on, joined back into one string.
  S rest() {
    ret join(subList(tok, i));
  }
}
63  
64  
static void copy(Pos a, Pos b) {
65  
  b.tok = a.tok;
66  
  b.i = a.i;
67  
}
68  
69  
// Trace helper: if the debug flag is set, prints the message followed by the
// quoted remainder of the input at pos. Does nothing otherwise.
static void debug(S bla, Pos pos) {
  if (!debug) ret;
  S remaining = quote(pos.rest());
  print(bla + " on " + remaining);
}
73  
74  
// endless loop detector
75  
// Copy of the position checkHalt saw when it last encountered a position
// different from the previous one (null until checkHalt runs for the first time).
static Pos haltPos;
76  
// Class names checkHalt has been asked about at haltPos since haltPos was last replaced.
static new HashSet<S> haltClasses;
77  
78  
// Tries every production registered for class `name` at `pos`, in order.
// On the first production that succeeds, `pos` is advanced past the match and the
// production's result is returned. Returns null (leaving `pos` untouched) if the
// loop guard fires, the class has no productions, or no production matches.
static O parseClass(Pos pos, S name) {
  if (debug) debug("parseClass " + name, pos);

  if (checkHalt(pos, name)) ret null;

  L<L<S>> alternatives = productionMap.get(name);
  if (empty(alternatives)) ret null; // weird, unknown class name

  for (L<S> alternative : alternatives) {
    // Work on a scratch cursor so a failed attempt does not move the caller's position.
    Pos attempt = new Pos(pos.tok, pos.i);
    O result = parseProd(attempt, alternative);
    if (result == null) continue;
    copy(attempt, pos);
    ret result;
  }

  ret null;
}
92  
93  
// returns true if we should halt because of endless looping
94  
static boolean checkHalt(Pos pos, S className) {
95  
  if (!eq(haltPos, pos)) {
96  
    haltPos = pos.clone();
97  
    haltClasses = lithashset(className);
98  
    return false;
99  
  } else {
100  
    if (haltClasses.contains(className)) {
101  
      if (debug)
102  
        print("Endless loop: " + structure(pos) + " " + structure(haltClasses));
103  
      ret true;
104  
    } else {
105  
      haltClasses.add(className);
106  
      print("checkHalt: same pos, classes now: " + structure(haltClasses));
107  
      ret false;
108  
    }
109  
  }
110  
}
111  
112  
// Matches one production against the input at `pos`.
// Elements at odd indices of `prod` are processed left to right:
//   - "<name>" elements recurse into parseClass(name);
//   - "*" matches any single token;
//   - anything else must equal the current token, ignoring case.
// Returns true (as an Object) when every element matched, with `pos` advanced past
// the match; returns null on the first mismatch or when the input runs out.
// On failure `pos` may already have been advanced by the elements that did match;
// parseClass passes in a scratch cursor for that reason.
static O parseProd(Pos pos, L<S> prod) {
  if (debug)
    debug("parseProd " + structure(prod), pos);

  for (int i = 1; i < l(prod); i += 2) {
    S element = prod.get(i);
    if (isBracketedID(element)) {
      Pos _pos = pos.clone();
      O x = parseClass(_pos, unbracket(element));
      if (x == null) ret null;
      copy(_pos, pos);
      // keep parsing production
    } else {
      // it's a literal
      if (pos.end()) ret null; // need a token to match
      // Read the current token only after the end check: once the input is
      // exhausted pos.i equals the list size and get() would throw.
      S current = pos.tok.get(pos.i);
      if (!(eq(element, "*") || eqic(element, current)))
        ret null; // token mismatch
      pos.i += 2; // consume & keep parsing
    }
  }

  if (debug)
    debug("ok " + structure(prod), pos);
  ret true; // production succeeded
}
138  
139  
// True when s begins with "<" and ends with ">" (the content in between is not inspected).
static boolean isBracketedID(S s) {
  if (!s.startsWith("<")) ret false;
  ret s.endsWith(">");
}
142  
143  
static S unbracket(S s) {
144  
  ret isBracketedID(s) ? s.substring(1, l(s)-1) : s;
145  
}
146  
147  
// angle bracket things like <quoted>
148  
static L<S> mergeBracketThingies(L<S> tok) {
149  
  tok = cloneList(tok);
150  
  for (int i = 1; i+4 < l(tok); i += 2)
151  
    if (eq(get(tok, i), "<") && eq(get(tok, i+1), "") && isIdentifier(get(tok, i+2)) && eq(get(tok, i+3), "") && eq(get(tok, i+4), ">")) {
152  
      tok.set(i, "<" + tok.get(i+2) + ">");
153  
      tok.remove(i+4);
154  
      tok.remove(i+3);
155  
      tok.remove(i+2);
156  
      tok.remove(i+1);
157  
    }
158  
  ret tok;
159  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002282
Snippet name: An NL Parser (developing)
Eternal ID of this version: #1002282/1
Text MD5: 4fd683174302221441cdb839c8e2412e
Transpilation MD5: 8886016cc6fb751a4b2a57ffd6937c5c
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-01-02 20:35:13
Source code size: 4074 bytes / 159 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 774 / 866
Referenced in: [show references]