Libraryless. Click here for Pure Java version (1895L/13K/42K).
1 | // Idea: For every position, store the productions recognized to start there, then infer up to higher classes |
2 | |
3 | !752 |
4 | |
5 | static class Recognized {
|
6 | S className; |
7 | Pos endPos; |
8 | L<S> prod; |
9 | |
10 | *(S *className, Pos *endPos, L<S> *prod) {}
|
11 | *() {}
|
12 | } |
13 | |
14 | static MultiMap<Integer, Recognized> recog; |
15 | |
16 | static MultiMap<S, L<S>> productionMap = new MultiMap; |
17 | |
18 | static boolean debug = false; |
19 | |
20 | p {
|
21 | S rulesText = loadSnippet("#1002281");
|
22 | S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
|
23 | S mainProd = "line"; |
24 | |
25 | for (S rule : toLinesFullTrim(rulesText)) pcall {
|
26 | printF("Processing rule: *", rule);
|
27 | L<S> lr = splitAtJavaToken(rule, "="); |
28 | if (l(lr) != 2) {
|
29 | print("Weird rule: " + rule);
|
30 | continue; |
31 | } |
32 | S l = lr.get(0), r = lr.get(1); |
33 | L<S> tokr = javaTok(r); |
34 | assertEquals(structure(tokr), 3, l(tokr)); |
35 | S className = assertIdentifier(get(tokr, 1)); |
36 | L<S> tok = javaTok(l); |
37 | tok = mergeBracketThingies(tok); |
38 | printStructure(tok); |
39 | productionMap.put(className, tok); |
40 | } |
41 | |
42 | print(n(productionMap.size(), "production") + "."); |
43 | print(); |
44 | |
45 | for (S line : toLinesFullTrim(inputText)) {
|
46 | print(line); |
47 | L<S> tok = javaTok(line); |
48 | printStructure(tok); |
49 | Pos pos = new Pos(tok); |
50 | O x = parseTop(pos, mainProd); |
51 | if (x != null) |
52 | print(" parsed: " + structure(x));
|
53 | else |
54 | print(" not parsed, stuff found: " + recogToString());
|
55 | } |
56 | } |
57 | |
58 | static O parseTop(Pos pos, S mainProd) {
|
59 | // init structures |
60 | recog = new MultiMap; |
61 | |
62 | boolean anyChange; |
63 | do {
|
64 | anyChange = false; |
65 | for (int i = 1; i < l(pos.tok); i += 2) {
|
66 | Pos pos2 = new Pos(pos.tok, i); |
67 | for (S className : productionMap.keySet()) {
|
68 | if (getRecognition(pos2, className) != null) continue; |
69 | |
70 | L<L<S>> prods = productionMap.get(className); |
71 | for (L<S> prod : prods) {
|
72 | Pos _pos = pos2.clone(); |
73 | O x = matchProd(_pos, prod, className); |
74 | if (x != null) {
|
75 | recog.put(pos2.i, new Recognized(className, _pos, prod)); |
76 | if (debug) |
77 | print("new stuff at " + pos2.i + ": " + className);
|
78 | anyChange = true; |
79 | break; // try next class - could also omit this |
80 | } |
81 | } |
82 | } |
83 | } |
84 | } while (anyChange); |
85 | |
86 | /*if (debug) |
87 | print("Stuff found: " + structure(recog));*/
|
88 | |
89 | Recognized rec = getRecognition(pos, mainProd); |
90 | if (debug) |
91 | print("rec: " + structure(rec));
|
92 | if (rec != null) {
|
93 | copy(rec.endPos, pos); |
94 | ret rec.prod; |
95 | } else |
96 | ret null; |
97 | } |
98 | |
99 | static Recognized getRecognition(Pos pos, S className) {
|
100 | for (Recognized r : recog.get(pos.i)) |
101 | if (eq(r.className, className)) |
102 | ret r; |
103 | ret null; |
104 | } |
105 | |
/**
 * A cursor into a token list: the list itself plus the index of the current
 * token. The code in this file starts at index 1 and advances in steps of 2.
 *
 * Two positions are equal when they refer to the very same list object
 * (identity, not content) and have the same index.
 */
static class Pos {
  /** The token list being walked. */
  L<S> tok;

  /** Index of the current token; starts at 1. */
  int i = 1;

  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  /** True when the index has reached or passed the last list slot, i.e. no token is left. */
  boolean end() { ret i >= l(tok)-1; }

  /** Returns an independent cursor over the same list at the same index. */
  public Pos clone() { ret new Pos(tok, i); }

  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }

  /**
   * Hash code consistent with equals(): equals() compares the list by
   * identity, so the identity hash of the list is used rather than its
   * content hash.
   */
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  /** Joins all list entries from the current index to the end into one string. */
  S rest() {
    ret join(subList(tok, i));
  }
}
126 | |
127 | static void copy(Pos a, Pos b) {
|
128 | b.tok = a.tok; |
129 | b.i = a.i; |
130 | } |
131 | |
132 | static void debug(S bla, Pos pos) {
|
133 | if (debug) |
134 | print(bla + " on " + quote(pos.rest())); |
135 | } |
136 | |
137 | static O matchProd(Pos pos, L<S> prod, S forClass) {
|
138 | /*if (debug) |
139 | debug("matchProd " + structure(prod), pos);*/
|
140 | |
141 | for (int i = 1; i < l(prod); i += 2) {
|
142 | S p = prod.get(i); |
143 | S t = pos.tok.get(pos.i); |
144 | if (isBracketedID(p)) {
|
145 | Recognized rec = getRecognition(pos, unbracket(p)); |
146 | if (eq(forClass, "line")) |
147 | print("p=" + quote(p) + ", t=" + quote(t) + ", i=" + pos.i + ", rec= " + structure(rec));
|
148 | if (rec == null) |
149 | ret null; |
150 | copy(rec.endPos, pos); |
151 | // keep parsing production |
152 | } else {
|
153 | // it's a literal |
154 | if (pos.end()) ret null; // need a token to match |
155 | if (eq(p, "<quoted>")) {
|
156 | if (!isQuoted(t)) ret null; |
157 | } else if (!(eq(p, "*") || eqic(p, t))) |
158 | ret null; // token mismatch |
159 | pos.i += 2; // consume & keep parsing |
160 | } |
161 | } |
162 | |
163 | /*if (debug) |
164 | debug("ok " + structure(prod), pos);*/
|
165 | ret true; // production succeeded |
166 | } |
167 | |
168 | static S recogToString() {
|
169 | new L<S> l; |
170 | for (int i : recog.keySet()) {
|
171 | for (Recognized r : recog.get(i)) |
172 | l.add(i + "/" + r.className); |
173 | } |
174 | ret join(", ", l);
|
175 | } |
Began life as a copy of #1002282
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1002289 |
| Snippet name: | An NL Parser, attempt 2 (developing) |
| Eternal ID of this version: | #1002289/1 |
| Text MD5: | 5ab08edd3201474b13957a4f70d88f79 |
| Transpilation MD5: | 6c57e5edca5371aa927302860bf61e2e |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX source code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2016-01-03 01:49:16 |
| Source code size: | 4516 bytes / 175 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 955 / 1071 |
| Referenced in: | [show references] |