Libraryless. Click here for Pure Java version (1953L/13K/43K).
1 | // Idea: For every position, store the productions recognized to start there, then infer up to higher classes |
2 | |
3 | !752 |
4 | |
5 | // a recognition is identified by (startPos, className, endPos) |
6 | |
7 | |
8 | // key 1 = start position, key 2 = class name, value = end position |
9 | static Map<Integer, MultiMap<S, Integer>> recog; |
10 | |
11 | static MultiMap<S, L<S>> productionMap = new MultiMap; |
12 | |
13 | static boolean debug = false; |
14 | |
15 | p { |
16 | S rulesText = loadSnippet("#1002281"); |
17 | S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280"); |
18 | S mainProd = "line"; |
19 | |
20 | for (S rule : toLinesFullTrim(rulesText)) pcall { |
21 | //printF("Processing rule: *", rule); |
22 | L<S> lr = splitAtJavaToken(rule, "="); |
23 | if (l(lr) != 2) { |
24 | print("Weird rule: " + rule); |
25 | continue; |
26 | } |
27 | S l = lr.get(0), r = lr.get(1); |
28 | L<S> tokr = javaTok(r); |
29 | assertEquals(structure(tokr), 3, l(tokr)); |
30 | S className = assertIdentifier(get(tokr, 1)); |
31 | L<S> tok = javaTok(l); |
32 | tok = mergeBracketThingies(tok); |
33 | //printStructure(tok); |
34 | productionMap.put(className, tok); |
35 | } |
36 | |
37 | print(n(productionMap.size(), "production") + "."); |
38 | print(); |
39 | |
40 | for (S line : toLinesFullTrim(inputText)) { |
41 | print(); |
42 | print(line); |
43 | L<S> tok = javaTok(line); |
44 | //printStructure(tok); |
45 | Pos pos = new Pos(tok); |
46 | L<Integer> x = parseTop(pos, mainProd); |
47 | if (x.contains(l(tok))) |
48 | print(" parsed"); |
49 | else if (!empty(x)) |
50 | print(" beginning matches"); |
51 | else |
52 | print(" not parsed"); |
53 | print(" " + structure(recog)); |
54 | if (!empty(x)) { |
55 | L out = explainMatch(new Pos(tok), x.get(0), mainProd); |
56 | print("Analysis: " + structure(out)); |
57 | } |
58 | } |
59 | } |
60 | |
61 | static L<Integer> parseTop(Pos pos, S mainProd) { |
62 | // init structures |
63 | recog = new TreeMap; |
64 | for (int i = pos.i; i < l(pos.tok); i += 2) |
65 | recog.put(i, new MultiMap); |
66 | |
67 | boolean anyChange; |
68 | do { |
69 | anyChange = false; |
70 | for (int i = pos.i; i < l(pos.tok); i += 2) { |
71 | Pos pos2 = new Pos(pos.tok, i); |
72 | for (S className : productionMap.keySet()) { |
73 | MultiMap<S, Integer> rr = recog.get(i); |
74 | L<Integer> recs = rr.getActual(className); |
75 | L<L<S>> prods = productionMap.get(className); |
76 | for (L<S> prod : prods) { |
77 | int n = l(recs); |
78 | matchProd(pos2, new Pos(prod), className, recs); |
79 | anyChange = anyChange || l(recs) > n; |
80 | } |
81 | rr.clean(className); |
82 | } |
83 | } |
84 | } while (anyChange); |
85 | |
86 | ret recog.get(pos.i).get(mainProd); |
87 | } |
88 | |
89 | static class Pos { |
90 | L<S> tok; |
91 | int i = 1; |
92 | |
93 | *() {} |
94 | *(L<S> *tok) {} |
95 | *(L<S> *tok, int *i) {} |
96 | |
97 | boolean end() { ret i >= l(tok)-1; } |
98 | S get() { ret tok.get(i); } |
99 | public Pos clone() { ret new Pos(tok, i); } |
100 | public boolean equals(O o) { |
101 | if (!(o instanceof Pos)) ret false; |
102 | Pos pos = cast o; |
103 | ret tok == pos.tok && i == pos.i; |
104 | } |
105 | |
106 | S rest() { |
107 | ret join(subList(tok, i)); |
108 | } |
109 | |
110 | Pos plus(int x) { ret new Pos(tok, i + x); } |
111 | } |
112 | |
113 | static void copy(Pos a, Pos b) { |
114 | b.tok = a.tok; |
115 | b.i = a.i; |
116 | } |
117 | |
118 | static void debug(S bla, Pos pos) { |
119 | if (debug) |
120 | print(bla + " on " + quote(pos.rest())); |
121 | } |
122 | |
123 | static void matchProd(Pos pos, Pos prod, S forClass, L<Integer> out) { |
124 | if (prod.end()) |
125 | setAdd(out, pos.i); |
126 | else { |
127 | S p = prod.get(); |
128 | |
129 | if (isBracketedID(p) && neq(p, "<quoted>")) { |
130 | L<Integer> r = recog.get(pos.i).get(unbracket(p)); |
131 | |
132 | // keep parsing for every option |
133 | |
134 | for (int i : cloneList(r)) |
135 | matchProd(new Pos(pos.tok, i), prod.plus(2), forClass, out); |
136 | |
137 | } else { |
138 | // it's a literal |
139 | if (pos.end()) ret; // need a token to match |
140 | S t = pos.get(); |
141 | if (!matchToken(p, t)) |
142 | ret; |
143 | |
144 | matchProd(pos.plus(2), prod.plus(2), forClass, out); |
145 | } |
146 | } |
147 | } |
148 | |
149 | static boolean matchToken(S p, S t) { |
150 | if (eq(p, "<quoted>")) { |
151 | if (!isQuoted(t)) ret false; |
152 | } else if (!(eq(p, "*") || eqic(p, t))) |
153 | ret false; // token mismatch |
154 | ret true; |
155 | } |
156 | |
157 | // assumes that there is a match (pos, class, endPos) |
158 | // and gives explanations of how it was probably matched |
159 | static L explainMatch(Pos pos, int endPos, S forClass) { |
160 | L<L<S>> prods = productionMap.get(forClass); |
161 | new L out; |
162 | for (L<S> prod : prods) |
163 | explainMatch2(pos, new Pos(prod), endPos, forClass, litlist(), out); |
164 | ret out; |
165 | } |
166 | |
167 | // same, but with fixed production |
168 | static void explainMatch2(Pos pos, Pos prod, int endPos, S forClass, L match, L out) { |
169 | if (prod.end()) { |
170 | if (pos.i == endPos) |
171 | out.add(cloneList(match)); |
172 | } else { |
173 | S p = prod.get(); |
174 | |
175 | if (isBracketedID(p) && neq(p, "<quoted>")) { |
176 | S className = unbracket(p); |
177 | L<Integer> r = recog.get(pos.i).get(className); |
178 | |
179 | // keep parsing for every option |
180 | |
181 | for (int i : cloneList(r)) { |
182 | match.add(litlist(pos.i, i, className)); |
183 | explainMatch2(new Pos(pos.tok, i), prod.plus(2), endPos, forClass, match, out); |
184 | removeLast(match); |
185 | } |
186 | } else { |
187 | // it's a literal |
188 | if (pos.end()) ret; // need a token to match |
189 | S t = pos.get(); |
190 | if (!matchToken(p, t)) |
191 | ret; |
192 | |
193 | match.add(litlist(pos.i, p, pos.i+2)); |
194 | explainMatch2(pos.plus(2), prod.plus(2), endPos, forClass, match, out); |
195 | removeLast(match); |
196 | } |
197 | } |
198 | } |
Began life as a copy of #1002297
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1002302 |
Snippet name: | An NL Parser, attempt 4, with analysis (developing) |
Eternal ID of this version: | #1002302/1 |
Text MD5: | 840669e0033495e85d17458ef245b4b7 |
Transpilation MD5: | f416d0536e0d52a0c12cc9bf443ba40a |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-01-04 00:23:15 |
Source code size: | 5391 bytes / 198 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 581 / 604 |
Referenced in: | [show references] |