Libraryless. Click here for Pure Java version (1893L/13K/42K).
1 | // Idea: For every position, store the productions recognized to start there, then infer up to higher classes |
2 | |
3 | !752 |
4 | |
5 | // a recognition is identified by (startPos, className, endPos) |
6 | |
7 | |
8 | // key 1 = start position, key 2 = class name, value = end position |
9 | static Map<Integer, MultiMap<S, Integer>> recog; |
10 | |
11 | static MultiMap<S, L<S>> productionMap = new MultiMap; |
12 | |
13 | static boolean debug = false; |
14 | |
15 | p {
|
16 | S rulesText = loadSnippet("#1002281");
|
17 | S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
|
18 | S mainProd = "line"; |
19 | |
20 | for (S rule : toLinesFullTrim(rulesText)) pcall {
|
21 | //printF("Processing rule: *", rule);
|
22 | L<S> lr = splitAtJavaToken(rule, "="); |
23 | if (l(lr) != 2) {
|
24 | print("Weird rule: " + rule);
|
25 | continue; |
26 | } |
27 | S l = lr.get(0), r = lr.get(1); |
28 | L<S> tokr = javaTok(r); |
29 | assertEquals(structure(tokr), 3, l(tokr)); |
30 | S className = assertIdentifier(get(tokr, 1)); |
31 | L<S> tok = javaTok(l); |
32 | tok = mergeBracketThingies(tok); |
33 | //printStructure(tok); |
34 | productionMap.put(className, tok); |
35 | } |
36 | |
37 | print(n(productionMap.size(), "production") + "."); |
38 | print(); |
39 | |
40 | for (S line : toLinesFullTrim(inputText)) {
|
41 | print(); |
42 | print(line); |
43 | L<S> tok = javaTok(line); |
44 | //printStructure(tok); |
45 | Pos pos = new Pos(tok); |
46 | L<Integer> x = parseTop(pos, mainProd); |
47 | if (x.contains(l(tok))) |
48 | print(" parsed");
|
49 | else if (!empty(x)) |
50 | print(" beginning matches");
|
51 | else |
52 | print(" not parsed");
|
53 | print(" " + structure(recog));
|
54 | } |
55 | } |
56 | |
57 | static L<Integer> parseTop(Pos pos, S mainProd) {
|
58 | // init structures |
59 | recog = new TreeMap; |
60 | for (int i = pos.i; i < l(pos.tok); i += 2) |
61 | recog.put(i, new MultiMap); |
62 | |
63 | boolean anyChange; |
64 | do {
|
65 | anyChange = false; |
66 | for (int i = pos.i; i < l(pos.tok); i += 2) {
|
67 | Pos pos2 = new Pos(pos.tok, i); |
68 | for (S className : productionMap.keySet()) {
|
69 | MultiMap<S, Integer> rr = recog.get(i); |
70 | L<Integer> recs = rr.getActual(className); |
71 | L<L<S>> prods = productionMap.get(className); |
72 | for (L<S> prod : prods) {
|
73 | int n = l(recs); |
74 | matchProd(pos2, new Pos(prod), className, recs); |
75 | anyChange = anyChange || l(recs) > n; |
76 | } |
77 | rr.clean(className); |
78 | } |
79 | } |
80 | } while (anyChange); |
81 | |
82 | ret recog.get(pos.i).get(mainProd); |
83 | } |
84 | |
/**
 * A cursor into a token list: the list itself plus a current index.
 * Two Pos objects are equal when they refer to the very same list instance
 * and the same index.
 */
static class Pos {
  L<S> tok;   // token list this cursor walks over
  int i = 1;  // current index; starts at 1 unless given explicitly

  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  // true once i has reached the last index of tok or gone beyond it
  boolean end() { ret i >= l(tok)-1; }

  // token at the current index
  S get() { ret tok.get(i); }

  // independent cursor over the same list at the same index
  public Pos clone() { ret new Pos(tok, i); }

  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    // identity comparison on the list, value comparison on the index
    ret tok == pos.tok && i == pos.i;
  }

  // Kept consistent with equals: the list contributes by identity, not by
  // content, so equal cursors always hash alike.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  // all tokens from the current index onwards, joined into one string
  S rest() {
    ret join(subList(tok, i));
  }

  // new cursor over the same list, moved by x indices
  Pos plus(int x) { ret new Pos(tok, i + x); }
}
108 | |
109 | static void copy(Pos a, Pos b) {
|
110 | b.tok = a.tok; |
111 | b.i = a.i; |
112 | } |
113 | |
114 | static void debug(S bla, Pos pos) {
|
115 | if (debug) |
116 | print(bla + " on " + quote(pos.rest())); |
117 | } |
118 | |
119 | static void matchProd(Pos pos, Pos prod, S forClass, L<Integer> out) {
|
120 | if (prod.end()) {
|
121 | if (!out.contains(pos.i)) |
122 | out.add(pos.i); |
123 | ret; |
124 | } |
125 | |
126 | S p = prod.get(); |
127 | |
128 | if (isBracketedID(p) && neq(p, "<quoted>")) {
|
129 | |
130 | MultiMap<S, Integer> rr = recog.get(pos.i); |
131 | L<Integer> r = rr.get(unbracket(p)); |
132 | |
133 | // keep parsing for every option |
134 | |
135 | for (int i : cloneList(r)) |
136 | matchProd(new Pos(pos.tok, i), prod.plus(2), forClass, out); |
137 | |
138 | } else {
|
139 | // it's a literal |
140 | if (pos.end()) ret; // need a token to match |
141 | S t = pos.get(); |
142 | if (eq(p, "<quoted>")) {
|
143 | if (!isQuoted(t)) ret; |
144 | } else if (!(eq(p, "*") || eqic(p, t))) |
145 | ret; // token mismatch |
146 | |
147 | matchProd(pos.plus(2), prod.plus(2), forClass, out); |
148 | } |
149 | } |
Began life as a copy of #1002289
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1002297 |
| Snippet name: | An NL Parser, attempt 3 (works!) |
| Eternal ID of this version: | #1002297/1 |
| Text MD5: | 68aea122efd42b0997f652f7524a1e0c |
| Transpilation MD5: | de2bbceffa659d7ef84d9adfa6c0617d |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX source code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2016-01-03 20:38:35 |
| Source code size: | 3910 bytes / 149 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 917 / 1107 |
| Referenced in: | [show references] |