Libraryless. Click here for Pure Java version (1862L/12K/42K).
1 | !752 |
2 | |
// Grammar table: production class name -> every token list registered under that name.
static MultiMap<S, L<S>> productionMap = new MultiMap<S, L<S>>();

// Set to true to make the parser print a trace of what it is doing.
static boolean debug;
6 | |
// Program entry point: loads the rule snippet and the input snippets,
// registers every rule in productionMap, then reports for each input line
// whether the main production ("line") could be parsed from it.
p {
  S rulesText = loadSnippet("#1002281");
  S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280");
  S mainProd = "line";

  // One rule per line, written as "<pattern> = <className>".
  for (S rule : toLinesFullTrim(rulesText)) pcall {
    printF("Processing rule: *", rule);

    L<S> sides = splitAtJavaToken(rule, "=");
    if (l(sides) != 2) {
      print("Weird rule: " + rule);
      continue;
    }
    S pattern = sides.get(0), target = sides.get(1);

    // The right-hand side must tokenize to exactly 3 entries,
    // the middle one being the identifier that names the production class.
    L<S> targetTok = javaTok(target);
    assertEquals(structure(targetTok), 3, l(targetTok));
    S className = assertIdentifier(get(targetTok, 1));

    // The left-hand side is stored as a token list with <name> references merged.
    L<S> patternTok = mergeBracketThingies(javaTok(pattern));
    productionMap.put(className, patternTok);
  }

  print(n(productionMap.size(), "production") + ".");
  print();

  for (S line : toLinesFullTrim(inputText)) {
    print(line);
    Pos pos = new Pos(javaTok(line));
    // NOTE(review): success only requires a non-null result; pos.end() is not
    // checked, so a match on a prefix of the line also counts - confirm intended.
    boolean parsed = parseClass(pos, mainProd) != null;
    print(parsed ? " parsed" : " not parsed");
  }
}
42 | |
// A cursor into a token list: the list itself plus the index of the current token.
// The index starts at 1 and the parser advances it in steps of 2.
static class Pos {
  L<S> tok; // the token list being parsed
  int i = 1; // index of the current token within tok

  // JavaX constructor shorthand; starred parameters are stored in the same-named fields.
  *() {}
  *(L<S> *tok) {}
  *(L<S> *tok, int *i) {}

  // True when the index has reached the last entry of tok (or gone past it).
  boolean end() { ret i >= l(tok)-1; }

  // Returns an independent cursor over the same (shared) token list.
  public Pos clone() { ret new Pos(tok, i); }

  // Two cursors are equal when they reference the very same list object
  // and stand at the same index.
  public boolean equals(O o) {
    if (!(o instanceof Pos)) ret false;
    Pos pos = cast o;
    ret tok == pos.tok && i == pos.i;
  }

  // Kept consistent with equals(): the list contributes by identity, not by content.
  public int hashCode() {
    ret 31 * System.identityHashCode(tok) + i;
  }

  // The input from the current index onwards, joined into one string.
  S rest() {
    ret join(subList(tok, i));
  }
}
63 | |
// Overwrites cursor b with the state of cursor a (token list reference and index).
static void copy(Pos a, Pos b) {
  b.i = a.i;
  b.tok = a.tok;
}
68 | |
// Trace helper: when the debug flag is set, prints the message followed by
// the quoted remainder of the input at the given position.
static void debug(S bla, Pos pos) {
  if (!debug) return;
  S remaining = quote(pos.rest());
  print(bla + " on " + remaining);
}
73 | |
// State of the endless loop detector (maintained by checkHalt):
// the position it last recorded ...
static Pos haltPos;
// ... and the production classes attempted at that position so far.
static HashSet<S> haltClasses = new HashSet<S>();
77 | |
// Tries every production registered for the class `name` at `pos`, in order.
// On the first production that succeeds, `pos` is advanced to where that
// production stopped and its (non-null) result is returned.
// Returns null - leaving `pos` untouched - when the loop detector fires,
// the class has no productions, or no production matches.
static O parseClass(Pos pos, S name) {
  if (debug) debug("parseClass " + name, pos);

  if (checkHalt(pos, name)) ret null;

  L<L<S>> alternatives = productionMap.get(name);
  if (empty(alternatives)) ret null; // weird, unknown class name

  for (L<S> alternative : alternatives) {
    // Work on a copy so a failed attempt does not move the caller's cursor.
    Pos attempt = pos.clone();
    O result = parseProd(attempt, alternative);
    if (result == null) continue;
    copy(attempt, pos);
    ret result;
  }

  ret null;
}
92 | |
// Endless loop detector. Returns true if parsing should halt because the
// class `className` has already been attempted at exactly this position
// since the position was last recorded; otherwise records the attempt and
// returns false.
static boolean checkHalt(Pos pos, S className) {
  if (!eq(haltPos, pos)) {
    // Different position than the recorded one: start tracking afresh.
    haltPos = pos.clone();
    haltClasses = lithashset(className);
    ret false;
  }

  if (haltClasses.contains(className)) {
    if (debug)
      print("Endless loop: " + structure(pos) + " " + structure(haltClasses));
    ret true;
  }

  haltClasses.add(className);
  // Trace output only; guarded like every other diagnostic print in this file.
  if (debug)
    print("checkHalt: same pos, classes now: " + structure(haltClasses));
  ret false;
}
111 | |
// Matches a single production (token list, code tokens at odd indices)
// against the input at `pos`.
// Items of the form <name> are parsed recursively via parseClass; any other
// item is a literal that must equal the current input token
// case-insensitively, with "*" matching any one token.
// Returns true (as an Object) on success, with `pos` advanced past the
// match; returns null on failure, in which case `pos` may already have been
// advanced, so callers should pass a clone.
static O parseProd(Pos pos, L<S> prod) {
  if (debug)
    debug("parseProd " + structure(prod), pos);

  for (int i = 1; i < l(prod); i += 2) {
    S p = prod.get(i);
    if (isBracketedID(p)) {
      Pos _pos = pos.clone();
      O x = parseClass(_pos, unbracket(p));
      if (x == null) ret null;
      copy(_pos, pos);
      // keep parsing production
    } else {
      // it's a literal
      if (pos.end()) ret null; // need a token to match
      // Read the input token only after the end check: once the input is
      // exhausted, pos.i is outside the list and get() would throw.
      S t = pos.tok.get(pos.i);
      if (!(eq(p, "*") || eqic(p, t)))
        ret null; // token mismatch
      pos.i += 2; // consume & keep parsing
    }
  }

  if (debug)
    debug("ok " + structure(prod), pos);
  ret true; // production succeeded
}
138 | |
// True if s begins with '<' and ends with '>' (e.g. "<quoted>").
static boolean isBracketedID(S s) {
  int n = s.length();
  // A single character cannot be both the opening and the closing bracket.
  if (n < 2) ret false;
  ret s.charAt(0) == '<' && s.charAt(n-1) == '>';
}
142 | |
// Strips the enclosing '<' and '>' from a bracketed ID;
// any other string is returned unchanged.
static S unbracket(S s) {
  if (!isBracketedID(s)) ret s;
  ret s.substring(1, l(s)-1);
}
146 | |
// Merges angle bracket references like <quoted> into single tokens.
// Works on a copy of the list: wherever the entries "<", "", identifier, "",
// ">" follow each other (i.e. nothing between the brackets and the name),
// they are replaced by the one token "<identifier>".
static L<S> mergeBracketThingies(L<S> tok) {
  tok = cloneList(tok);
  int i = 1;
  while (i+4 < l(tok)) {
    boolean isReference =
      eq(get(tok, i), "<")
      && eq(get(tok, i+1), "")
      && isIdentifier(get(tok, i+2))
      && eq(get(tok, i+3), "")
      && eq(get(tok, i+4), ">");
    if (isReference) {
      tok.set(i, "<" + tok.get(i+2) + ">");
      // Drop the four entries that were folded into tok[i], back to front.
      for (int k = i+4; k > i; k--)
        tok.remove(k);
    }
    i += 2;
  }
  ret tok;
}
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1002282 |
| Snippet name: | An NL Parser (developing) |
| Eternal ID of this version: | #1002282/1 |
| Text MD5: | 4fd683174302221441cdb839c8e2412e |
| Transpilation MD5: | 8886016cc6fb751a4b2a57ffd6937c5c |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX source code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2016-01-02 20:35:13 |
| Source code size: | 4074 bytes / 159 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 1086 / 1209 |
| Referenced in: | [show references] |