Libraryless. Click here for Pure Java version (1895L/13K/42K).
// Idea: For every position, store the productions recognized to start there, then infer up to higher classes !752 static class Recognized { S className; Pos endPos; L<S> prod; *(S *className, Pos *endPos, L<S> *prod) {} *() {} } static MultiMap<Integer, Recognized> recog; static MultiMap<S, L<S>> productionMap = new MultiMap; static boolean debug = false; p { S rulesText = loadSnippet("#1002281"); S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280"); S mainProd = "line"; for (S rule : toLinesFullTrim(rulesText)) pcall { printF("Processing rule: *", rule); L<S> lr = splitAtJavaToken(rule, "="); if (l(lr) != 2) { print("Weird rule: " + rule); continue; } S l = lr.get(0), r = lr.get(1); L<S> tokr = javaTok(r); assertEquals(structure(tokr), 3, l(tokr)); S className = assertIdentifier(get(tokr, 1)); L<S> tok = javaTok(l); tok = mergeBracketThingies(tok); printStructure(tok); productionMap.put(className, tok); } print(n(productionMap.size(), "production") + "."); print(); for (S line : toLinesFullTrim(inputText)) { print(line); L<S> tok = javaTok(line); printStructure(tok); Pos pos = new Pos(tok); O x = parseTop(pos, mainProd); if (x != null) print(" parsed: " + structure(x)); else print(" not parsed, stuff found: " + recogToString()); } } static O parseTop(Pos pos, S mainProd) { // init structures recog = new MultiMap; boolean anyChange; do { anyChange = false; for (int i = 1; i < l(pos.tok); i += 2) { Pos pos2 = new Pos(pos.tok, i); for (S className : productionMap.keySet()) { if (getRecognition(pos2, className) != null) continue; L<L<S>> prods = productionMap.get(className); for (L<S> prod : prods) { Pos _pos = pos2.clone(); O x = matchProd(_pos, prod, className); if (x != null) { recog.put(pos2.i, new Recognized(className, _pos, prod)); if (debug) print("new stuff at " + pos2.i + ": " + className); anyChange = true; break; // try next class - could also omit this } } } } } while (anyChange); /*if (debug) print("Stuff found: " + structure(recog));*/ Recognized rec = getRecognition(pos, mainProd); if (debug) print("rec: " + structure(rec)); if (rec != null) { copy(rec.endPos, pos); ret rec.prod; } else ret null; } static Recognized getRecognition(Pos pos, S className) { for (Recognized r : recog.get(pos.i)) if (eq(r.className, className)) ret r; ret null; } static class Pos { L<S> tok; int i = 1; *() {} *(L<S> *tok) {} *(L<S> *tok, int *i) {} boolean end() { ret i >= l(tok)-1; } public Pos clone() { ret new Pos(tok, i); } public boolean equals(O o) { if (!(o instanceof Pos)) ret false; Pos pos = cast o; ret tok == pos.tok && i == pos.i; } S rest() { ret join(subList(tok, i)); } } static void copy(Pos a, Pos b) { b.tok = a.tok; b.i = a.i; } static void debug(S bla, Pos pos) { if (debug) print(bla + " on " + quote(pos.rest())); } static O matchProd(Pos pos, L<S> prod, S forClass) { /*if (debug) debug("matchProd " + structure(prod), pos);*/ for (int i = 1; i < l(prod); i += 2) { S p = prod.get(i); S t = pos.tok.get(pos.i); if (isBracketedID(p)) { Recognized rec = getRecognition(pos, unbracket(p)); if (eq(forClass, "line")) print("p=" + quote(p) + ", t=" + quote(t) + ", i=" + pos.i + ", rec= " + structure(rec)); if (rec == null) ret null; copy(rec.endPos, pos); // keep parsing production } else { // it's a literal if (pos.end()) ret null; // need a token to match if (eq(p, "<quoted>")) { if (!isQuoted(t)) ret null; } else if (!(eq(p, "*") || eqic(p, t))) ret null; // token mismatch pos.i += 2; // consume & keep parsing } } /*if (debug) debug("ok " + structure(prod), pos);*/ ret true; // production succeeded } static S recogToString() { new L<S> l; for (int i : recog.keySet()) { for (Recognized r : recog.get(i)) l.add(i + "/" + r.className); } ret join(", ", l); }
Began life as a copy of #1002282
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1002289 |
Snippet name: | An NL Parser, attempt 2 (developing) |
Eternal ID of this version: | #1002289/1 |
Text MD5: | 5ab08edd3201474b13957a4f70d88f79 |
Transpilation MD5: | 6c57e5edca5371aa927302860bf61e2e |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-01-03 01:49:16 |
Source code size: | 4516 bytes / 175 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 671 / 724 |
Referenced in: | #1002297 - An NL Parser, attempt 3 (works!) #3000189 - Answer for stefanreich(>> t bla) #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |