Libraryless. Click here for Pure Java version (1862L/12K/42K).
!752 static MultiMap<S, L<S>> productionMap = new MultiMap; static boolean debug = false; p { S rulesText = loadSnippet("#1002281"); S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280"); S mainProd = "line"; for (S rule : toLinesFullTrim(rulesText)) pcall { printF("Processing rule: *", rule); L<S> lr = splitAtJavaToken(rule, "="); if (l(lr) != 2) { print("Weird rule: " + rule); continue; } S l = lr.get(0), r = lr.get(1); L<S> tokr = javaTok(r); assertEquals(structure(tokr), 3, l(tokr)); S className = assertIdentifier(get(tokr, 1)); L<S> tok = javaTok(l); tok = mergeBracketThingies(tok); //printStructure(tok); productionMap.put(className, tok); } print(n(productionMap.size(), "production") + "."); print(); for (S line : toLinesFullTrim(inputText)) { print(line); L<S> tok = javaTok(line); Pos pos = new Pos(tok); if (parseClass(pos, mainProd) != null) print(" parsed"); else print(" not parsed"); } } static class Pos { L<S> tok; int i = 1; *() {} *(L<S> *tok) {} *(L<S> *tok, int *i) {} boolean end() { ret i >= l(tok)-1; } public Pos clone() { ret new Pos(tok, i); } public boolean equals(O o) { if (!(o instanceof Pos)) ret false; Pos pos = cast o; ret tok == pos.tok && i == pos.i; } S rest() { ret join(subList(tok, i)); } } static void copy(Pos a, Pos b) { b.tok = a.tok; b.i = a.i; } static void debug(S bla, Pos pos) { if (debug) print(bla + " on " + quote(pos.rest())); } // endless loop detector static Pos haltPos; static new HashSet<S> haltClasses; static O parseClass(Pos pos, S name) { if (debug) debug("parseClass " + name, pos); if (checkHalt(pos, name)) ret null; L<L<S>> prods = productionMap.get(name); if (empty(prods)) ret null; // weird, unknown class name for (L<S> prod : prods) { Pos _pos = pos.clone(); O x = parseProd(_pos, prod); if (x != null) { copy(_pos, pos); ret x; } } ret null; } // returns true if we should halt because of endless looping static boolean checkHalt(Pos pos, S className) { if (!eq(haltPos, pos)) { haltPos = pos.clone(); haltClasses = lithashset(className); return false; } else { if (haltClasses.contains(className)) { if (debug) print("Endless loop: " + structure(pos) + " " + structure(haltClasses)); ret true; } else { haltClasses.add(className); print("checkHalt: same pos, classes now: " + structure(haltClasses)); ret false; } } } static O parseProd(Pos pos, L<S> prod) { if (debug) debug("parseProd " + structure(prod), pos); for (int i = 1; i < l(prod); i += 2) { S p = prod.get(i); S t = pos.tok.get(pos.i); if (isBracketedID(p)) { Pos _pos = pos.clone(); O x = parseClass(_pos, unbracket(p)); if (x == null) ret null; copy(_pos, pos); // keep parsing production } else { // it's a literal if (pos.end()) ret null; // need a token to match if (!(eq(p, "*") || eqic(p, t))) ret null; // token mismatch pos.i += 2; // consume & keep parsing } } if (debug) debug("ok " + structure(prod), pos); ret true; // production succeeded } static boolean isBracketedID(S s) { ret s.startsWith("<") && s.endsWith(">"); } static S unbracket(S s) { ret isBracketedID(s) ? s.substring(1, l(s)-1) : s; } // angle bracket things like <quoted> static L<S> mergeBracketThingies(L<S> tok) { tok = cloneList(tok); for (int i = 1; i+4 < l(tok); i += 2) if (eq(get(tok, i), "<") && eq(get(tok, i+1), "") && isIdentifier(get(tok, i+2)) && eq(get(tok, i+3), "") && eq(get(tok, i+4), ">")) { tok.set(i, "<" + tok.get(i+2) + ">"); tok.remove(i+4); tok.remove(i+3); tok.remove(i+2); tok.remove(i+1); } ret tok; }
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1002282 |
Snippet name: | An NL Parser (developing) |
Eternal ID of this version: | #1002282/1 |
Text MD5: | 4fd683174302221441cdb839c8e2412e |
Transpilation MD5: | 8886016cc6fb751a4b2a57ffd6937c5c |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-01-02 20:35:13 |
Source code size: | 4074 bytes / 159 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 767 / 857 |
Referenced in: | #1002289 - An NL Parser, attempt 2 (developing) #3000189 - Answer for stefanreich(>> t bla) #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |