Libraryless. Click here for Pure Java version (1893L/13K/42K).
// Idea: For every position, store the productions recognized to start there, then infer up to higher classes !752 // a recognition is identified by (startPos, className, endPos) // key 1 = start position, key 2 = class name, value = end position static Map<Integer, MultiMap<S, Integer>> recog; static MultiMap<S, L<S>> productionMap = new MultiMap; static boolean debug = false; p { S rulesText = loadSnippet("#1002281"); S inputText = loadSnippet("#1002286") + "\n" + loadSnippet("#1002280"); S mainProd = "line"; for (S rule : toLinesFullTrim(rulesText)) pcall { //printF("Processing rule: *", rule); L<S> lr = splitAtJavaToken(rule, "="); if (l(lr) != 2) { print("Weird rule: " + rule); continue; } S l = lr.get(0), r = lr.get(1); L<S> tokr = javaTok(r); assertEquals(structure(tokr), 3, l(tokr)); S className = assertIdentifier(get(tokr, 1)); L<S> tok = javaTok(l); tok = mergeBracketThingies(tok); //printStructure(tok); productionMap.put(className, tok); } print(n(productionMap.size(), "production") + "."); print(); for (S line : toLinesFullTrim(inputText)) { print(); print(line); L<S> tok = javaTok(line); //printStructure(tok); Pos pos = new Pos(tok); L<Integer> x = parseTop(pos, mainProd); if (x.contains(l(tok))) print(" parsed"); else if (!empty(x)) print(" beginning matches"); else print(" not parsed"); print(" " + structure(recog)); } } static L<Integer> parseTop(Pos pos, S mainProd) { // init structures recog = new TreeMap; for (int i = pos.i; i < l(pos.tok); i += 2) recog.put(i, new MultiMap); boolean anyChange; do { anyChange = false; for (int i = pos.i; i < l(pos.tok); i += 2) { Pos pos2 = new Pos(pos.tok, i); for (S className : productionMap.keySet()) { MultiMap<S, Integer> rr = recog.get(i); L<Integer> recs = rr.getActual(className); L<L<S>> prods = productionMap.get(className); for (L<S> prod : prods) { int n = l(recs); matchProd(pos2, new Pos(prod), className, recs); anyChange = anyChange || l(recs) > n; } rr.clean(className); } } } while (anyChange); ret recog.get(pos.i).get(mainProd); } static class Pos { L<S> tok; int i = 1; *() {} *(L<S> *tok) {} *(L<S> *tok, int *i) {} boolean end() { ret i >= l(tok)-1; } S get() { ret tok.get(i); } public Pos clone() { ret new Pos(tok, i); } public boolean equals(O o) { if (!(o instanceof Pos)) ret false; Pos pos = cast o; ret tok == pos.tok && i == pos.i; } S rest() { ret join(subList(tok, i)); } Pos plus(int x) { ret new Pos(tok, i + x); } } static void copy(Pos a, Pos b) { b.tok = a.tok; b.i = a.i; } static void debug(S bla, Pos pos) { if (debug) print(bla + " on " + quote(pos.rest())); } static void matchProd(Pos pos, Pos prod, S forClass, L<Integer> out) { if (prod.end()) { if (!out.contains(pos.i)) out.add(pos.i); ret; } S p = prod.get(); if (isBracketedID(p) && neq(p, "<quoted>")) { MultiMap<S, Integer> rr = recog.get(pos.i); L<Integer> r = rr.get(unbracket(p)); // keep parsing for every option for (int i : cloneList(r)) matchProd(new Pos(pos.tok, i), prod.plus(2), forClass, out); } else { // it's a literal if (pos.end()) ret; // need a token to match S t = pos.get(); if (eq(p, "<quoted>")) { if (!isQuoted(t)) ret; } else if (!(eq(p, "*") || eqic(p, t))) ret; // token mismatch matchProd(pos.plus(2), prod.plus(2), forClass, out); } }
Began life as a copy of #1002289
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1002297 |
Snippet name: | An NL Parser, attempt 3 (works!) |
Eternal ID of this version: | #1002297/1 |
Text MD5: | 68aea122efd42b0997f652f7524a1e0c |
Transpilation MD5: | de2bbceffa659d7ef84d9adfa6c0617d |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-01-03 20:38:35 |
Source code size: | 3910 bytes / 149 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 647 / 775 |
Referenced in: | [show references] |