// Bottom-up parser over a sentence's words.
//
// Every input word becomes a Word object carrying a set of classes. Productions
// of the form (a, b, c) with b != null make a word of class a send an
// "expectation" to each following word; when that word has class b, the two are
// combined into a group Word with class c. Productions with b == null are
// stored as sub-class relations (a -> c). _parse() keeps calling update() on
// all objects until the change counter stops moving.
sclass AI_BottomUpParser1 {
  Map<S, Set<S>> wordsToCategories; // word text -> classes; loaded in _parse() if null
  new LinkedHashSet<Updatable> allObjects; // everything the fixpoint loop updates
  L<Word> words; // the input words (not the groups); set by _parse()
  L<S> rawWords;
  L<S> cnc; // words + N tokens (null if unknown)
  long changes; // bumped by change(); the fixpoint loop stops when it is stable
  new MultiMap<S, WeightedProduction> productionsByA;
  new MultiMap<S> subClasses;
  bool mergeGroupsInSamePosition = true; // one group Word per token range
  new HashMap<IntRange, Word> groupsByPosition; // used only when merging by position
  bool verbose_callPlausibilityFunction;
  bool observeNTokenHygiene; // if set, new groups are checked with checkNTokenHygiene

  // Base class for everything that takes part in the fixpoint loop.
  class Updatable {
    void update {}

    // Sets a field by name and registers a change only if the value differs.
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  // "If the word I am attached to has class ifClass, run action."
  class Expectation {
    S ifClass;
    Runnable action;

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  // A single input word or a group of adjacent words.
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits; // l(classes) at the time of the last conversion
    new HashSet<S> classesWithTraits; // classes already passed to addTraitsForClass
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word carries its own quoted, lower-cased text as a class
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations
      // (iterate over a copy: fulfilled expectations are moved out of the list)
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Convert newly added classes to traits.
      // classes is a sorted set, so "the first N entries" are not necessarily
      // the N classes converted earlier; we therefore remember the converted
      // classes explicitly instead of skipping by count.
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : cloneList(classes))
          if (classesWithTraits.add(c))
            addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Adds class c together with whatever makeHull_optimized(subClasses, c)
    // yields (presumably c plus its super classes - confirm against helper).
    // Returns true and registers a change iff the class set grew.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false;
      change();
      true;
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // One LinkWithTo trait per production whose first element is c.
    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString { ret textAndClasses(this); }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word

  // Combines adjacent words a and b into a group with class newClass.
  // An existing group is reused (by token range when mergeGroupsInSamePosition
  // is set, otherwise by identical constituent list). Returns the group, or
  // null if a new group was rejected by the n-token hygiene check.
  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      g.constituents.add(list);
      // keep the "I am part of" back-references in sync for this grouping too
      for (Word w : list)
        w.groups.add(g);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group, check hygiene
    if (observeNTokenHygiene && cnc != null) {
      // slice of cnc from index a.wordFrom*2 through b.wordTo*2 inclusive
      L<S> t = subList(cnc, a.wordFrom*2, b.wordTo*2+1);
      print("Checking hygiene: " + sfu(t));
      if (!checkNTokenHygiene(t)) {
        print("Rejecting unhygienic grouping: " + join(t));
        null;
      }
    }

    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);

    // the group inherits a's predecessors and b's successors, in both directions
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  // Something attached to a word that is updated along with it.
  class Trait extends Updatable {
    Word w;
  }

  // Trait: "if a following word has class linkWith, group with it as linkTo".
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction;
    int expectationsSentToNext; // how many entries of w.next were handled already

    *() {}
    *(S *linkWith, S *linkTo, S *plausibilityFunction) {}

    void update {
      // w.next is insertion-ordered, so skipping the first N entries yields
      // exactly the successors added since the last update
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
              O result = pcallAndMake(plausibilityFunction, w.text, next.text);
              if (verbose_callPlausibilityFunction)
                print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
              // a false result vetoes the grouping
              if (isFalse(result)) ret;
            }
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }

  // Parses a token list that includes N tokens (stored in cnc).
  // Like parse(sentence), this may only be called once per parser instance,
  // because _parse() accumulates into allObjects and the production maps.
  void parse(L<S> tok) {
    if (words != null) fail("only call once");
    cnc = tok;
    rawWords = codeTokens(cnc);
    _parse();
  }

  // Parses a sentence. May only be called once per parser instance.
  void parse(fS sentence) {
    if (words != null) fail("only call once");
    if (observeNTokenHygiene) {
      // the token-list variant runs _parse() itself; do not parse a second time
      parse(javaTokNPunctuation(sentence));
      ret;
    }
    rawWords = main.words(sentence);
    _parse();
  }

  // Builds the Word objects from rawWords, links neighbours and runs the
  // update loop until no further changes are registered.
  void _parse() {
    // NOTE(review): if this changes the number of entries in rawWords, the
    // index arithmetic against cnc no longer lines up - confirm against helper
    ai_splitSplittables(rawWords);

    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
    parseGroupings();

    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      // iterate over a copy: updates may add new groups to allObjects
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }

  void printWordsAndGroups() {
    for (Word w : words) print(" " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  // All Word objects, i.e. input words and groups.
  L<Word> words() { ret instancesOf(Word.class, allObjects); }
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }

  // only one with default flags
  L<Word> fullGroups() {
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
  }

  // A Word spanning the whole input, as found by findByFields.
  Word fullGroup() {
    ret findByFields(words(), wordFrom := 0, wordTo := l(words));
  }

  // Union of the classes of all full-sentence groups.
  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }

  // " (class1, class2, N groupings)" - the grouping count only for groups.
  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  // Makes a and b neighbours (a before b).
  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  // Loads the productions: those with a second element go to productionsByA,
  // the others are recorded as sub-class relations a -> c.
  void parseGroupings() {
    for (WeightedProduction p : ai_buParser_parseWeightedProductions())
      if (p.b != null)
        productionsByA.put(p.a, p);
      else
        subClasses.put(p.a, p.c);
  }

  // TODO: now there are multiple groupings
  // Text of g with braces around multi-token parts; uses the first grouping only.
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  // One rendered string per grouping of g (just g's text if g is not a group).
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }

  // Text of the words in [wordFrom, wordTo). With cnc available, the tokens
  // between the words are kept as they are; otherwise the words are joined
  // with single spaces.
  S textWithNTokens(int wordFrom, int wordTo) {
    if (cnc == null) ret joinWithSpace(subList(rawWords, wordFrom, wordTo));
    ret join(subList(cnc, wordFrom*2+1, wordTo*2));
  }
}
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1017479 |
Snippet name: | AI_BottomUpParser1 with observeNTokenHygiene [dev., doesn't really work] |
Eternal ID of this version: | #1017479/10 |
Text MD5: | 521afc8ae22c89e978bd34aa0bfd3ea9 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-07-23 22:00:58 |
Source code size: | 9094 bytes / 315 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 286 / 466 |
Version history: | 9 change(s) |
Referenced in: | #1017482 - Test Bottom-Up Parser with n-token hygiene [forget it] |