sclass AI_BottomUpParser1 {
  Map<S, Set<S>> wordsToCategories;
  bool splitSplittables = true; // e.g. what's => what is
  S productionsText;
  L<Word> words;
  L<S> rawWords;
  L<S> cnc; // words + N tokens (null if unknown)
  new LinkedHashSet<Updatable> allObjects;
  long changes, iterations;
  new MultiMap<S, WeightedProduction> productionsByA;
  new MultiMap<S, S> subClasses;
  bool mergeGroupsInSamePosition = true;
  new HashMap<IntRange, Word> groupsByPosition;
  bool verbose_callPlausibilityFunction;
  Map<Integer, Set<S>> categoriesAtWordIndex; // user can set categories for each token
  int maxDepth = 100;
  bool maxDepthReached;

  // callbacks
  Runnable afterIteration, afterParse;

  class Updatable {
    void update {}

    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  class Expectation {
    S ifClass;
    Runnable action;

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits;
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) { classes.add(quote(lower(text))); }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) ret false;
      change();
      ret true;
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString { ret textAndClasses(this); }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word

  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) {
          g = _g;
          break;
        }
    }
    if (list == null) list = ll(a, b);

    if (g != null) {
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  class Trait extends Updatable {
    Word w;
  }

  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction;
    int expectationsSentToNext;

    *() {}
    *(S *linkWith, S *linkTo, S *plausibilityFunction) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
              loadFunctions_preferDiskCache(); // note: changes this for whole program
              O result = pcallAndMake(plausibilityFunction, w.text, next.text);
              if (verbose_callPlausibilityFunction)
                print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
              if (isFalse(result)) ret;
            }
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }

  void parse(L<S> tok) {
    cnc = /*simpleSpaces2*/(tok);
    rawWords = codeTokens(cnc);
    _parse();
  }

  AI_BottomUpParser1 parse(fS sentence) {
    rawWords = words_withAngleBrackets(sentence);
    _parse();
    ret this;
  }

  void _parse() {
    if (words != null) fail("only call once");
    if (splitSplittables)
      ai_splitSplittables(rawWords);
    if (wordsToCategories == null)
      wordsToCategories = ai_wordToCategories();
    parseProductions();
    words = new L;
    for i over rawWords: {
      Word w = new Word(rawWords.get(i), i, i+1);
      words.add(w);
      if (isAngleBracketed(w.text)) w.addClass(w.text);
      else if (isQuoted(w.text)) w.addClass("<quoted>");
      else if (isInteger(w.text)) w.addClass("<int>");
      for (S cat : unnull(mapGet(categoriesAtWordIndex, i)))
        w.addClass(cat);
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));
    addAll(allObjects, words);

    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
      ++iterations;
      callF(afterIteration);
    } while (licensed() && lastChanges != changes);
    callF(afterParse);
  }

  void printWordsAndGroups() {
    for (Word w : words)
      print(" " + textAndClasses(w));
    print();
    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  L<Word> words() { ret instancesOf(Word.class, allObjects); }
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }

  // only one with default flags
  L<Word> fullGroups() { ret filterByFields(words(), wordFrom := 0, wordTo := l(words)); }
  Word fullGroup() { ret findByFields(words(), wordFrom := 0, wordTo := l(words)); }

  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }

  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }
  S fullGrouped() { ret grouped(fullGroup()); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  void parseProductions() {
    for (WeightedProduction p : productionsText == null
        ? ai_buParser_parseWeightedProductions()
        : ai_buParser_parseWeightedProductions(productionsText))
      if (p.b != null)
        productionsByA.put(p.a, p);
      else
        subClasses.put(p.a, p.c);
  }

  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (g == null) ret null;
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
}
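
// Minimal usage sketch, assuming the standard data sources referenced above
// (ai_buParser_parseWeightedProductions, ai_wordToCategories) are loadable;
// the example sentence is arbitrary and the groupings printed depend entirely
// on the productions that are loaded.
/*
  AI_BottomUpParser1 parser = new AI_BottomUpParser1();
  parser.parse("the little cat sleeps"); // tokenizes, then iterates updates to a fixed point
  parser.printWordsAndGroups();          // per-word classes plus every group that was formed
  print(parser.fullClasses());           // classes of groups spanning the whole sentence
  print(parser.fullGrouped());           // one bracketed grouping of a full parse (null if none)
*/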