// Bottom-up parser over a word lattice.
//
// parse() splits a sentence into Word objects, links neighbouring words
// through prev/next, and then repeatedly calls update() on every known
// object until the global change counter stops moving. Words acquire
// classes; classes spawn traits (see LinkWithTo); traits plant expectations
// on following words; a fulfilled expectation creates (or extends) a group,
// which is itself a Word spanning several tokens.
sclass AI_BottomUpParser1 {
  Map<S, Set<S>> theSet; // category to literal examples
  new LinkedHashSet<Updatable> allObjects; // everything update()d by the main loop
  L<Word> words; // the sentence's words; null until parse() has run
  long changes; // bumped by change(); used to detect the fixpoint
  new MultiMap<S, Pair<S, S>> groupingsByA; // first class -> (second class, resulting class)
  new MultiMap<S, S> subClasses; // input to makeHull_optimized in Word.addClass
  bool mergeGroupsInSamePosition = true; // true: one group Word per token range
  new HashMap<IntRange, Word> groupsByPosition; // only filled if mergeGroupsInSamePosition

  // Base class for everything the main loop updates.
  class Updatable {
    void update {}

    // Sets a field reflectively and records a change, but only if the
    // value actually differs from the current one.
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  // "Once the word has class ifClass, run action."
  class Expectation {
    S ifClass;
    Runnable action;

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  // A single token or a group of tokens.
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits; // size of classes at the last trait conversion
    new HashSet<S> classesWithTraits; // classes already passed to addTraitsForClass
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}

    // Every word starts out with its own quoted, lower-cased text as a class.
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c);

      // Process expectations (iterate over a copy - fulfilled ones are moved out)
      for (Expectation e : cloneList(expectations))
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }

      // Create traits for classes that don't have them yet.
      // classes is a TreeSet, i.e. sorted, so a newly added class may sit
      // anywhere in iteration order. Skipping "the first n" would therefore
      // re-process an old class and miss the new one; we remember the
      // processed classes explicitly instead. The size comparison is kept
      // as a cheap "anything new?" check (classes only ever grows).
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : cloneList(classes))
          if (classesWithTraits.add(c))
            addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Adds c together with whatever makeHull_optimized derives from
    // subClasses. Returns true (and records a change) iff the class set grew.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false;
      change();
      true;
    }

    void addExpectation(Expectation e) {
      expectations.add(e);
      change();
    }

    // One LinkWithTo trait per grouping rule registered under class c.
    void addTraitsForClass(S c) {
      for (PairS p : groupingsByA.get(c))
        addTrait(new LinkWithTo(p.a, p.b));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString { ret textAndClasses(this); }

    bool hasClass(S c) { ret contains(classes, c); }

    S text() { ret text; }
  } // end of class Word

  // Returns the group Word covering a followed by b, creating it if needed,
  // and makes sure it carries newClass and lists [a, b] as a constituent pair.
  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = ll(a, b);
    Word g = null;

    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }

    if (g != null) {
      g.constituents.add(list);
      // keep the "I am part of" back-references complete when an existing
      // group gains an additional constituent pair
      for (Word w : list) w.groups.add(g);
      g.addClass(newClass);
      ret g;
    }

    // new group
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list) w.groups.add(g);

    // splice the group into the lattice: it inherits a's predecessors
    // and b's successors
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  // Behaviour attached to a word; w is assigned in Word.addTrait.
  class Trait extends Updatable {
    Word w;
  }

  // For every successor of w: once that successor has class linkWith,
  // group w and the successor under class linkTo.
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    int expectationsSentToNext; // how many entries of w.next were handled

    *() {}
    *(S *linkWith, S *linkTo) {}

    void update {
      // w.next is a LinkedHashSet (insertion-ordered), so skipping the
      // first expectationsSentToNext entries yields exactly the new ones
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r { makeGroup(w, next, linkTo) }));
        expectationsSentToNext = l(w.next);
      }
    }
  }

  // Parses the sentence. May only be called once per instance.
  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> rawWords = words(sentence);

    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();

    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      // NOTE(review): both class names below are empty strings. Generic type
      // arguments appear to have been stripped from this file, so these were
      // probably angle-bracketed class names - restore from the original source.
      if (isQuoted(w.text)) w.addClass("");
      if (isInteger(w.text)) w.addClass("");
    }

    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));

    addAll(allObjects, words);

    // run updates until a full pass produces no further change
    long lastChanges;
    do {
      lastChanges = changes;
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }

  void printWordsAndGroups() {
    for (Word w : words) print(" " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  // All Words in allObjects that have constituents.
  L<Word> groups() {
    ret [Word w : instancesOf(Word.class, allObjects) | w.isGroup()];
  }

  // Groups spanning the whole sentence.
  // only one with default flags
  L<Word> fullGroups() {
    ret filterByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Word fullGroup() {
    ret findByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  // Union of the classes of all full-sentence groups, sorted.
  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }

  // " (class1, class2[, n groupings])"
  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping"))
      + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }

  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  // Reads the rule list named "<language> bottom-up groupings".
  // A line with 5 tokens registers a grouping under token 0 as
  // (token 2, token 4); a line with 3 tokens registers token 0 -> token 2
  // in subClasses. Tokens 1 and 3 are ignored (presumably operators - the
  // exact rule syntax is not visible here). Other lines are skipped.
  void parseGroupings() {
    for (S s : mL(ai_language() + " bottom-up groupings")) {
      L<S> tok = javaTokWithAngleBracketsC(s);
      if (l(tok) == 5)
        groupingsByA.put(tok.get(0), pair(tok.get(2), tok.get(4)));
      else if (l(tok) == 3)
        subClasses.put(tok.get(0), tok.get(2));
    }
  }

  // Text of g with nested groups in curly braces, using only the first
  // constituent list.
  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  // One rendered string per constituent list of g (or just g's text if
  // g is not a group).
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
}