// Bottom-up parser with weighted classes.
//
// A sentence is split into Words. Every Word carries a map of class names to
// weights. Productions of the form "a + b = c" (loaded by parseGroupings)
// let a Word of class a that is directly followed by a Word of class b be
// combined into a group Word of class c. parse() keeps updating all objects
// until the change counter stops moving.
sclass AI_BottomUpParser2 {
  Map<S, Set<S>> theSet; // category to literal examples
  new LinkedHashSet<Updatable> allObjects; // everything that gets update() calls in parse()
  L<Word> words; // the sentence's words; null until parse() is called
  long changes; // incremented by change(); parse() stops when it no longer moves
  new MultiMap<S, Production> productionsByA; // productions indexed by their left-hand class a
  new MultiMap<S> subClasses; // filled from 3-token lines in parseGroupings()
  bool mergeGroupsInSamePosition = true; // one group Word per token range if true
  new HashMap<IntRange, Word> groupsByPosition; // only used when mergeGroupsInSamePosition is set

  // A grammar rule: class a followed by class b yields class c.
  sclass Production {
    S a, b, c; // a + b = c
    double weight;

    *() {}
    *(S *a, S *b, S *c, double *weight) {}
  }

  // Base class for objects that take part in the update loop.
  class Updatable {
    void update {}

    // Sets a field reflectively and registers a change only if the value differs.
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  // An action to run once the owning Word is found to have class ifClass.
  class Expectation {
    S ifClass;
    Runnable action;

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  // A single token or a group of adjacent Words.
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    // weighted; must keep insertion order because update() converts only the
    // classes added since the last run (counted by classesConvertedToTraits)
    new LinkedHashMap<S, Double> classes;
    int classesConvertedToTraits;
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word is a member of the class named by its own quoted, lower-cased text
      addClass(quote(lower(text)), 1);
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c, 1); // TODO

      // Process expectations
      // (iterate over a copy since fulfilled expectations are moved out of the list)
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.containsKey(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Create traits for classes that were added since the last update
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes.keySet()))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Adds class c plus everything in its hull under subClasses with the given
    // weight. An entry is only written when the new weight is higher than the
    // stored one (toDouble of a missing entry is presumably 0 - verify).
    // Returns true if any entry was added or raised.
    bool addClass(S c, double weight) {
      bool changed = false;
      for (S subClass : makeHull_optimized(subClasses, c))
        if (weight > toDouble(classes.get(subClass))) {
          classes.put(subClass, weight);
          changed = true;
        }
      if (changed) change();
      ret changed;
    }

    // Weight stored for class c (see addClass for how missing entries are read).
    double getWeight(S c) {
      ret toDouble(classes.get(c));
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // One LinkWithTo trait per production whose left-hand class is c.
    void addTraitsForClass(S c) {
      for (Production p : productionsByA.get(c))
        addTrait(new LinkWithTo(p));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString { ret textAndClasses(this); }

    bool hasClass(S c) { ret containsKey(classes, c); }
    S text() { ret text; }
  } // end of class Word

  // Combines a and b (in that order) into a group Word carrying class prod.c.
  // Reuses an existing group if there is one, otherwise creates and links a new one.
  Word makeGroup(Word a, Word b, Production prod) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      g.constituents.add(list);
      double w = formula(prod, a, b);
      if (g.addClass(prod.c, w)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.constituents.add(list);
    g.addClass(prod.c, formula(prod, a, b));
    for (Word w : list)
      w.groups.add(g);
    // the group takes over a's predecessors and b's successors
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  // Weight of a group built with prod from a and b: the production's weight
  // times the average of a's weight for prod.a and b's weight for prod.b.
  double formula(Production prod, Word a, Word b) {
    ret prod.weight*avg(a.getWeight(prod.a), b.getWeight(prod.b));
  }

  // Something attached to a Word (w) that is updated along with it.
  class Trait extends Updatable {
    Word w;
  }

  // For a production a + b = c on a Word of class a: tells each following Word
  // to form a group with w as soon as it turns out to have class b.
  class LinkWithTo extends Trait {
    Production production;
    int expectationsSentToNext; // how many entries of w.next were already handled

    *() {}
    *(Production *production) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(production.b, r {
            makeGroup(w, next, production)
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }

  // Parses the sentence. May only be called once per parser instance.
  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> rawWords = words(sentence);

    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();

    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      if (isQuoted(w.text)) w.addClass("<noun>", 0.8);
      if (isInteger(w.text)) w.addClass("<number>", 1);
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);
    // update everything until a full pass registers no further change
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }

  void printWordsAndGroups() {
    for (Word w : words) print(" " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  // All Words in allObjects that have constituents.
  L<Word> groups() {
    ret [Word w : instancesOf(Word.class, allObjects) | w.isGroup()];
  }

  // Groups spanning the whole sentence.
  // only one with default flags
  L<Word> fullGroups() {
    ret filterByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Word fullGroup() {
    ret findByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  // Sorted class names of all groups spanning the whole sentence.
  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes.keySet());
    ret set;
  }

  // " (class1, class2, ...[, n groupings])"
  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes.keySet())
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  // Loads the grammar. A line with 5 tokens is read as a production
  // (tokens 0, 2, 4 = a, b, c), a line with 3 tokens as a subClasses entry
  // (token 0 -> token 2).
  void parseGroupings() {
    for (S s : mL(ai_language() + " bottom-up groupings")) {
      L<S> tok = javaTokWithAngleBracketsC(s);
      if (l(tok) == 5)
        // the line format carries no weight, so productions get weight 1
        productionsByA.put(tok.get(0), new Production(tok.get(0), tok.get(2), tok.get(4), 1));
      else if (l(tok) == 3)
        subClasses.put(tok.get(0), tok.get(2));
    }
  }

  // Text of g with its first grouping shown through curly braces.
  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  // One rendered string per grouping of g (just the text if g is not a group).
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
}
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1017385 |
Snippet name: | AI_BottomUpParser2 - with weights [dev.] |
Eternal ID of this version: | #1017385/3 |
Text MD5: | f045ef122c40514aeab6857dc7f7cfdd |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-07-19 17:05:03 |
Source code size: | 8310 bytes / 296 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 301 / 332 |
Version history: | 2 change(s) |
Referenced in: | [show references] |