Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

296
LINES

< > BotCompany Repo | #1017385 // AI_BottomUpParser2 - with weights [dev.]

JavaX fragment (include)

1  
sclass AI_BottomUpParser2 {
2  
  Map<S, Set<S>> theSet; // category to literal examples; parse() fills it from ai_wordCategoriesWithElements() when null
3  
  new LinkedHashSet<Updatable> allObjects; // all words (added in parse) and groups (added in makeGroup); parse() updates these until nothing changes
4  
  L<Word> words; // the sentence's tokens as Word objects; null until parse() has run
5  
  long changes; // counter incremented by change(); parse() stops when a full pass leaves it untouched
6  
  new MultiMap<S, Production> productionsByA; // productions looked up by a word's class in Word.addTraitsForClass
7  
  new MultiMap<S> subClasses; // filled by parseGroupings() from 3-token lines; used by Word.addClass via makeHull_optimized
8  
  bool mergeGroupsInSamePosition = true; // if true, makeGroup keeps a single group per token range
9  
  new HashMap<IntRange, Word> groupsByPosition; // token range -> group; only used when mergeGroupsInSamePosition is set
10  
  
11  
  // A weighted rule "a + b = c": a word of class a followed by a word of
  // class b is grouped, and the group receives class c.
  sclass Production {
    S a; // class required on the left word
    S b; // class required on the right word
    S c; // class given to the resulting group
    double weight; // factor used by formula()

    *() {}

    *(S a, S b, S c, double weight) {
      this.a = a;
      this.b = b;
      this.c = c;
      this.weight = weight;
    }
  }
18  
19  
  // Base class of everything that parse() updates over and over until
  // the change counter stops moving.
  class Updatable {
    // Does nothing by default; subclasses override.
    void update {}

    // Sets the named field through the get/set helpers and records a
    // change, but only when the new value differs from the current one.
    void setField(S field, O value) {
      O current = get(this, field);
      if (!eq(current, value)) {
        set(this, field, value);
        change();
      }
    }
  }
28  
  
29  
  // A pending reaction placed on a word: once the word carries class
  // ifClass, Word.update() runs the action.
  class Expectation {
    S ifClass; // class the word must have
    Runnable action; // what to run when it does

    *() {}

    *(S ifClass, Runnable action) {
      this.ifClass = ifClass;
      this.action = action;
    }
  }
36  
  
37  
  // A single token of the sentence, or a group spanning several tokens
  // (then constituents is non-empty). Carries weighted classes, pending
  // expectations and traits.
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    // class name -> weight. Must be a concrete, insertion-ordered map:
    // update() only converts the entries added since its previous run.
    new LinkedHashMap<S, Double> classes;
    int classesConvertedToTraits; // number of leading entries of classes already turned into traits
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word starts with its own quoted, lower-cased text as a class
      addClass(quote(lower(text)), 1);
    }
    
    // One update pass: add classes from theSet, fire expectations whose
    // class is now present, create traits for newly added classes, then
    // update all traits.
    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c, 1); // TODO
      
      // Process expectations (iterate over a copy; fulfilled ones are moved out)
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.containsKey(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }
      
      if (l(classes) > classesConvertedToTraits) {
        // snapshot the class names in insertion order and skip those already handled
        for (fS c : dropFirst(classesConvertedToTraits, new ArrayList<S>(classes.keySet())))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }
      
      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }
    
    bool isGroup() { ret nempty(constituents); }
    
    // Adds class c and everything makeHull_optimized derives from it via
    // subClasses, each with the given weight. An entry is only written when
    // the new weight exceeds the stored one. Returns true (and records a
    // change) if anything was written.
    bool addClass(S c, double weight) {
      bool changed = false;
      for (S subClass : makeHull_optimized(subClasses, c))
        if (weight > toDouble(classes.get(subClass))) {
          classes.put(subClass, weight); // store the weight, not the class name
          changed = true;
        }
      if (changed) change();
      ret changed;
    }
    
    // Weight stored for class c.
    // NOTE(review): relies on toDouble turning a missing (null) entry into 0,
    // as addClass already does - confirm.
    double getWeight(S c) { ret toDouble(classes.get(c)); }
    
    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }
    
    // Attaches one LinkWithTo trait per production registered under class c.
    void addTraitsForClass(S c) {
      for (Production p : productionsByA.get(c))
        addTrait(new LinkWithTo(p));
    }
    
    // Binds the trait to this word (its w field) and registers it.
    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }
    
    toString {
      ret textAndClasses(this);
    }
    
    bool hasClass(S c) { ret containsKey(classes, c); }
    S text() { ret text; }
  } // end of class Word
117  
  
118  
  // Returns the group formed by a followed by b under production prod.
  // Reuses an existing group when there is one (by token range if
  // mergeGroupsInSamePosition is set, otherwise by identical constituents
  // among a's groups); otherwise creates a new group and wires it into the
  // prev/next graph.
  Word makeGroup(Word a, Word b, Production prod) {
    L<Word> parts = ll(a, b);
    Word group = null;

    if (mergeGroupsInSamePosition)
      group = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else
      for (Word candidate : a.groups)
        if (contains(candidate.constituents, parts)) { group = candidate; break; }

    if (group != null) {
      // existing group: record this way of building it and its class
      group.constituents.add(parts);
      group.addClass(prod.c, formula(prod, a, b));
      ret group;
    }

    // new group
    group = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(group);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), group);
    group.constituents.add(parts);
    group.addClass(prod.c, formula(prod, a, b));
    for (Word part : parts)
      part.groups.add(group);

    // the group inherits a's predecessors and b's successors, in both directions
    group.prev.addAll(a.prev);
    group.next.addAll(b.next);
    for (Word before : a.prev) before.next.add(group);
    for (Word after : b.next) after.prev.add(group);
    ret group;
  }
158  
  
159  
  // Weight of the group built from a and b with production prod:
  // the production's weight times the average of a's weight for class
  // prod.a and b's weight for class prod.b.
  // NOTE(review): a class missing from a word counts as whatever toDouble
  // returns for null (presumably 0) - confirm.
  double formula(Production prod, Word a, Word b) {
    double weightA = toDouble(a.classes.get(prod.a));
    double weightB = toDouble(b.classes.get(prod.b));
    ret prod.weight * ((weightA + weightB) / 2);
  }
162  
  
163  
  // Something attached to a word that Word.update() updates along with it.
  class Trait extends Updatable {
    Word w; // the word this trait belongs to; assigned in Word.addTrait
  }
166  
  
167  
  // Trait for one production whose left class the owning word has: tells
  // every following word to form a group with w as soon as that word has
  // the production's right class.
  class LinkWithTo extends Trait {
    Production production;
    int expectationsSentToNext; // how many entries of w.next were already served

    *() {}
    *(Production production) { this.production = production; }

    void update {
      // nothing to do unless w.next grew since the last pass
      if (l(w.next) <= expectationsSentToNext) ret;

      for (final Word successor : dropFirst(expectationsSentToNext, w.next))
        successor.addExpectation(new Expectation(production.b, r {
          makeGroup(w, successor, production)
        }));
      expectationsSentToNext = l(w.next);
    }
  }
184  
  
185  
  // Parses the sentence: tokenizes it, creates and links one Word per
  // token, then updates all objects repeatedly until a full pass causes
  // no change. May only be called once per instance.
  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> tokens = words(sentence);

    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();

    words = new L;
    int index = 0;
    for (S token : tokens) {
      Word word = setAllAndReturn(new Word(token, index, index+1));
      words.add(word);
      if (isQuoted(word.text)) word.addClass("<noun>", 0.8);
      if (isInteger(word.text)) word.addClass("<number>", 1);
      ++index;
    }

    // chain neighbouring tokens
    for (int i = 1; i < l(words); i++)
      linkWords(words.get(i-1), words.get(i));

    addAll(allObjects, words);

    // fixpoint loop; iterate over a copy because updates may add groups
    while (true) {
      long changesBefore = changes;
      for (Updatable obj : cloneList(allObjects))
        obj.update();
      if (changesBefore == changes) break;
    }
  }
212  
  
213  
  // Prints every word with its classes, then the number of groups and
  // each group in grouped form.
  void printWordsAndGroups() {
    for (Word word : words) {
      S line = "  " + textAndClasses(word);
      print(line);
    }
    print();

    L<Word> allGroups = groups();
    print();
    print(n2(allGroups, "group"));
    for (Word group : allGroups) {
      S line = "Group: " + groupedTextAndClasses(group);
      print(line);
    }
  }
223  
  
224  
  // Prints all groupings of the group covering the whole sentence;
  // prints nothing when there is no such group.
  void printConstituentsOfFullGroup() {
    Word full = fullGroup();
    if (full != null) {
      print();
      pnl(allGroupings(full));
    }
  }
230  
  
231  
  // All Word objects in allObjects that are groups.
  L<Word> groups() {
    new L<Word> result;
    for (Word w : instancesOf(Word.class, allObjects))
      if (w.isGroup())
        result.add(w);
    ret result;
  }
234  
  
235  
  // only one with default flags
236  
  // Groups spanning the whole sentence (wordFrom 0, wordTo = token count).
  L<Word> fullGroups() {
    L<Word> candidates = groups();
    int tokenCount = l(words);
    ret filterByFields(candidates, wordFrom := 0, wordTo := tokenCount);
  }
239  
  
240  
  // A group spanning the whole sentence, as found by findByFields.
  Word fullGroup() {
    L<Word> candidates = groups();
    int tokenCount = l(words);
    ret findByFields(candidates, wordFrom := 0, wordTo := tokenCount);
  }
243  
  
244  
  // Sorted set of the class names of all groups spanning the whole sentence.
  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes.keySet()); // classes maps name -> weight; only the names are collected
    ret set;
  }
250  
  
251  
  // Parenthesized suffix for printing a word: its class names, plus the
  // number of groupings when the word is a group.
  S bracketStuff(Word w) {
    // classes maps name -> weight; only the names are listed
    ret " (" + joinWithComma(w.classes.keySet())
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }
255  
  
256  
  // The word's plain text followed by its bracketed class info.
  S textAndClasses(Word w) {
    S plain = w.text;
    ret plain + bracketStuff(w);
  }
257  
  // The word's grouped text followed by its bracketed class info.
  S groupedTextAndClasses(Word w) {
    S body = grouped(w);
    ret body + bracketStuff(w);
  }
258  
  
259  
  // Records that b directly follows a, in both directions.
  void linkWords(Word a, Word b) {
    Set<Word> successorsOfA = a.next;
    successorsOfA.add(b);
    Set<Word> predecessorsOfB = b.prev;
    predecessorsOfB.add(a);
  }
263  
  
264  
  // Records that something changed; parse() keeps looping while this counter moves.
  void change() {
    changes++;
  }
265  
266  
  // Loads the grouping rules from the "<language> bottom-up groupings" list.
  // A line of 5 code tokens (tokens 0, 2, 4 are a, b, c) becomes a Production
  // registered under its left class a; a line of 3 code tokens adds an entry
  // to subClasses. Other lines are ignored.
  void parseGroupings() {
    for (S s : mL(ai_language() + " bottom-up groupings")) {
      L<S> tok = javaTokWithAngleBracketsC(s);
      if (l(tok) == 5) {
        // The 5-token form carries no weight, so 1 is used.
        // NOTE(review): confirm the intended default weight.
        Production p = new Production(tok.get(0), tok.get(2), tok.get(4), 1);
        productionsByA.put(p.a, p);
      } else if (l(tok) == 3)
        subClasses.put(tok.get(0), tok.get(2));
    }
  }
275  
276  
  // TODO: now there are multiple groupings
277  
  // Text of g with grouping shown: the plain text for a non-group,
  // otherwise the rendering of its first recorded grouping.
  S grouped(Word g) {
    ret empty(g.constituents)
      ? g.text
      : groupedConstituents(first(g.constituents));
  }
281  
  
282  
  // Renders each constituent in grouped form, passes it through
  // curlyBraceIfMultipleTokens, and joins the results with spaces.
  S groupedConstituents(L<Word> constituents) {
    new L<S> rendered;
    for (Word part : constituents) {
      S inner = grouped(part);
      rendered.add(curlyBraceIfMultipleTokens(inner));
    }
    ret joinWithSpace(rendered);
  }
288  
  
289  
  // Renderings of every recorded grouping of g, in insertion order;
  // for a non-group, just its text.
  Set<S> allGroupings(Word g) {
    if (!empty(g.constituents)) {
      new LinkedHashSet<S> renderings;
      for (L<Word> option : g.constituents)
        renderings.add(groupedConstituents(option));
      ret renderings;
    }
    ret litorderedset(g.text);
  }
296  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1017385
Snippet name: AI_BottomUpParser2 - with weights [dev.]
Eternal ID of this version: #1017385/3
Text MD5: f045ef122c40514aeab6857dc7f7cfdd
Author: stefan
Category: javax / a.i.
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2018-07-19 17:05:03
Source code size: 8310 bytes / 296 lines
Pitched / IR pitched: No / No
Views / Downloads: 251 / 280
Version history: 2 change(s)
Referenced in: [show references]