Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

277
LINES

< > BotCompany Repo | #1017384 // AI_BottomUpParser1 (old, without weights)

JavaX fragment (include)

1  
sclass AI_BottomUpParser1 {
2  
  Map<S, Set<S>> theSet; // category to literal examples; parse() fills it from ai_wordCategoriesWithElements() if still null
3  
  new LinkedHashSet<Updatable> allObjects; // all words and groups; parse() calls update() on each until nothing changes
4  
  L<Word> words; // one Word per token of the sentence, in order; null until parse() assigns it
5  
  long changes; // modification counter, incremented by change(); parse() stops when a full pass leaves it unchanged
6  
  new MultiMap<S, Pair<S>> groupingsByA; // filled by parseGroupings(): class of left word -> (class expected on next word, class of resulting group)
7  
  new MultiMap<S> subClasses; // filled by parseGroupings(); Word.addClass expands classes through it via makeHull_optimized
8  
  bool mergeGroupsInSamePosition = true; // if true, makeGroup() reuses one group Word per (wordFrom, wordTo) range
9  
  new HashMap<IntRange, Word> groupsByPosition; // word range -> group; only read/written when mergeGroupsInSamePosition is set
10  
11  
  class Updatable {
12  
    void update {}
13  
    
14  
    void setField(S field, O value) {
15  
      if (eq(get(this, field), value)) ret;
16  
      set(this, field, value);
17  
      change();
18  
    }
19  
  }
20  
  
21  
  class Expectation {
22  
    S ifClass;
23  
    Runnable action;
24  
    
25  
    *() {}
26  
    *(S *ifClass, Runnable *action) {}
27  
  }
28  
  
29  
  // A single token of the sentence, or a group spanning several tokens
  // (groups are created by makeGroup and have non-empty constituents).
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices (parse() creates single words as i, i+1)
    new LinkedHashSet<Word> prev; // words/groups directly before this one
    new LinkedHashSet<Word> next; // words/groups directly after this one
    new LinkedHashSet<L<Word>> constituents; // if group: each entry is one way of splitting it
    new L<Expectation> expectations; // still waiting for their class to show up
    new L<Expectation> fulfilledExpectations; // moved here once their action was run
    new TreeSet<S> classes; // sorted, so iteration order is NOT insertion order
    int classesConvertedToTraits; // number of classes that already got their traits
    new HashSet<S> classesWithTraits; // exactly which classes already got their traits
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;
    
    *() {}
    
    // Besides storing the arguments, registers the quoted, lower-cased text
    // as the word's first class.
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text)));
    }
    
    // One fixpoint step: pick up classes for the literal text, run the
    // expectations that are now satisfied, create traits for newly gained
    // classes and finally update all traits.
    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c);
      
      // Process expectations (iterate over a copy; fulfilled ones are moved out)
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }
      
      // Create traits for every class that has none yet. classes is a sorted
      // TreeSet, so "skip the first n entries" does not identify the classes
      // handled earlier; remember them by name instead.
      for (S c : cloneList(classes))
        if (classesWithTraits.add(c))
          addTraitsForClass(c);
      classesConvertedToTraits = l(classesWithTraits);
      
      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }
    
    bool isGroup() { ret nempty(constituents); }
    
    // Adds c plus everything makeHull_optimized(subClasses, c) yields.
    // Returns true (and counts a change) only if the class set actually grew.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false;
      change(); true;
    }
    
    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }
    
    // Adds one LinkWithTo trait per grouping rule registered for class c.
    void addTraitsForClass(S c) {
      for (PairS p : groupingsByA.get(c))
        addTrait(new LinkWithTo(p.a, p.b));
    }
    
    // Attaches the trait to this word (sets its w field) and stores it.
    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }
    
    toString {
      ret textAndClasses(this);
    }
    
    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word
103  
  
104  
  Word makeGroup(Word a, Word b, S newClass) {
105  
    L<Word> list = null;
106  
    Word g = null;
107  
    if (mergeGroupsInSamePosition)
108  
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
109  
    else { 
110  
      list = ll(a, b);
111  
      // look for existing group
112  
      for (Word _g : a.groups)
113  
        if (contains(_g.constituents, list)) { g = _g; break; }
114  
    }
115  
    
116  
    if (list == null) list = ll(a, b);
117  
    if (g != null) {
118  
      g.constituents.add(list);
119  
      if (g.addClass(newClass)) {
120  
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
121  
      }
122  
      ret g;
123  
    }
124  
125  
    // new group
126  
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
127  
    //print("  prev=" + sfu(collect(a.prev, 'text)));
128  
    //print("  next=" + sfu(collect(b.next, 'text)));
129  
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
130  
    allObjects.add(g);
131  
    if (mergeGroupsInSamePosition)
132  
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
133  
    g.addClass(newClass);
134  
    g.constituents.add(list);
135  
    for (Word w : list)
136  
      w.groups.add(g);
137  
    g.prev.addAll(a.prev);
138  
    g.next.addAll(b.next);
139  
    for (Word prev : a.prev) prev.next.add(g);
140  
    for (Word next : b.next) next.prev.add(g);
141  
    ret g;
142  
  }
143  
  
144  
  // Base class for behaviour attached to a Word; Word.update() calls update() on each of a word's traits.
  class Trait extends Updatable {
145  
    Word w; // the word this trait is attached to (assigned in Word.addTrait)
146  
  }
147  
  
148  
  // Grouping rule instance: if a word following w has class linkWith,
  // w and that word are grouped and the group gets class linkTo.
  class LinkWithTo extends Trait {
    S linkWith; // class expected on the following word
    S linkTo;   // class of the resulting group
    int expectationsSentToNext; // how many entries of w.next were already served
    
    LinkWithTo() {}

    LinkWithTo(S linkWith, S linkTo) {
      this.linkWith = linkWith;
      this.linkTo = linkTo;
    }
    
    // Hands an expectation to every successor of w that has not received one yet.
    void update {
      if (l(w.next) <= expectationsSentToNext) ret;

      for (final Word next : dropFirst(expectationsSentToNext, w.next)) {
        Runnable groupWithNext = r {
          makeGroup(w, next, linkTo);
        };
        next.addExpectation(new Expectation(linkWith, groupWithNext));
      }
      expectationsSentToNext = l(w.next);
    }
  }
165  
  
166  
  // Parses the sentence: creates one Word per token, links neighbours and
  // then updates all objects over and over until a full pass causes no
  // further change. May only be called once per parser instance.
  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> rawWords = words(sentence);
    
    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();
    
    words = new L;
    for (int i = 0; i < l(rawWords); i++) {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      // classes that follow from the token's shape alone
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }

    // chain each word to its right-hand neighbour
    for (int i = 1; i < l(words); i++)
      linkWords(words.get(i-1), words.get(i));
    
    addAll(allObjects, words);

    // fixpoint iteration (over a copy, since updates may add groups)
    while (true) {
      long changesBefore = changes;
      for (Updatable o : cloneList(allObjects))
        o.update();
      if (changesBefore == changes) break;
    }
  }
193  
  
194  
  void printWordsAndGroups() {
195  
    for (Word w : words) print("  " + textAndClasses(w));
196  
    print();
197  
      
198  
    L<Word> groups = groups();
199  
    print();
200  
    print(n2(groups, "group"));
201  
    for (Word g : groups)
202  
      print("Group: " + groupedTextAndClasses(g));
203  
  }
204  
  
205  
  void printConstituentsOfFullGroup() {
206  
    Word g = fullGroup();
207  
    if (g == null) ret;
208  
    print();
209  
    pnl(allGroupings(g));
210  
  }
211  
  
212  
  // All Word objects in allObjects that are groups.
  L<Word> groups() {
    new L<Word> result;
    for (Word w : instancesOf(Word.class, allObjects))
      if (w.isGroup())
        result.add(w);
    ret result;
  }
215  
  
216  
  // only one with default flags
217  
  L<Word> fullGroups() {
218  
    ret filterByFields(groups(), wordFrom := 0, wordTo := l(words));
219  
  }
220  
  
221  
  // The group findByFields picks among those covering the sentence from
  // the first to the last word.
  Word fullGroup() {
    L<Word> candidates = groups();
    int sentenceLength = l(words);
    ret findByFields(candidates, wordFrom := 0, wordTo := sentenceLength);
  }
224  
  
225  
  // Sorted union of the classes of all groups that span the whole sentence.
  Set<S> fullClasses() {
    new TreeSet<S> union;
    for (Word full : fullGroups()) {
      union.addAll(full.classes);
    }
    ret union;
  }
231  
  
232  
  // Builds the bracketed suffix used when printing a word:
  // " (class1, class2, ...)" and, for groups, additionally the number of
  // groupings, e.g. " (<np>, 2 groupings)".
  S bracketStuff(Word w) {
    // The original had "+ +" here, i.e. a unary plus on a String operand.
    S groupings = empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping");
    ret " (" + joinWithComma(w.classes) + groupings + ")";
  }
236  
  
237  
  // The word's plain text followed by its bracketed class list.
  S textAndClasses(Word w) {
    S suffix = bracketStuff(w);
    ret w.text + suffix;
  }
238  
  // The word's grouped text (see grouped) followed by its bracketed class list.
  S groupedTextAndClasses(Word w) {
    S structure = grouped(w);
    ret structure + bracketStuff(w);
  }
239  
  
240  
  void linkWords(Word a, Word b) {
241  
    a.next.add(b);
242  
    b.prev.add(a);
243  
  }
244  
  
245  
  // Records that something was modified; parse() keeps iterating while this counter moves.
  void change() {
    changes += 1;
  }
246  
247  
  void parseGroupings() {
248  
    for (S s : mL(ai_language() + " bottom-up groupings")) {
249  
      L<S> tok = javaTokWithAngleBracketsC(s);
250  
      if (l(tok) == 5)
251  
        groupingsByA.put(tok.get(0), pair(tok.get(2), tok.get(4)));
252  
      else if (l(tok) == 3)
253  
        subClasses.put(tok.get(0), tok.get(2));
254  
    }
255  
  }
256  
257  
  // TODO: now there are multiple groupings
258  
  S grouped(Word g) {
259  
    if (empty(g.constituents)) ret g.text;
260  
    ret groupedConstituents(first(g.constituents));
261  
  }
262  
  
263  
  // Renders each constituent with grouped(), passes the result through
  // curlyBraceIfMultipleTokens and joins everything with spaces.
  S groupedConstituents(L<Word> constituents) {
    new L<S> parts;
    for (Word part : constituents) {
      S rendered = grouped(part);
      parts.add(curlyBraceIfMultipleTokens(rendered));
    }
    ret joinWithSpace(parts);
  }
269  
  
270  
  // One rendering per grouping of g, in insertion order. A word without
  // constituents yields a set containing just its text.
  Set<S> allGroupings(Word g) {
    if (nempty(g.constituents)) {
      new LinkedHashSet<S> renderings;
      for (L<Word> grouping : g.constituents)
        renderings.add(groupedConstituents(grouping));
      ret renderings;
    }
    ret litorderedset(g.text);
  }
277  
}

Author comment

Began life as a copy of #1017351

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1017384
Snippet name: AI_BottomUpParser1 (old, without weights)
Eternal ID of this version: #1017384/1
Text MD5: f0f41ed91ea41a7e4eb6fb30e92afb08
Author: stefan
Category: javax / a.i.
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2018-07-19 16:50:08
Source code size: 7741 bytes / 277 lines
Pitched / IR pitched: No / No
Views / Downloads: 281 / 281
Referenced in: [show references]