Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

318
LINES

< > BotCompany Repo | #1017351 // AI_BottomUpParser1

JavaX fragment (include)

1  
sclass AI_BottomUpParser1 {
2  
  // --- configuration (set before calling parse) ---

  // Word text -> categories; if still null when parsing starts, _parse()
  // fills it from ai_wordToCategories().
  Map<S, Set<S>> wordsToCategories;
  // When true, _parse() runs ai_splitSplittables on the raw words
  // (e.g. what's => what is).
  bool splitSplittables = true;
  // Production rules as text; when null, parseProductions() uses the
  // no-argument ai_buParser_parseWeightedProductions() instead.
  S productionsText;

  // --- parse state ---

  // One Word per raw token, created by _parse(); null until then.
  L<Word> words;
  // The tokens being parsed.
  L<S> rawWords;
  // words + N tokens (null if unknown)
  L<S> cnc;

  // Every object the parse loop updates (words and groups).
  new LinkedHashSet<Updatable> allObjects;
  // Incremented by change(); the parse loop stops once an iteration leaves it unchanged.
  long changes;
  // Number of completed parse loop iterations.
  long iterations;
  // Productions that have a "b" part, keyed by their "a" class.
  new MultiMap<S, WeightedProduction> productionsByA;
  // Productions without a "b" part, stored as a -> c.
  new MultiMap<S> subClasses;
  // When true, makeGroup() reuses the group already registered for the same token range.
  bool mergeGroupsInSamePosition = true;
  // Token range -> group; only maintained when mergeGroupsInSamePosition is set.
  new HashMap<IntRange, Word> groupsByPosition;
  // When true, every plausibility function call is printed.
  bool verbose_callPlausibilityFunction;
  // user can set categories for each token
  Map<Int, L<S>> categoriesAtWordIndex;
  // NOTE(review): maxDepth and maxDepthReached are not read or written
  // anywhere in this class as shown - confirm whether they are still needed.
  int maxDepth = 100;
  bool maxDepthReached;

  // --- callbacks ---

  // Called (via callF) after each parse loop iteration.
  Runnable afterIteration;
  // Called (via callF) once the parse loop has finished.
  Runnable afterParse;
23  
24  
  class Updatable {
25  
    void update {}
26  
    
27  
    void setField(S field, O value) {
28  
      if (eq(get(this, field), value)) ret;
29  
      set(this, field, value);
30  
      change();
31  
    }
32  
  }
33  
  
34  
  class Expectation {
35  
    S ifClass;
36  
    Runnable action;
37  
    
38  
    *() {}
39  
    *(S *ifClass, Runnable *action) {}
40  
  }
41  
  
42  
  // A single token of the input, or a group spanning several tokens.
  // Carries the classes found so far, links to neighbouring words/groups,
  // pending expectations and the traits derived from its classes.
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices (_parse creates token i as [i, i+1))
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits; // size of classes when traits were last derived
    new HashSet<S> classesWithTraits; // classes already handed to addTraitsForClass
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;
    
    *() {}

    // Creates a word and gives it its own quoted, lower-cased text as first class.
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text)));
    }
    
    // One parse loop step for this word: pick up dictionary categories,
    // fire expectations whose class is now present, derive traits for
    // classes that have none yet, then update all traits.
    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);
      
      // Process expectations (iterate over a copy, since fulfilled ones are moved out)
      for (Expectation e : cloneList(expectations)) {
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }
      
      if (l(classes) > classesConvertedToTraits) {
        // classes is a sorted TreeSet, so a newly added class may sort before
        // classes that were already processed. Selecting the new ones by
        // position (skipping the first N) would then skip the new class and
        // process an old one twice; select by membership instead.
        for (fS c : cloneList(classes))
          if (classesWithTraits.add(c))
            addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }
      
      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }
    
    // True if this word was built from other words.
    bool isGroup() { ret nempty(constituents); }
    
    // Adds c together with makeHull_optimized(subClasses, c).
    // Returns true (and counts a change) only if at least one class was new.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) ret false;
      change();
      ret true;
    }
    
    // Registers an expectation; it is checked on the next update().
    void addExpectation(Expectation e) {
      expectations.add(e);
      change();
    }
    
    // Creates one LinkWithTo trait per production registered under class c.
    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }
    
    // Attaches the trait to this word (sets its w field) and stores it.
    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }
    
    toString {
      ret textAndClasses(this);
    }
    
    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word
116  
  
117  
  Word makeGroup(Word a, Word b, S newClass) {
118  
    L<Word> list = null;
119  
    Word g = null;
120  
    if (mergeGroupsInSamePosition)
121  
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
122  
    else { 
123  
      list = ll(a, b);
124  
      // look for existing group
125  
      for (Word _g : a.groups)
126  
        if (contains(_g.constituents, list)) { g = _g; break; }
127  
    }
128  
    
129  
    if (list == null) list = ll(a, b);
130  
    if (g != null) {
131  
      g.constituents.add(list);
132  
      if (g.addClass(newClass)) {
133  
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
134  
      }
135  
      ret g;
136  
    }
137  
138  
    // new group
139  
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
140  
    //print("  prev=" + sfu(collect(a.prev, 'text)));
141  
    //print("  next=" + sfu(collect(b.next, 'text)));
142  
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
143  
    allObjects.add(g);
144  
    if (mergeGroupsInSamePosition)
145  
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
146  
    g.addClass(newClass);
147  
    g.constituents.add(list);
148  
    for (Word w : list)
149  
      w.groups.add(g);
150  
    g.prev.addAll(a.prev);
151  
    g.next.addAll(b.next);
152  
    for (Word prev : a.prev) prev.next.add(g);
153  
    for (Word next : b.next) next.prev.add(g);
154  
    ret g;
155  
  }
156  
  
157  
  class Trait extends Updatable {
158  
    Word w;
159  
  }
160  
  
161  
  // Trait created from a production: when a successor of the owning word has
  // class linkWith, the two are grouped into a word of class linkTo -
  // optionally subject to a plausibility function.
  class LinkWithTo extends Trait {
    S linkWith; // class expected on the following word
    S linkTo;   // class given to the resulting group
    S plausibilityFunction; // may be null
    int expectationsSentToNext; // how many entries of w.next already have the expectation

    LinkWithTo() {}

    LinkWithTo(S linkWith, S linkTo, S plausibilityFunction) {
      this.linkWith = linkWith;
      this.linkTo = linkTo;
      this.plausibilityFunction = plausibilityFunction;
    }

    // Sends an expectation to every successor of w that has not received one yet.
    void update() {
      if (l(w.next) <= expectationsSentToNext) ret;

      for (final Word next : dropFirst(expectationsSentToNext, w.next)) {
        Runnable onMatch = r {
          if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
            loadFunctions_preferDiskCache(); // note: changes this for whole program
            O verdict = pcallAndMake(plausibilityFunction, w.text, next.text);
            if (verbose_callPlausibilityFunction)
              print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + verdict);
            // Only an explicit "false" result vetoes the grouping.
            if (isFalse(verdict))
              ret;
          }
          makeGroup(w, next, linkTo);
        };
        next.addExpectation(new Expectation(linkWith, onMatch));
      }

      expectationsSentToNext = l(w.next);
    }
  }
187  
  
188  
  void parse(L<S> tok) {
189  
    cnc = /*simpleSpaces2*/(tok);
190  
    rawWords = codeTokens(cnc);
191  
    _parse();
192  
  }
193  
  
194  
  // Parses a sentence: splits it with words_withAngleBrackets, runs the
  // parse, and returns this parser so calls can be chained.
  AI_BottomUpParser1 parse(final S sentence) {
    rawWords = words_withAngleBrackets(sentence);
    _parse();
    ret this;
  }
199  
  
200  
  void _parse() {
201  
    if (words != null) fail("only call once");
202  
    if (splitSplittables)
203  
      ai_splitSplittables(rawWords);
204  
    
205  
    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
206  
    parseProductions();
207  
    
208  
    words = new L;
209  
    for i over rawWords: {
210  
      Word w = new(rawWords.get(i), i, i+1);
211  
      words.add(w);
212  
      if (isAngleBracketed(w.text)) w.addClass(w.text);
213  
      else if (isQuoted(w.text)) w.addClass("<noun>");
214  
      else if (isInteger(w.text)) w.addClass("<number>");
215  
      for (S cat : unnull(mapGet(categoriesAtWordIndex, i))) w.addClass(cat);
216  
    }
217  
    for (int i = 0; i < l(words)-1; i++)
218  
      linkWords(words.get(i), words.get(i+1));
219  
    //printStruct(first(words));
220  
    
221  
    addAll(allObjects, words);
222  
    long lastChanges;
223  
    do {
224  
      lastChanges = changes;
225  
      //print(n2(changes, "change"));
226  
      for (Updatable w : cloneList(allObjects))
227  
        w.update();
228  
      ++iterations;
229  
      callF(afterIteration);
230  
    } while (licensed() && lastChanges != changes);
231  
    callF(afterParse);
232  
  }
233  
  
234  
  void printWordsAndGroups() {
235  
    for (Word w : words) print("  " + textAndClasses(w));
236  
    print();
237  
      
238  
    L<Word> groups = groups();
239  
    print();
240  
    print(n2(groups, "group"));
241  
    for (Word g : groups)
242  
      print("Group: " + groupedTextAndClasses(g));
243  
  }
244  
  
245  
  void printConstituentsOfFullGroup() {
246  
    Word g = fullGroup();
247  
    if (g == null) ret;
248  
    print();
249  
    pnl(allGroupings(g));
250  
  }
251  
  
252  
  // All Word instances among the registered objects (tokens and groups).
  L<Word> words() {
    ret instancesOf(Word.class, allObjects);
  }
253  
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }
254  
  
255  
  // only one with default flags
256  
  L<Word> fullGroups() {
257  
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
258  
  }
259  
  
260  
  // The word/group found by findByFields for the complete token range
  // [0, number of tokens).
  Word fullGroup() {
    int tokenCount = l(words);
    ret findByFields(words(), wordFrom := 0, wordTo := tokenCount);
  }
263  
  
264  
  Set<S> fullClasses() {
265  
    new TreeSet<S> set;
266  
    for (Word g : fullGroups())
267  
      set.addAll(g.classes);
268  
    ret set;
269  
  }
270  
  
271  
  // Renders the parenthesized suffix used when printing a word: its classes,
  // comma-separated, followed by the number of groupings if it has any.
  S bracketStuff(Word w) {
    // Fixed: a duplicated "+" made the second operand a unary plus applied
    // to a String expression.
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }
275  
  
276  
  // The word's text followed by its bracketed classes/groupings.
  S textAndClasses(Word w) {
    S suffix = bracketStuff(w);
    ret w.text + suffix;
  }
277  
  // Like textAndClasses, but shows the word in grouped form.
  S groupedTextAndClasses(Word w) {
    S shown = grouped(w);
    ret shown + bracketStuff(w);
  }
278  
  
279  
  S fullGrouped() { ret grouped(fullGroup()); }
280  
  
281  
  void linkWords(Word a, Word b) {
282  
    a.next.add(b);
283  
    b.prev.add(a);
284  
  }
285  
  
286  
  // Counts one change; the parse loop keeps iterating while this counter moves.
  void change() {
    changes++;
  }
287  
288  
  void parseProductions() {
289  
    for (WeightedProduction p : 
290  
      productionsText == null ? ai_buParser_parseWeightedProductions() : ai_buParser_parseWeightedProductions(productionsText))
291  
      if (p.b != null)
292  
        productionsByA.put(p.a, p);
293  
      else
294  
        subClasses.put(p.a, p.c);
295  
  }
296  
297  
  // TODO: now there are multiple groupings
298  
  // Text of g with its grouping made visible. Only the first constituent
  // list is used; a plain token yields its text, null yields null.
  S grouped(Word g) {
    if (g == null) ret null;
    ret empty(g.constituents)
      ? g.text
      : groupedConstituents(first(g.constituents));
  }
303  
  
304  
  // Joins the grouped form of each constituent with spaces; each part is
  // passed through curlyBraceIfMultipleTokens first.
  S groupedConstituents(L<Word> constituents) {
    L<S> parts = new ArrayList<S>();
    for (Word member : constituents) {
      S rendered = grouped(member);
      parts.add(curlyBraceIfMultipleTokens(rendered));
    }
    ret joinWithSpace(parts);
  }
310  
  
311  
  // The grouped form of every constituent list of g, in insertion order.
  // A word without constituents yields just its text.
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents))
      ret litorderedset(g.text);

    Set<S> renderings = new LinkedHashSet<S>();
    for (L<Word> grouping : g.constituents) {
      renderings.add(groupedConstituents(grouping));
    }
    ret renderings;
  }
318  
}

Author comment

Began life as a copy of #1017348

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1017351
Snippet name: AI_BottomUpParser1
Eternal ID of this version: #1017351/73
Text MD5: d85fd834e7e0d984180eb6ea6ae46bbb
Author: stefan
Category: javax / a.i.
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2019-07-10 12:09:13
Source code size: 9330 bytes / 318 lines
Pitched / IR pitched: No / No
Views / Downloads: 489 / 1173
Version history: 72 change(s)
Referenced in: [show references]