Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

315
LINES

< > BotCompany Repo | #1017479 // AI_BottomUpParser1 with observeNTokenHygiene [dev., doesn't really work]

JavaX fragment (include)

1  
sclass AI_BottomUpParser1 {
2  
  Map<S, Set<S>> wordsToCategories; // word text -> classes added to that word in Word.update; loaded via ai_wordToCategories() in _parse() if still null
3  
  new LinkedHashSet<Updatable> allObjects; // everything that gets update() called in the _parse() loop (words and groups)
4  
  L<Word> words; // one Word per raw word, built in _parse(); non-null means parsing has already run
5  
  L<S> rawWords; // the sentence's words (codeTokens(cnc) or main.words(sentence))
6  
  L<S> cnc; // words + N tokens (null if unknown); word i is read at index i*2+1 by textWithNTokens
7  
  long changes; // bumped by change(); _parse() loops until a full pass leaves it unchanged
8  
  new MultiMap<S, WeightedProduction> productionsByA; // productions with non-null b, keyed by their a (see parseGroupings)
9  
  new MultiMap<S> subClasses; // a -> c for productions without b (see parseGroupings); expanded in Word.addClass
10  
  bool mergeGroupsInSamePosition = true; // if true, makeGroup reuses the group already covering the same word range
11  
  new HashMap<IntRange, Word> groupsByPosition; // word range -> group; only maintained when mergeGroupsInSamePosition is set
12  
  bool verbose_callPlausibilityFunction; // print every plausibility function call and its result
13  
  bool observeNTokenHygiene; // parse(S) tokenizes with javaTokNPunctuation and makeGroup rejects ranges failing checkNTokenHygiene
14  
15  
  class Updatable {
16  
    void update {}
17  
    
18  
    void setField(S field, O value) {
19  
      if (eq(get(this, field), value)) ret;
20  
      set(this, field, value);
21  
      change();
22  
    }
23  
  }
24  
  
25  
  // An action attached to a word that is run once the word
  // carries the class named in ifClass (see Word.update).
  class Expectation {
    S ifClass;       // class the word must have for the action to fire
    Runnable action; // what to run when it does

    *() {}

    *(S ifClass, Runnable action) {
      this.ifClass = ifClass;
      this.action = action;
    }
  }
32  
  
33  
  // A single word of the sentence, or a group covering several adjacent
  // words (a group has at least one entry in constituents).
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices (wordFrom inclusive, wordTo exclusive)
    new LinkedHashSet<Word> prev; // words/groups ending where this one starts
    new LinkedHashSet<Word> next; // words/groups starting where this one ends
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations; // still waiting for their class to show up
    new L<Expectation> fulfilledExpectations; // already fired
    new TreeSet<S> classes; // sorted, so iteration order is NOT insertion order
    int classesConvertedToTraits; // size of classes at the time of the last conversion
    new LinkedHashSet<S> classesWithTraits; // classes already passed to addTraitsForClass
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word starts out with its own quoted, lower-cased text as a class
      classes.add(quote(lower(text)));
    }

    // One pass of the update loop: pick up word classes, fire expectations
    // whose class is now present, create traits for new classes, update traits.
    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations (iterate over a copy, the list is modified inside)
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Create traits for classes that have none yet. classes is a sorted
      // set, so "skip the first n elements" does not yield the newly added
      // ones; membership in classesWithTraits is what decides.
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : cloneList(classes))
          if (classesWithTraits.add(c))
            addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Adds c together with everything makeHull_optimized derives from it via
    // subClasses. Returns true (and records a change) iff classes grew.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false;
      change(); true;
    }

    // Registers an expectation; it is checked on the next update().
    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // One LinkWithTo trait per production registered under class c.
    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    // Binds the trait to this word and stores it.
    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word
107  
  
108  
  Word makeGroup(Word a, Word b, S newClass) {
109  
    L<Word> list = null;
110  
    Word g = null;
111  
    if (mergeGroupsInSamePosition)
112  
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
113  
    else {
114  
      list = ll(a, b);
115  
      // look for existing group
116  
      for (Word _g : a.groups)
117  
        if (contains(_g.constituents, list)) { g = _g; break; }
118  
    }
119  
    
120  
    if (list == null) list = ll(a, b);
121  
    if (g != null) {
122  
      g.constituents.add(list);
123  
      if (g.addClass(newClass)) {
124  
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
125  
      }
126  
      ret g;
127  
    }
128  
129  
    // new group, check hygiene
130  
    
131  
    if (observeNTokenHygiene && cnc != null) {
132  
      L<S> t = subList(cnc, a.wordFrom*2, b.wordTo*2+1);
133  
      print("Checking hygiene: " + sfu(t));
134  
      if (!checkNTokenHygiene(t)) {
135  
        print("Rejecting unhygienic grouping: " + join(t));
136  
        null;
137  
      }
138  
    }
139  
      
140  
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
141  
    //print("  prev=" + sfu(collect(a.prev, 'text)));
142  
    //print("  next=" + sfu(collect(b.next, 'text)));
143  
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
144  
    allObjects.add(g);
145  
    if (mergeGroupsInSamePosition)
146  
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
147  
    g.addClass(newClass);
148  
    g.constituents.add(list);
149  
    for (Word w : list)
150  
      w.groups.add(g);
151  
    g.prev.addAll(a.prev);
152  
    g.next.addAll(b.next);
153  
    for (Word prev : a.prev) prev.next.add(g);
154  
    for (Word next : b.next) next.prev.add(g);
155  
    ret g;
156  
  }
157  
  
158  
  // Something attached to a word that is updated along with it
  // (Word.update calls update() on each of its traits).
  class Trait extends Updatable {
159  
    Word w; // the word this trait belongs to; assigned in Word.addTrait
160  
  }
161  
  
162  
  // Trait for one production: if a right neighbour of w has class linkWith,
  // w and that neighbour are grouped into class linkTo.
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction; // optional veto function, may be null
    int expectationsSentToNext; // how many entries of w.next already got an expectation

    *() {}

    *(S linkWith, S linkTo, S plausibilityFunction) {
      this.linkWith = linkWith;
      this.linkTo = linkTo;
      this.plausibilityFunction = plausibilityFunction;
    }

    // Sends an expectation to every right neighbour that appeared since the last call.
    void update {
      if (l(w.next) <= expectationsSentToNext) ret;

      for (final Word next : dropFirst(expectationsSentToNext, w.next))
        next.addExpectation(new Expectation(linkWith, r { tryToGroupWith(next); }));
      expectationsSentToNext = l(w.next);
    }

    // Runs when the neighbour turned out to have class linkWith: asks the
    // plausibility function (if enabled and present) and then builds the group.
    void tryToGroupWith(Word next) {
      bool askFunction = ai_parser_activateStandardFunctions_get() && plausibilityFunction != null;
      if (askFunction) {
        O result = pcallAndMake(plausibilityFunction, w.text, next.text);
        if (verbose_callPlausibilityFunction)
          print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
        if (isFalse(result)) ret; // vetoed
      }
      makeGroup(w, next, linkTo);
    }
  }
187  
  
188  
  void parse(L<S> tok) {
189  
    cnc = tok;
190  
    rawWords = codeTokens(cnc);
191  
    _parse();
192  
  }
193  
  
194  
  void parse(fS sentence) {
195  
    if (words != null) fail("only call once");
196  
    if (observeNTokenHygiene)
197  
      parse(javaTokNPunctuation(sentence));
198  
    else
199  
      rawWords = main.words(sentence);
200  
    _parse();
201  
  }
202  
  
203  
  // The actual parse: builds a Word per raw word, links neighbours,
  // then updates all objects until a full pass causes no more changes.
  void _parse() {
    ai_splitSplittables(rawWords);

    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
    parseGroupings();

    // one Word per raw word, covering the range [i, i+1)
    words = new L;
    for (int i = 0; i < l(rawWords); i++) {
      Word word = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(word);
      if (isQuoted(word.text)) word.addClass("<noun>");
      if (isInteger(word.text)) word.addClass("<number>");
    }

    // chain each word to its right neighbour
    Word previous = null;
    for (Word current : words) {
      if (previous != null) linkWords(previous, current);
      previous = current;
    }
    //printStruct(first(words));

    addAll(allObjects, words);

    // iterate to a fixed point; updates may add objects, hence the copy
    while (true) {
      long changesBefore = changes;
      //print(n2(changes, "change"));
      for (Updatable obj : cloneList(allObjects))
        obj.update();
      if (changes == changesBefore) break;
    }
  }
229  
  
230  
  void printWordsAndGroups() {
231  
    for (Word w : words) print("  " + textAndClasses(w));
232  
    print();
233  
      
234  
    L<Word> groups = groups();
235  
    print();
236  
    print(n2(groups, "group"));
237  
    for (Word g : groups)
238  
      print("Group: " + groupedTextAndClasses(g));
239  
  }
240  
  
241  
  void printConstituentsOfFullGroup() {
242  
    Word g = fullGroup();
243  
    if (g == null) ret;
244  
    print();
245  
    pnl(allGroupings(g));
246  
  }
247  
  
248  
  // All Word instances found in allObjects. Groups are Words too and are
  // added to allObjects by makeGroup, so this is not the same as the words field.
  L<Word> words() { ret instancesOf(Word.class, allObjects); }
249  
  // All Words from words() that are groups (i.e. have constituents).
  L<Word> groups() {
    new L<Word> result;
    for (Word candidate : words())
      if (candidate.isGroup())
        result.add(candidate);
    ret result;
  }
250  
  
251  
  // only one with default flags
252  
  L<Word> fullGroups() {
253  
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
254  
  }
255  
  
256  
  // The Word found by findByFields that ranges from word 0 to the
  // number of sentence words (printConstituentsOfFullGroup handles a null result).
  Word fullGroup() {
    L<Word> candidates = words();
    ret findByFields(candidates, wordFrom := 0, wordTo := l(words));
  }
259  
  
260  
  // Sorted union of the classes of everything returned by fullGroups().
  Set<S> fullClasses() {
    new TreeSet<S> result;
    for (Word whole : fullGroups()) {
      result.addAll(whole.classes);
    }
    ret result;
  }
266  
  
267  
  // The bracketed suffix used when printing a word: its classes,
  // comma-separated, plus the number of groupings if it is a group.
  S bracketStuff(Word w) {
    // The original had "+ +" before this operand, i.e. a unary plus
    // applied to a String, which is not valid Java.
    S groupings = empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping");
    ret " (" + joinWithComma(w.classes) + groupings + ")";
  }
271  
  
272  
  // The word's text followed by its bracketed classes (see bracketStuff); also used by Word.toString.
  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
273  
  // Like textAndClasses, but with the text rendered by grouped(), i.e. showing the first grouping of a group.
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }
274  
  
275  
  void linkWords(Word a, Word b) {
276  
    a.next.add(b);
277  
    b.prev.add(a);
278  
  }
279  
  
280  
  // Records that something changed; _parse() keeps looping while this counter moves.
  void change() { ++changes; }
281  
282  
  void parseGroupings() {
283  
    for (WeightedProduction p : ai_buParser_parseWeightedProductions())
284  
      if (p.b != null)
285  
        productionsByA.put(p.a, p);
286  
      else
287  
        subClasses.put(p.a, p.c);
288  
  }
289  
290  
  // TODO: now there are multiple groupings
291  
  S grouped(Word g) {
292  
    if (empty(g.constituents)) ret g.text;
293  
    ret groupedConstituents(first(g.constituents));
294  
  }
295  
  
296  
  // Renders one grouping: each constituent rendered by grouped(),
  // passed through curlyBraceIfMultipleTokens, joined with spaces.
  S groupedConstituents(L<Word> constituents) {
    new L<S> parts;
    for (Word part : constituents) {
      S rendered = grouped(part);
      parts.add(curlyBraceIfMultipleTokens(rendered));
    }
    ret joinWithSpace(parts);
  }
302  
  
303  
  // Renderings of all groupings of g, in the order they were recorded.
  // For a plain word this is just its text.
  Set<S> allGroupings(Word g) {
    if (!empty(g.constituents)) {
      new LinkedHashSet<S> renderings;
      for (L<Word> grouping : g.constituents)
        renderings.add(groupedConstituents(grouping));
      ret renderings;
    }
    ret litorderedset(g.text);
  }
310  
  
311  
  // Text of the words in [wordFrom, wordTo). If the N tokens are known (cnc),
  // the original tokens between the words are kept; otherwise the words are
  // joined with single spaces.
  S textWithNTokens(int wordFrom, int wordTo) {
    if (cnc != null) {
      // word i sits at index i*2+1 of cnc; the outer N tokens are left out
      L<S> tokens = subList(cnc, wordFrom*2+1, wordTo*2);
      ret join(tokens);
    }
    ret joinWithSpace(subList(rawWords, wordFrom, wordTo));
  }
315  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1017479
Snippet name: AI_BottomUpParser1 with observeNTokenHygiene [dev., doesn't really work]
Eternal ID of this version: #1017479/10
Text MD5: 521afc8ae22c89e978bd34aa0bfd3ea9
Author: stefan
Category: javax / a.i.
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2018-07-23 22:00:58
Source code size: 9094 bytes / 315 lines
Pitched / IR pitched: No / No
Views / Downloads: 279 / 460
Version history: 9 change(s)
Referenced in: [show references]