sclass AI_BottomUpParser1 {
  // word text -> set of grammar categories; lazily filled in _parse() via ai_wordToCategories()
  Map<S, Set<S>> wordsToCategories;
  // every Updatable (leaf words and groups) made so far; iterated to a fixpoint in _parse()
  new LinkedHashSet<Updatable> allObjects;
  L<Word> words;    // leaf Word objects, one per raw token
  L<S> rawWords;    // the plain word tokens of the input sentence
  L<S> cnc; // words + N tokens (null if unknown)
  long changes;     // global change counter; the fixpoint loop stops when a full pass leaves it unchanged
  // two-symbol productions "a b => c", keyed by the left-hand symbol a (see parseGroupings())
  new MultiMap<S, WeightedProduction> productionsByA;
  // one-symbol productions "a => c" (class/superclass edges); presumably MultiMap<S,S> — see parseGroupings()
  new MultiMap<S> subClasses;
  bool mergeGroupsInSamePosition = true; // reuse a single group Word per token range instead of one per constituent pair
  new HashMap<IntRange, Word> groupsByPosition; // token range -> merged group (only used when merging)
  bool verbose_callPlausibilityFunction; // log every plausibility-function call (see LinkWithTo)
  bool observeNTokenHygiene; // reject groupings failing checkNTokenHygiene() in makeGroup()
14 | |
  // Base class for everything driven by the fixpoint loop in _parse().
  class Updatable {
    // Hook: recompute derived state; called repeatedly until no pass changes anything.
    void update {}

    // Reflectively set a field, bumping the global change counter only on an actual change.
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret; // no-op writes must not count as changes
      set(this, field, value);
      change();
    }
  }
24 | |
  // A pending trigger on a word: when the word gains class ifClass,
  // the expectation is moved to fulfilledExpectations and action runs once.
  class Expectation {
    S ifClass;       // class name being waited for
    Runnable action; // fired when ifClass appears (see Word.update)

    *() {}
    *(S *ifClass, Runnable *action) {}
  }
32 | |
  // A parse node: either a leaf token or a group covering token range [wordFrom, wordTo).
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev; // words/groups that may directly precede this one
    new LinkedHashSet<Word> next; // words/groups that may directly follow this one
    new LinkedHashSet<L<Word>> constituents; // if group: alternative splits into sub-words
    new L<Expectation> expectations;          // pending class triggers
    new L<Expectation> fulfilledExpectations; // triggers that already fired
    new TreeSet<S> classes; // all grammar classes assigned so far (sorted set)
    int classesConvertedToTraits; // count of classes already turned into traits
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits; // behaviors attached to this word (e.g. LinkWithTo)

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word trivially belongs to the class named by its own quoted, lowercased text
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Turn newly added classes into traits.
      // NOTE(review): dropFirst over a sorted TreeSet assumes new classes always
      // sort after the already-converted ones, which is not guaranteed — a class
      // inserted at the front could be skipped or another re-processed; verify.
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Add c plus everything reachable from it through subClasses (its hull).
    // Returns true (and counts a change) only if any class was actually new.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false; // JavaX: ret false
      change(); true; // JavaX: ret true
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // For every production "c b => x", attach a trait waiting for a b to our right.
    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    void addTrait(Trait t) {
      set(t, w := this); // back-reference: trait knows its owning word
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word
107 | |
  // Combine adjacent words a+b into a group of class newClass.
  // Reuses an existing group when possible; returns the group,
  // or null if the grouping is rejected by N-token hygiene.
  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      // merging mode: at most one group per token range, whatever its constituents
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group with exactly these constituents
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      // existing group: just record this split and the (possibly new) class
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group, check hygiene

    if (observeNTokenHygiene && cnc != null) {
      // slice of cnc spanning a..b, including the whitespace (N) tokens at the edges
      L<S> t = subList(cnc, a.wordFrom*2, b.wordTo*2+1);
      print("Checking hygiene: " + sfu(t));
      if (!checkNTokenHygiene(t)) {
        print("Rejecting unhygienic grouping: " + join(t));
        null; // JavaX: ret null
      }
    }

    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print("  prev=" + sfu(collect(a.prev, 'text)));
    //print("  next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g); // so the fixpoint loop starts updating it
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    // splice the group into the adjacency graph alongside its constituents
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }
157 | |
  // Base class for per-word behaviors; w is the owning word (set by Word.addTrait).
  class Trait extends Updatable {
    Word w;
  }
161 | |
  // Trait realizing a production "A B => C": when the owning word (class A) is
  // followed by a word of class linkWith (B), group both into class linkTo (C).
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction; // optional named predicate over (leftText, rightText); null = always plausible
    int expectationsSentToNext; // how many of w.next already received an expectation

    *() {}
    *(S *linkWith, S *linkTo, S *plausibilityFunction) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        // only successors that appeared since the last pass get a new expectation
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
              O result = pcallAndMake(plausibilityFunction, w.text, next.text);
              if (verbose_callPlausibilityFunction)
                print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
              if (isFalse(result))
                ret; // vetoed by plausibility function - don't group
            }
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }
187 | |
  // Parse from a pre-tokenized list alternating N (whitespace) and code tokens.
  void parse(L<S> tok) {
    cnc = tok;
    rawWords = codeTokens(cnc); // extract just the word tokens
    _parse();
  }
193 | |
194 | void parse(fS sentence) { |
195 | if (words != null) fail("only call once"); |
196 | if (observeNTokenHygiene) |
197 | parse(javaTokNPunctuation(sentence)); |
198 | else |
199 | rawWords = main.words(sentence); |
200 | _parse(); |
201 | } |
202 | |
  // Core driver: build leaf words from rawWords, link neighbors,
  // then run every Updatable to a fixpoint of the change counter.
  void _parse() {
    ai_splitSplittables(rawWords); // token normalization pass

    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
    parseGroupings(); // load the grammar productions

    // one leaf Word per raw token, spanning [i, i+1)
    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      // built-in seed classes
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }
    // chain adjacent words into the prev/next graph
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);
    // fixpoint: keep updating (including groups added along the way,
    // via cloneList snapshots) until a full pass causes no change
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }
229 | |
  // Debug output: every leaf word with its classes, then every group.
  void printWordsAndGroups() {
    for (Word w : words) print("  " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }
240 | |
  // Debug output: all alternative bracketings of the group covering the whole sentence
  // (silently does nothing when no full parse exists).
  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }
247 | |
  // All Word objects ever created (leaves and groups), in creation order.
  L<Word> words() { ret instancesOf(Word.class, allObjects); }
  // Only the group words (JavaX list-comprehension filter).
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }
250 | |
  // only one with default flags
  // All words/groups spanning the entire token range (l(words) = token count).
  L<Word> fullGroups() {
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
  }
255 | |
  // First word/group spanning the entire sentence, or null if no full parse exists.
  Word fullGroup() {
    ret findByFields(words(), wordFrom := 0, wordTo := l(words));
  }
259 | |
260 | Set<S> fullClasses() { |
261 | new TreeSet<S> set; |
262 | for (Word g : fullGroups()) |
263 | set.addAll(g.classes); |
264 | ret set; |
265 | } |
266 | |
267 | S bracketStuff(Word w) { |
268 | ret " (" + joinWithComma(w.classes) + |
269 | + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")"; |
270 | } |
271 | |
  // Flat text + class annotation, e.g. used by Word.toString.
  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  // Bracketed (grouped) text + class annotation.
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }
274 | |
275 | void linkWords(Word a, Word b) { |
276 | a.next.add(b); |
277 | b.prev.add(a); |
278 | } |
279 | |
  // Record that something changed; the fixpoint loop in _parse() keys off this counter.
  void change() { ++changes; }
281 | |
  // Load the grammar: productions with a right partner ("a b => c") are indexed
  // by their first symbol; partnerless ones ("a => c") become subclass edges.
  void parseGroupings() {
    for (WeightedProduction p : ai_buParser_parseWeightedProductions())
      if (p.b != null)
        productionsByA.put(p.a, p);
      else
        subClasses.put(p.a, p.c);
  }
289 | |
  // TODO: now there are multiple groupings
  // Bracketed rendering of a word; for a group, only its FIRST grouping is shown.
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }
295 | |
296 | S groupedConstituents(L<Word> constituents) { |
297 | new L<S> l; |
298 | for (Word w : constituents) |
299 | l.add(curlyBraceIfMultipleTokens(grouped(w))); |
300 | ret joinWithSpace(l); |
301 | } |
302 | |
  // All alternative bracketed renderings of a group (one per constituent split);
  // a leaf word yields just its own text.
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
310 | |
  // Original text of token range [wordFrom, wordTo): when cnc is present, word i
  // sits at cnc index 2*i+1, so the slice below keeps the interior whitespace
  // (N) tokens; otherwise fall back to joining raw words with single spaces.
  S textWithNTokens(int wordFrom, int wordTo) {
    if (cnc == null) ret joinWithSpace(subList(rawWords, wordFrom, wordTo));
    ret join(subList(cnc, wordFrom*2+1, wordTo*2));
  }
315 | } |
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1017479 |
Snippet name: | AI_BottomUpParser1 with observeNTokenHygiene [dev., doesn't really work] |
Eternal ID of this version: | #1017479/10 |
Text MD5: | 521afc8ae22c89e978bd34aa0bfd3ea9 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-07-23 22:00:58 |
Source code size: | 9094 bytes / 315 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 279 / 460 |
Version history: | 9 change(s) |
Referenced in: | [show references] |