sclass AI_BottomUpParser1 {
  // word text -> set of grammar categories; lazily filled in _parse() via ai_wordToCategories()
  Map<S, Set<S>> wordsToCategories;
  // every Updatable (leaf words and groups) made so far; iterated to a fixpoint in _parse()
  new LinkedHashSet<Updatable> allObjects;
  L<Word> words;    // leaf Word objects, one per raw token
  L<S> rawWords;    // the plain word tokens of the input sentence
  L<S> cnc; // words + N tokens (null if unknown)
  long changes;     // global change counter; the fixpoint loop stops when a full pass leaves it unchanged
  // two-symbol productions "a b => c", keyed by the left-hand symbol a (see parseGroupings())
  new MultiMap<S, WeightedProduction> productionsByA;
  // one-symbol productions "a => c" (class/superclass edges); presumably MultiMap<S,S> — see parseGroupings()
  new MultiMap<S> subClasses;
  bool mergeGroupsInSamePosition = true; // reuse a single group Word per token range instead of one per constituent pair
  new HashMap<IntRange, Word> groupsByPosition; // token range -> merged group (only used when merging)
  bool verbose_callPlausibilityFunction; // log every plausibility-function call (see LinkWithTo)
  bool observeNTokenHygiene; // reject groupings failing checkNTokenHygiene() in makeGroup()
14 | |
  // Base class for everything driven by the fixpoint loop in _parse().
  class Updatable {
    // Hook: recompute derived state; called repeatedly until no pass changes anything.
    void update {}

    // Reflectively set a field, bumping the global change counter only on an actual change.
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret; // no-op writes must not count as changes
      set(this, field, value);
      change();
    }
  }
24 | |
  // A pending trigger on a word: when the word gains class ifClass,
  // the expectation is moved to fulfilledExpectations and action runs once.
  class Expectation {
    S ifClass;       // class name being waited for
    Runnable action; // fired when ifClass appears (see Word.update)

    *() {}
    *(S *ifClass, Runnable *action) {}
  }
32 | |
  // A parse node: either a leaf token or a group covering token range [wordFrom, wordTo).
  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev; // words/groups that may directly precede this one
    new LinkedHashSet<Word> next; // words/groups that may directly follow this one
    new LinkedHashSet<L<Word>> constituents; // if group: alternative splits into sub-words
    new L<Expectation> expectations;          // pending class triggers
    new L<Expectation> fulfilledExpectations; // triggers that already fired
    new TreeSet<S> classes; // all grammar classes assigned so far (sorted set)
    int classesConvertedToTraits; // count of classes already turned into traits
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits; // behaviors attached to this word (e.g. LinkWithTo)

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      // every word trivially belongs to the class named by its own quoted, lowercased text
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Turn newly added classes into traits.
      // NOTE(review): dropFirst over a sorted TreeSet assumes new classes always
      // sort after the already-converted ones, which is not guaranteed — a class
      // inserted at the front could be skipped or another re-processed; verify.
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Add c plus everything reachable from it through subClasses (its hull).
    // Returns true (and counts a change) only if any class was actually new.
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) false; // JavaX: ret false
      change(); true; // JavaX: ret true
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // For every production "c b => x", attach a trait waiting for a b to our right.
    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    void addTrait(Trait t) {
      set(t, w := this); // back-reference: trait knows its owning word
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word
107 | |
  // Combine adjacent words a+b into a group of class newClass.
  // Reuses an existing group when possible; returns the group,
  // or null if the grouping is rejected by N-token hygiene.
  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      // merging mode: at most one group per token range, whatever its constituents
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group with exactly these constituents
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      // existing group: just record this split and the (possibly new) class
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group, check hygiene

    if (observeNTokenHygiene && cnc != null) {
      // slice of cnc spanning a..b, including the whitespace (N) tokens at the edges
      L<S> t = subList(cnc, a.wordFrom*2, b.wordTo*2+1);
      print("Checking hygiene: " + sfu(t));
      if (!checkNTokenHygiene(t)) {
        print("Rejecting unhygienic grouping: " + join(t));
        null; // JavaX: ret null
      }
    }

    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print("  prev=" + sfu(collect(a.prev, 'text)));
    //print("  next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g); // so the fixpoint loop starts updating it
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    // splice the group into the adjacency graph alongside its constituents
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }
157 | |
  // Base class for per-word behaviors; w is the owning word (set by Word.addTrait).
  class Trait extends Updatable {
    Word w;
  }
161 | |
  // Trait realizing a production "A B => C": when the owning word (class A) is
  // followed by a word of class linkWith (B), group both into class linkTo (C).
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction; // optional named predicate over (leftText, rightText); null = always plausible
    int expectationsSentToNext; // how many of w.next already received an expectation

    *() {}
    *(S *linkWith, S *linkTo, S *plausibilityFunction) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        // only successors that appeared since the last pass get a new expectation
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
              O result = pcallAndMake(plausibilityFunction, w.text, next.text);
              if (verbose_callPlausibilityFunction)
                print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
              if (isFalse(result))
                ret; // vetoed by plausibility function - don't group
            }
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }
187 | |
  // Parse from a pre-tokenized list alternating N (whitespace) and code tokens.
  void parse(L<S> tok) {
    cnc = tok;
    rawWords = codeTokens(cnc); // extract just the word tokens
    _parse();
  }
193 | |
194 | void parse(fS sentence) { |
195 | if (words != null) fail("only call once"); |
196 | if (observeNTokenHygiene) |
197 | parse(javaTokNPunctuation(sentence)); |
198 | else |
199 | rawWords = main.words(sentence); |
200 | _parse(); |
201 | } |
202 | |
  // Core driver: build leaf words from rawWords, link neighbors,
  // then run every Updatable to a fixpoint of the change counter.
  void _parse() {
    ai_splitSplittables(rawWords); // token normalization pass

    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
    parseGroupings(); // load the grammar productions

    // one leaf Word per raw token, spanning [i, i+1)
    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      // built-in seed classes
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }
    // chain adjacent words into the prev/next graph
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);
    // fixpoint: keep updating (including groups added along the way,
    // via cloneList snapshots) until a full pass causes no change
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }
229 | |
  // Debug output: every leaf word with its classes, then every group.
  void printWordsAndGroups() {
    for (Word w : words) print("  " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }
240 | |
  // Debug output: all alternative bracketings of the group covering the whole sentence
  // (silently does nothing when no full parse exists).
  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }
247 | |
  // All Word objects ever created (leaves and groups), in creation order.
  L<Word> words() { ret instancesOf(Word.class, allObjects); }
  // Only the group words (JavaX list-comprehension filter).
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }
250 | |
  // only one with default flags
  // All words/groups spanning the entire token range (l(words) = token count).
  L<Word> fullGroups() {
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
  }
255 | |
  // First word/group spanning the entire sentence, or null if no full parse exists.
  Word fullGroup() {
    ret findByFields(words(), wordFrom := 0, wordTo := l(words));
  }
259 | |
260 | Set<S> fullClasses() { |
261 | new TreeSet<S> set; |
262 | for (Word g : fullGroups()) |
263 | set.addAll(g.classes); |
264 | ret set; |
265 | } |
266 | |
267 | S bracketStuff(Word w) { |
268 | ret " (" + joinWithComma(w.classes) + |
269 | + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")"; |
270 | } |
271 | |
  // Flat text + class annotation, e.g. used by Word.toString.
  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  // Bracketed (grouped) text + class annotation.
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }
274 | |
275 | void linkWords(Word a, Word b) { |
276 | a.next.add(b); |
277 | b.prev.add(a); |
278 | } |
279 | |
  // Record that something changed; the fixpoint loop in _parse() keys off this counter.
  void change() { ++changes; }
281 | |
  // Load the grammar: productions with a right partner ("a b => c") are indexed
  // by their first symbol; partnerless ones ("a => c") become subclass edges.
  void parseGroupings() {
    for (WeightedProduction p : ai_buParser_parseWeightedProductions())
      if (p.b != null)
        productionsByA.put(p.a, p);
      else
        subClasses.put(p.a, p.c);
  }
289 | |
  // TODO: now there are multiple groupings
  // Bracketed rendering of a word; for a group, only its FIRST grouping is shown.
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }
295 | |
296 | S groupedConstituents(L<Word> constituents) { |
297 | new L<S> l; |
298 | for (Word w : constituents) |
299 | l.add(curlyBraceIfMultipleTokens(grouped(w))); |
300 | ret joinWithSpace(l); |
301 | } |
302 | |
  // All alternative bracketed renderings of a group (one per constituent split);
  // a leaf word yields just its own text.
  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
310 | |
  // Original text of token range [wordFrom, wordTo): when cnc is present, word i
  // sits at cnc index 2*i+1, so the slice below keeps the interior whitespace
  // (N) tokens; otherwise fall back to joining raw words with single spaces.
  S textWithNTokens(int wordFrom, int wordTo) {
    if (cnc == null) ret joinWithSpace(subList(rawWords, wordFrom, wordTo));
    ret join(subList(cnc, wordFrom*2+1, wordTo*2));
  }
315 | } |
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1017479 |
Snippet name: | AI_BottomUpParser1 with observeNTokenHygiene [dev., doesn't really work] |
Eternal ID of this version: | #1017479/10 |
Text MD5: | 521afc8ae22c89e978bd34aa0bfd3ea9 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-07-23 22:00:58 |
Source code size: | 9094 bytes / 315 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 279 / 460 |
Version history: | 9 change(s) |
Referenced in: | [show references] |