sclass AI_BottomUpParser1 {
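  // Bottom-up parser: each token becomes a Word node linked to its neighbors
  // (prev/next). Productions attach LinkWithTo traits to words; a trait places
  // an Expectation on following words, and when the expected class appears,
  // makeGroup() merges the two words into a larger Word. The loop in _parse()
  // keeps calling update() on all objects until no more changes occur (fixpoint).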
  Map<S, Set<S>> wordsToCategories;
  bool splitSplittables = true; // e.g. what's => what is
  S productionsText;

  L<Word> words;
  L<S> rawWords;
  L<S> cnc; // words + N tokens (null if unknown)

  new LinkedHashSet<Updatable> allObjects;
  long changes, iterations;
  new MultiMap<S, WeightedProduction> productionsByA;
  new MultiMap<S, S> subClasses;
  bool mergeGroupsInSamePosition = true;
  new HashMap<IntRange, Word> groupsByPosition;
  bool verbose_callPlausibilityFunction;
  Map<Int, L<S>> categoriesAtWordIndex; // user can set categories for each token
  int maxDepth = 100;
  bool maxDepthReached;

  // callbacks
  Runnable afterIteration, afterParse;

  // Base class for objects re-checked via update(); setField() only writes a
  // field when the value actually differs and then records a change.
  class Updatable {
    void update {}

    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  // Runs action once the owning word acquires class ifClass.
  class Expectation {
    S ifClass;
    Runnable action;

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits;
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : unnull(wordsToCategories.get(text)))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) ret false;
      change(); ret true;
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    void addTraitsForClass(S c) {
      for (WeightedProduction p : productionsByA.get(c))
        addTrait(new LinkWithTo(p.b, p.c, p.plausibilityFunction));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word

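  // Merges two adjacent Words into a parent Word spanning both token ranges and
  // carrying newClass. With mergeGroupsInSamePosition, a group covering the same
  // position is reused and simply collects the extra class/constituent list.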
  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  class Trait extends Updatable {
    Word w;
  }

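  // Illustrative example of the mechanism (class names are made up): given a
  // production <det> + <noun> => <nounphrase>, a word of class <det> gets a
  // LinkWithTo("<noun>", "<nounphrase>", ...) trait. The trait places an
  // Expectation on each following word; once such a word turns out to be a
  // <noun>, the expectation fires and makeGroup() builds a <nounphrase> group.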
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    S plausibilityFunction;
    int expectationsSentToNext;

    *() {}
    *(S *linkWith, S *linkTo, S *plausibilityFunction) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            if (ai_parser_activateStandardFunctions_get() && plausibilityFunction != null) {
              loadFunctions_preferDiskCache(); // note: changes this for whole program
              O result = pcallAndMake(plausibilityFunction, w.text, next.text);
              if (verbose_callPlausibilityFunction)
                print("Called plausibility function " + plausibilityFunction + ": " + w.text + " + " + next.text + " => " + result);
              if (isFalse(result))
                ret;
            }
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }

  void parse(L<S> tok) {
    cnc = /*simpleSpaces2*/(tok);
    rawWords = codeTokens(cnc);
    _parse();
  }

  AI_BottomUpParser1 parse(fS sentence) {
    rawWords = words_withAngleBrackets(sentence);
    _parse();
    ret this;
  }

  void _parse() {
    if (words != null) fail("only call once");
    if (splitSplittables)
      ai_splitSplittables(rawWords);

    if (wordsToCategories == null) wordsToCategories = ai_wordToCategories();
    parseProductions();

    words = new L;
    for i over rawWords: {
      Word w = new Word(rawWords.get(i), i, i+1);
      words.add(w);
      if (isAngleBracketed(w.text)) w.addClass(w.text);
      else if (isQuoted(w.text)) w.addClass("<noun>");
      else if (isInteger(w.text)) w.addClass("<number>");
      for (S cat : unnull(mapGet(categoriesAtWordIndex, i))) w.addClass(cat);
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);

    // keep updating all objects until a full pass produces no changes (fixpoint)
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
      ++iterations;
      callF(afterIteration);
    } while (licensed() && lastChanges != changes);
    callF(afterParse);
  }

  void printWordsAndGroups() {
    for (Word w : words) print("  " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  L<Word> words() { ret instancesOf(Word.class, allObjects); }
  L<Word> groups() { ret [Word w : words() | w.isGroup()]; }

  // with default flags there is only one full group
  L<Word> fullGroups() {
    ret filterByFields(words(), wordFrom := 0, wordTo := l(words));
  }

  Word fullGroup() {
    ret findByFields(words(), wordFrom := 0, wordTo := l(words));
  }

  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }

  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  S fullGrouped() { ret grouped(fullGroup()); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

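  // Productions with a second left-hand class (p.b != null) are indexed under
  // their first class; single-element productions (a => c) mean a word of class
  // a also receives class c (applied via subClasses and makeHull_optimized in
  // addClass).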
  void parseProductions() {
    for (WeightedProduction p :
      productionsText == null ? ai_buParser_parseWeightedProductions() : ai_buParser_parseWeightedProductions(productionsText))
      if (p.b != null)
        productionsByA.put(p.a, p);
      else
        subClasses.put(p.a, p.c);
  }

  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (g == null) ret null;
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
}
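
// Usage sketch (illustrative; assumes the standard production/category data
// returned by ai_buParser_parseWeightedProductions() and ai_wordToCategories()
// is available):
//
//   AI_BottomUpParser1 parser = new AI_BottomUpParser1();
//   parser.afterIteration = r { print(n2(parser.changes, "change")); };
//   parser.parse("the cat sat on the mat");
//   parser.printWordsAndGroups();   // each word with its classes, then the groups
//   print(parser.fullClasses());    // classes of groups covering the whole input
//   print(parser.fullGrouped());    // one bracketing of a full parse, if any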