1 | sclass AI_BottomUpParser2 { |
2 | Map<S, Set<S>> theSet; // category to literal examples |
3 | new LinkedHashSet<Updatable> allObjects; |
4 | L<Word> words; |
5 | long changes; |
6 | new MultiMap<S, Production> productionsByA; |
7 | new MultiMap<S> subClasses; |
8 | bool mergeGroupsInSamePosition = true; |
9 | new HashMap<IntRange, Word> groupsByPosition; |
10 | |
11 | sclass Production { |
12 | S a, b, c; // a + b = c |
13 | double weight; |
14 | |
15 | *() {} |
16 | *(S *a, S *b, S *c, double *weight) {} |
17 | } |
18 | |
19 | class Updatable { |
20 | void update {} |
21 | |
22 | void setField(S field, O value) { |
23 | if (eq(get(this, field), value)) ret; |
24 | set(this, field, value); |
25 | change(); |
26 | } |
27 | } |
28 | |
29 | class Expectation { |
30 | S ifClass; |
31 | Runnable action; |
32 | |
33 | *() {} |
34 | *(S *ifClass, Runnable *action) {} |
35 | } |
36 | |
37 | class Word extends Updatable { |
38 | S text; // or null if unknown |
39 | int wordFrom, wordTo; // token indices |
40 | new LinkedHashSet<Word> prev; |
41 | new LinkedHashSet<Word> next; |
42 | new LinkedHashSet<L<Word>> constituents; // if group |
43 | new L<Expectation> expectations; |
44 | new L<Expectation> fulfilledExpectations; |
45 | new Map<S, Double> classes; // weighted |
46 | int classesConvertedToTraits; |
47 | new LinkedHashSet<Word> groups; // I am part of |
48 | new L<Trait> traits; |
49 | |
50 | *() {} |
51 | *(S *text, int *wordFrom, int *wordTo) { |
52 | addClass(quote(lower(text)), 1); |
53 | } |
54 | |
55 | void update { |
56 | // Add direct word classes |
57 | if (text != null) |
58 | for (S c : reverseLookupInMapToSets(theSet, text)) |
59 | addClass(c, 1); // TODO |
60 | |
61 | // Process expectations |
62 | for (Expectation e : cloneList(expectations)) { |
63 | //print("Checking expected class " + e.ifClass); |
64 | if (classes.containsKey(e.ifClass)) { |
65 | moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations); |
66 | change(); |
67 | callF(e.action); |
68 | } |
69 | } |
70 | |
71 | if (l(classes) > classesConvertedToTraits) { |
72 | for (fS c : dropFirst(classesConvertedToTraits, classes)) |
73 | addTraitsForClass(c); |
74 | classesConvertedToTraits = l(classes); |
75 | } |
76 | |
77 | for (Trait t : iterateListConcurrently(traits)) |
78 | t.update(); |
79 | } |
80 | |
81 | bool isGroup() { ret nempty(constituents); } |
82 | |
83 | bool addClass(S c, double weight) { |
84 | bool change = false; |
85 | for (S subClass : makeHull_optimized(subClasses, c)) |
86 | if (weight > toDouble(classes.get(subClass))) { |
87 | classes.put(subClass, c); |
88 | change = true; |
89 | } |
90 | } |
91 | if (change) change(); ret change; |
92 | } |
93 | |
94 | void addExpectation(Expectation e) { |
95 | //print("addExpectation " + e); |
96 | expectations.add(e); |
97 | change(); |
98 | } |
99 | |
100 | void addTraitsForClass(S c) { |
101 | for (Production p : productionsByA.get(c)) |
102 | addTrait(new LinkWithTo(p)); |
103 | } |
104 | |
105 | void addTrait(Trait t) { |
106 | set(t, w := this); |
107 | traits.add(t); |
108 | } |
109 | |
110 | toString { |
111 | ret textAndClasses(this); |
112 | } |
113 | |
114 | bool hasClass(S c) { ret containsKey(classes, c); } |
115 | S text() { ret text; } |
116 | } // end of class Word |
117 | |
118 | Word makeGroup(Word a, Word b, Production prod) { |
119 | L<Word> list = null; |
120 | Word g = null; |
121 | if (mergeGroupsInSamePosition) |
122 | g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo)); |
123 | else { |
124 | list = ll(a, b); |
125 | // look for existing group |
126 | for (Word _g : a.groups) |
127 | if (contains(_g.constituents, list)) { g = _g; break; } |
128 | } |
129 | |
130 | if (list == null) list = ll(a, b); |
131 | if (g != null) { |
132 | g.constituents.add(list); |
133 | double w = formula(prod, a, b); |
134 | if (g.addClass(prod.c, w)) { |
135 | //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text); |
136 | } |
137 | ret g; |
138 | } |
139 | |
140 | // new group |
141 | //print("Making group " + newClass + " " + a.text + " + " + b.text); |
142 | //print(" prev=" + sfu(collect(a.prev, 'text))); |
143 | //print(" next=" + sfu(collect(b.next, 'text))); |
144 | g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo); |
145 | allObjects.add(g); |
146 | if (mergeGroupsInSamePosition) |
147 | groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g); |
148 | g.constituents.add(list); |
149 | g.addClass(prod.c, formula(prod, a, b)); |
150 | for (Word w : list) |
151 | w.groups.add(g); |
152 | g.prev.addAll(a.prev); |
153 | g.next.addAll(b.next); |
154 | for (Word prev : a.prev) prev.next.add(g); |
155 | for (Word next : b.next) next.prev.add(g); |
156 | ret g; |
157 | } |
158 | |
159 | double formula(Production prod, Word a, Word b) { |
160 | retprod.weight*avg(a.getWeight(prod.a), b.getWeight(prod.b)); |
161 | } |
162 | |
163 | class Trait extends Updatable { |
164 | Word w; |
165 | } |
166 | |
167 | class LinkWithTo extends Trait { |
168 | Production production; |
169 | int expectationsSentToNext; |
170 | |
171 | *() {} |
172 | *(Production *production) {} |
173 | |
174 | void update { |
175 | if (l(w.next) > expectationsSentToNext) { |
176 | for (final Word next : dropFirst(expectationsSentToNext, w.next)) |
177 | next.addExpectation(new Expectation(production.b, r { |
178 | makeGroup(w, next, production) |
179 | })); |
180 | expectationsSentToNext = l(w.next); |
181 | } |
182 | } |
183 | } |
184 | |
185 | void parse(fS sentence) { |
186 | if (words != null) fail("only call once"); |
187 | L<S> rawWords = words(sentence); |
188 | |
189 | if (theSet == null) theSet = ai_wordCategoriesWithElements(); |
190 | parseGroupings(); |
191 | |
192 | words = new L; |
193 | for i over rawWords: { |
194 | Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1)); |
195 | words.add(w); |
196 | if (isQuoted(w.text)) w.addClass("<noun>", 0.8); |
197 | if (isInteger(w.text)) w.addClass("<number>", 1); |
198 | } |
199 | for (int i = 0; i < l(words)-1; i++) |
200 | linkWords(words.get(i), words.get(i+1)); |
201 | //printStruct(first(words)); |
202 | |
203 | addAll(allObjects, words); |
204 | long lastChanges; |
205 | do { |
206 | lastChanges = changes; |
207 | //print(n2(changes, "change")); |
208 | for (Updatable w : cloneList(allObjects)) |
209 | w.update(); |
210 | } while (lastChanges != changes); |
211 | } |
212 | |
213 | void printWordsAndGroups() { |
214 | for (Word w : words) print(" " + textAndClasses(w)); |
215 | print(); |
216 | |
217 | L<Word> groups = groups(); |
218 | print(); |
219 | print(n2(groups, "group")); |
220 | for (Word g : groups) |
221 | print("Group: " + groupedTextAndClasses(g)); |
222 | } |
223 | |
224 | void printConstituentsOfFullGroup() { |
225 | Word g = fullGroup(); |
226 | if (g == null) ret; |
227 | print(); |
228 | pnl(allGroupings(g)); |
229 | } |
230 | |
231 | L<Word> groups() { |
232 | ret [Word w : instancesOf(Word.class, allObjects) | w.isGroup()]; |
233 | } |
234 | |
235 | // only one with default flags |
236 | L<Word> fullGroups() { |
237 | ret filterByFields(groups(), wordFrom := 0, wordTo := l(words)); |
238 | } |
239 | |
240 | Word fullGroup() { |
241 | ret findByFields(groups(), wordFrom := 0, wordTo := l(words)); |
242 | } |
243 | |
244 | Set<S> fullClasses() { |
245 | new TreeSet<S> set; |
246 | for (Word g : fullGroups()) |
247 | set.addAll(g.classes); |
248 | ret set; |
249 | } |
250 | |
251 | S bracketStuff(Word w) { |
252 | ret " (" + joinWithComma(w.classes) + |
253 | + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")"; |
254 | } |
255 | |
256 | S textAndClasses(Word w) { ret w.text + bracketStuff(w); } |
257 | S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); } |
258 | |
259 | void linkWords(Word a, Word b) { |
260 | a.next.add(b); |
261 | b.prev.add(a); |
262 | } |
263 | |
264 | void change() { ++changes; } |
265 | |
266 | void parseGroupings() { |
267 | for (S s : mL(ai_language() + " bottom-up groupings")) { |
268 | L<S> tok = javaTokWithAngleBracketsC(s); |
269 | if (l(tok) == 5) |
270 | groupingsByA.put(tok.get(0), pair(tok.get(2), tok.get(4))); |
271 | else if (l(tok) == 3) |
272 | subClasses.put(tok.get(0), tok.get(2)); |
273 | } |
274 | } |
275 | |
276 | // TODO: now there are multiple groupings |
277 | S grouped(Word g) { |
278 | if (empty(g.constituents)) ret g.text; |
279 | ret groupedConstituents(first(g.constituents)); |
280 | } |
281 | |
282 | S groupedConstituents(L<Word> constituents) { |
283 | new L<S> l; |
284 | for (Word w : constituents) |
285 | l.add(curlyBraceIfMultipleTokens(grouped(w))); |
286 | ret joinWithSpace(l); |
287 | } |
288 | |
289 | Set<S> allGroupings(Word g) { |
290 | if (empty(g.constituents)) ret litorderedset(g.text); |
291 | new LinkedHashSet<S> set; |
292 | for (L<Word> l : g.constituents) |
293 | set.add(groupedConstituents(l)); |
294 | ret set; |
295 | } |
296 | } |
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1017385 |
Snippet name: | AI_BottomUpParser2 - with weights [dev.] |
Eternal ID of this version: | #1017385/3 |
Text MD5: | f045ef122c40514aeab6857dc7f7cfdd |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-07-19 17:05:03 |
Source code size: | 8310 bytes / 296 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 302 / 333 |
Version history: | 2 change(s) |
Referenced in: | [show references] |