sclass AI_BottomUpParser1 {
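  // Bottom-up parser: every token becomes a Word, Words collect classes from
  // the category map (theSet) and from subclass rules, and grouping rules merge
  // adjacent Words into group Words. parse() re-runs update() on all objects
  // until the change counter stops moving, i.e. a fixpoint is reached.
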
  Map<S, Set<S>> theSet; // category to literal examples
  new LinkedHashSet<Updatable> allObjects; // every Word/group taking part in the update loop
  L<Word> words; // one Word per token of the parsed sentence
  long changes; // change counter driving the fixpoint loop in parse()
  new MultiMap<S, Pair<S>> groupingsByA; // first class -> (second class, result class)
  new MultiMap<S> subClasses; // class -> classes it implies (hulled in addClass)
  bool mergeGroupsInSamePosition = true;
  new HashMap<IntRange, Word> groupsByPosition; // token range -> group, when merging by position

  class Updatable {
    void update {}

    // reflectively set a field, counting a change only if the value differs
    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  class Expectation {
    S ifClass; // class the receiving word has to acquire
    Runnable action; // run once when that class appears

    *() {}
    *(S *ifClass, Runnable *action) {}
  }
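
  // How Expectations are used: a LinkWithTo trait on word A registers an
  // Expectation on each word following A; once that word acquires the expected
  // class, the action fires and makeGroup() merges both words into a group.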

  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits;
    new LinkedHashSet<Word> groups; // I am part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text))); // the quoted, lower-cased token is itself a class
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Turn newly acquired classes into traits (grouping rules)
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    bool addClass(S c) {
      // also add every class implied by c (transitive hull over subClasses)
      if (!classes.addAll(makeHull_optimized(subClasses, c))) ret false;
      change(); ret true;
    }
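
    // Example of the hull above (hypothetical categories, assuming a subclass
    // line such as "<proper noun> = <noun>"): addClass("<proper noun>") would
    // also add "<noun>", and any classes that one implies in turn.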

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    // attach a LinkWithTo trait for every grouping rule whose first class is c
    void addTraitsForClass(S c) {
      for (PairS p : groupingsByA.get(c))
        addTrait(new LinkWithTo(p.a, p.b));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word

  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }
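
  // Example (hypothetical classes and rule): if "the" is an <article>, "cat" is
  // a <noun>, and a rule combines them into a <noun phrase>, makeGroup returns a
  // Word "the cat" spanning tokens [0,2) with class <noun phrase>, wired into the
  // prev/next links of its constituents. With mergeGroupsInSamePosition enabled,
  // later groupings over the same token range are merged into that same Word.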

  class Trait extends Updatable {
    Word w; // the word this trait is attached to
  }

  // rule trait: if a word of class linkWith follows w, group them into a word of class linkTo
  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    int expectationsSentToNext;

    *() {}
    *(S *linkWith, S *linkTo) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            makeGroup(w, next, linkTo);
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }
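
  // Example flow for a hypothetical rule "A + B = C" (all three being classes):
  // a word gaining class A receives a LinkWithTo(B, C) trait via
  // addTraitsForClass; the trait puts an Expectation on each following word;
  // once such a word has class B, makeGroup builds a group of class C.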

  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> rawWords = words(sentence);

    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();

    // one Word per token, plus a few built-in classes
    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    // update everything until no object reports a change anymore (fixpoint)
    addAll(allObjects, words);
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }
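
  // Usage sketch (not part of the original snippet; the sentence is made up and
  // the resulting classes depend entirely on the loaded category/grouping data):
  //   AI_BottomUpParser1 parser = new AI_BottomUpParser1();
  //   parser.parse("the little cat sleeps");
  //   parser.printWordsAndGroups();
  //   print(parser.fullClasses()); // classes of groups covering the whole sentence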

  void printWordsAndGroups() {
    for (Word w : words) print(" " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  L<Word> groups() {
    ret [Word w : instancesOf(Word.class, allObjects) | w.isGroup()];
  }

  // with default flags (mergeGroupsInSamePosition), there is at most one full group
  L<Word> fullGroups() {
    ret filterByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Word fullGroup() {
    ret findByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }
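
  // A "full" group spans every token of the sentence (wordFrom 0 .. l(words)),
  // so fullClasses() yields the sentence-level classes the parser arrived at.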

  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  void parseGroupings() {
    for (S s : mL(ai_language() + " bottom-up groupings")) {
      L<S> tok = javaTokWithAngleBracketsC(s);
      if (l(tok) == 5) // grouping line: classes at token positions 0, 2, 4
        groupingsByA.put(tok.get(0), pair(tok.get(2), tok.get(4)));
      else if (l(tok) == 3) // subclass line: class at 0 implies class at 2
        subClasses.put(tok.get(0), tok.get(2));
    }
  }
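
  // Assumed format of the "... bottom-up groupings" list (only the token counts
  // are fixed by the code above; the separator characters are a guess):
  //   <article> + <noun> = <noun phrase>   (5 tokens: grouping rule A + B = C)
  //   <proper noun> = <noun>               (3 tokens: subclass rule)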

  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
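
  // Example of the grouped notation (hypothetical parse): a full group over
  // "the cat sleeps" whose constituents are [group("the cat"), word("sleeps")]
  // renders as "{the cat} sleeps" - multi-token constituents get curly braces.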
}
Began life as a copy of #1017351
Snippet ID: #1017384
Snippet name: AI_BottomUpParser1 (old, without weights)
Eternal ID of this version: #1017384/1
Text MD5: f0f41ed91ea41a7e4eb6fb30e92afb08
Author: stefan
Category: javax / a.i.
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2018-07-19 16:50:08
Source code size: 7741 bytes / 277 lines