sclass AI_BottomUpParser1 {
  Map<S, Set<S>> theSet; // category to literal examples
  new LinkedHashSet<Updatable> allObjects;
  L<Word> words;
  long changes; // change counter; drives the fixed-point loop in parse()
  new MultiMap<S, Pair<S>> groupingsByA; // first class -> (class of following word, resulting group class)
  new MultiMap<S> subClasses; // class -> classes it implies
  bool mergeGroupsInSamePosition = true;
  new HashMap<IntRange, Word> groupsByPosition;

  class Updatable {
    void update {}

    void setField(S field, O value) {
      if (eq(get(this, field), value)) ret;
      set(this, field, value);
      change();
    }
  }

  class Expectation {
    S ifClass; // class we are waiting for
    Runnable action; // run once the word acquires ifClass

    *() {}
    *(S *ifClass, Runnable *action) {}
  }

  class Word extends Updatable {
    S text; // or null if unknown
    int wordFrom, wordTo; // token indices
    new LinkedHashSet<Word> prev;
    new LinkedHashSet<Word> next;
    new LinkedHashSet<L<Word>> constituents; // if group
    new L<Expectation> expectations;
    new L<Expectation> fulfilledExpectations;
    new TreeSet<S> classes;
    int classesConvertedToTraits;
    new LinkedHashSet<Word> groups; // groups this word is part of
    new L<Trait> traits;

    *() {}
    *(S *text, int *wordFrom, int *wordTo) {
      classes.add(quote(lower(text)));
    }

    void update {
      // Add direct word classes
      if (text != null)
        for (S c : reverseLookupInMapToSets(theSet, text))
          addClass(c);

      // Process expectations
      for (Expectation e : cloneList(expectations)) {
        //print("Checking expected class " + e.ifClass);
        if (classes.contains(e.ifClass)) {
          moveElementFromCollectionToCollection(e, expectations, fulfilledExpectations);
          change();
          callF(e.action);
        }
      }

      // Turn newly added classes into traits
      if (l(classes) > classesConvertedToTraits) {
        for (fS c : dropFirst(classesConvertedToTraits, classes))
          addTraitsForClass(c);
        classesConvertedToTraits = l(classes);
      }

      for (Trait t : iterateListConcurrently(traits))
        t.update();
    }

    bool isGroup() { ret nempty(constituents); }

    // Add a class plus everything it implies (via subClasses); returns true if anything was new
    bool addClass(S c) {
      if (!classes.addAll(makeHull_optimized(subClasses, c))) ret false;
      change(); ret true;
    }

    void addExpectation(Expectation e) {
      //print("addExpectation " + e);
      expectations.add(e);
      change();
    }

    void addTraitsForClass(S c) {
      for (PairS p : groupingsByA.get(c))
        addTrait(new LinkWithTo(p.a, p.b));
    }

    void addTrait(Trait t) {
      set(t, w := this);
      traits.add(t);
    }

    toString {
      ret textAndClasses(this);
    }

    bool hasClass(S c) { ret contains(classes, c); }
    S text() { ret text; }
  } // end of class Word

  Word makeGroup(Word a, Word b, S newClass) {
    L<Word> list = null;
    Word g = null;
    if (mergeGroupsInSamePosition)
      g = groupsByPosition.get(IntRange(a.wordFrom, b.wordTo));
    else {
      list = ll(a, b);
      // look for existing group
      for (Word _g : a.groups)
        if (contains(_g.constituents, list)) { g = _g; break; }
    }

    if (list == null) list = ll(a, b);
    if (g != null) {
      g.constituents.add(list);
      if (g.addClass(newClass)) {
        //print("Added class " + newClass + " to existing group: " + a.text + " + " + b.text);
      }
      ret g;
    }

    // new group
    //print("Making group " + newClass + " " + a.text + " + " + b.text);
    //print(" prev=" + sfu(collect(a.prev, 'text)));
    //print(" next=" + sfu(collect(b.next, 'text)));
    g = new Word(joinWithSpace(a.text, b.text), a.wordFrom, b.wordTo);
    allObjects.add(g);
    if (mergeGroupsInSamePosition)
      groupsByPosition.put(IntRange(a.wordFrom, b.wordTo), g);
    g.addClass(newClass);
    g.constituents.add(list);
    for (Word w : list)
      w.groups.add(g);
    g.prev.addAll(a.prev);
    g.next.addAll(b.next);
    for (Word prev : a.prev) prev.next.add(g);
    for (Word next : b.next) next.prev.add(g);
    ret g;
  }

  class Trait extends Updatable {
    Word w; // the word this trait is attached to
  }

  class LinkWithTo extends Trait {
    S linkWith, linkTo; // classes
    int expectationsSentToNext;

    *() {}
    *(S *linkWith, S *linkTo) {}

    void update {
      if (l(w.next) > expectationsSentToNext) {
        for (final Word next : dropFirst(expectationsSentToNext, w.next))
          next.addExpectation(new Expectation(linkWith, r {
            makeGroup(w, next, linkTo)
          }));
        expectationsSentToNext = l(w.next);
      }
    }
  }
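
  // How a grouping rule flows through the classes above: a Word that gains class A
  // looks up groupingsByA[A]; each entry (B, C) becomes a LinkWithTo(B, C) trait.
  // That trait posts an Expectation(B) on every following Word; once such a Word
  // acquires class B, the expectation fires and makeGroup(w, next, C) builds
  // (or extends) a group of class C covering both words.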

  void parse(fS sentence) {
    if (words != null) fail("only call once");
    L<S> rawWords = words(sentence);

    if (theSet == null) theSet = ai_wordCategoriesWithElements();
    parseGroupings();

    words = new L;
    for i over rawWords: {
      Word w = setAllAndReturn(new Word(rawWords.get(i), i, i+1));
      words.add(w);
      if (isQuoted(w.text)) w.addClass("<noun>");
      if (isInteger(w.text)) w.addClass("<number>");
    }
    for (int i = 0; i < l(words)-1; i++)
      linkWords(words.get(i), words.get(i+1));
    //printStruct(first(words));

    addAll(allObjects, words);

    // Keep updating all objects until a full pass causes no further changes (fixed point)
    long lastChanges;
    do {
      lastChanges = changes;
      //print(n2(changes, "change"));
      for (Updatable w : cloneList(allObjects))
        w.update();
    } while (lastChanges != changes);
  }

  void printWordsAndGroups() {
    for (Word w : words) print(" " + textAndClasses(w));
    print();

    L<Word> groups = groups();
    print();
    print(n2(groups, "group"));
    for (Word g : groups)
      print("Group: " + groupedTextAndClasses(g));
  }

  void printConstituentsOfFullGroup() {
    Word g = fullGroup();
    if (g == null) ret;
    print();
    pnl(allGroupings(g));
  }

  L<Word> groups() {
    ret [Word w : instancesOf(Word.class, allObjects) | w.isGroup()];
  }

  // with default flags (mergeGroupsInSamePosition) there is at most one
  L<Word> fullGroups() {
    ret filterByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Word fullGroup() {
    ret findByFields(groups(), wordFrom := 0, wordTo := l(words));
  }

  Set<S> fullClasses() {
    new TreeSet<S> set;
    for (Word g : fullGroups())
      set.addAll(g.classes);
    ret set;
  }

  S bracketStuff(Word w) {
    ret " (" + joinWithComma(w.classes)
      + (empty(w.constituents) ? "" : ", " + n2(w.constituents, "grouping")) + ")";
  }

  S textAndClasses(Word w) { ret w.text + bracketStuff(w); }
  S groupedTextAndClasses(Word w) { ret grouped(w) + bracketStuff(w); }

  void linkWords(Word a, Word b) {
    a.next.add(b);
    b.prev.add(a);
  }

  void change() { ++changes; }

  void parseGroupings() {
    for (S s : mL(ai_language() + " bottom-up groupings")) {
      L<S> tok = javaTokWithAngleBracketsC(s);
      if (l(tok) == 5)
        groupingsByA.put(tok.get(0), pair(tok.get(2), tok.get(4)));
      else if (l(tok) == 3)
        subClasses.put(tok.get(0), tok.get(2));
    }
  }
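
  // Sketch of what the "<language> bottom-up groupings" rules presumably look like,
  // inferred only from the token positions used above (tokens 0/2/4 for 5-token lines,
  // tokens 0/2 for 3-token lines); the separator characters shown are assumptions,
  // not taken from the actual rule list:
  //
  //   <a> + <b> = <c>   ->  groupingsByA: a word of class <a> followed by <b> groups into <c>
  //   <a> = <b>         ->  subClasses: class <a> implies class <b>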

  // TODO: now there are multiple groupings
  S grouped(Word g) {
    if (empty(g.constituents)) ret g.text;
    ret groupedConstituents(first(g.constituents));
  }

  S groupedConstituents(L<Word> constituents) {
    new L<S> l;
    for (Word w : constituents)
      l.add(curlyBraceIfMultipleTokens(grouped(w)));
    ret joinWithSpace(l);
  }

  Set<S> allGroupings(Word g) {
    if (empty(g.constituents)) ret litorderedset(g.text);
    new LinkedHashSet<S> set;
    for (L<Word> l : g.constituents)
      set.add(groupedConstituents(l));
    ret set;
  }
}
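
// Hypothetical usage sketch (not part of this fragment): how the parser would
// typically be driven from other JavaX code. The sentence is made up; the calls
// are the public methods defined above. parse() may only be called once per instance.
/*
  AI_BottomUpParser1 parser = new AI_BottomUpParser1();
  parser.parse("the little cat sleeps");
  parser.printWordsAndGroups();          // each word with its classes, then all groups found
  print(parser.fullClasses());           // classes of groups spanning the whole sentence
  parser.printConstituentsOfFullGroup(); // bracketings of the full-sentence group, if any
*/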
// Began life as a copy of #1017351