Libraryless. Click here for Pure Java version (8065L/53K/181K).
1 | !7 |
2 | |
// Score that bestLearner reported for 'best'; assigned at the end of the main program.
static double bestScore;
// Winning guesser as selected by bestLearner; assigned at the end of the main program.
static Guesser best;
5 | |
// Stored concept holding one sentence and its annotation.
// 'action' may be null (learningMaterial skips such entries); otherwise it is
// fed to ai_parseSubjectAction to obtain the subject range.
concept Sentence {
  S text, action;
}
10 | |
// A single labelled example: the tokens of a sentence plus the token range
// (start, end - as handed to subList) that was marked as the subject.
sclass Example {
  L<S> tok;
  int start, end;

  *() {}

  // Copies the bounds out of subjectTokens; the IntRange itself is not retained.
  *(L<S> tok, IntRange subjectTokens) {
    this.tok = tok;
    this.start = subjectTokens.start;
    this.end = subjectTokens.end;
  }

  // Renders as: "<quoted sentence> => <subject tokens>"
  public S toString() {
    S sentence = quote(joinWithSpaces(tok));
    S subject = joinWithSpaces(subList(tok, start, end));
    return sentence + " => " + subject;
  }
}
25 | |
// Base class for all subject guessers.
abstract sclass Guesser {
  // Trains on one example. Does nothing by default; trainable guessers override this.
  void learn(Example e) {}

  // Trains on a batch by feeding every example to learn(Example), in list order.
  void learn(L<Example> material) {
    for (Example example : material) {
      learn(example);
    }
  }

  // Returns the guessed subject token range for the sentence tokens.
  // Implementations in this file return null when they have no guess.
  abstract IntRange getSubjectTokens(L<S> tok);
}
34 | |
// Baseline guesser: always answers with the first token, i.e. the range (0, 1).
// The input tokens are not inspected and nothing is learned.
Guesser > G1 {
  IntRange getSubjectTokens(L<S> tok) {
    IntRange firstWord = new IntRange(0, 1);
    return firstWord;
  }
}
40 | |
// Guesser that skips leading words it has seen in front of a subject and
// then answers with the single token that follows.
Guesser > G2 {
  // Lower-cased token sequences that preceded the subject in training examples.
  StringTree1 skipTree = new StringTree1();

  IntRange getSubjectTokens(L<S> tok) {
    int n = walkStringTreeToLeaf(skipTree, allToLower(tok));
    // A negative result is treated as "no guess".
    // NOTE(review): presumably n is the number of tokens matched on the way to a leaf - confirm.
    return n < 0 ? null : new IntRange(n, n + 1);
  }

  void learn(Example e) {
    // Subject already at the front of the sentence: there is no prefix to record.
    if (e.start <= 0) return;
    L<S> prefix = takeFirst(e.tok, e.start);
    addToStringTree(skipTree, allToLower(prefix));
  }
}
55 | |
// Guesser that extends the subject beyond the first word as long as the
// leading words match a sequence seen in training.
Guesser > G3 {
  // Lower-cased subject tokens (all but the last one) taken from training examples.
  StringTree1 continuationTree = new StringTree1();

  IntRange getSubjectTokens(L<S> tok) {
    int n = walkStringTreeToLeaf(continuationTree, allToLower(tok));
    // A negative result is treated as "no guess"; otherwise the subject
    // spans from the first token up to and including token n.
    return n < 0 ? null : new IntRange(0, n + 1);
  }

  void learn(Example e) {
    // Every subject token except the last one.
    L<S> continuation = allToLower(subList(e.tok, e.start, e.end - 1));
    if (!nempty(continuation)) return; // nothing to record for this example
    addToStringTree(continuationTree, continuation);
  }
}
71 | |
// Two-stage guesser: 'a' decides how many leading tokens to skip (the start
// of its guess), then 'b' guesses on the remaining tokens. b's answer is
// shifted back so it refers to the original token list.
Guesser > SkipFirst {
  Guesser a, b;

  *() {}
  *(Guesser *a, Guesser *b) {}

  // Returns null when 'b' has no guess for the remaining tokens.
  IntRange getSubjectTokens(L<S> tok) {
    IntRange r = a.getSubjectTokens(tok);
    // No guess from 'a' means nothing is skipped.
    int skip = r == null ? 0 : r.start;
    IntRange inner = b.getSubjectTokens(dropFirst(skip, tok));
    // Propagate "no guess" explicitly instead of handing null to shiftIntRange.
    if (inner == null) null;
    ret shiftIntRange(skip, inner);
  }

  // Both stages are trained on the same, unmodified material.
  void learn(L<Example> material) {
    a.learn(material);
    b.learn(material);
  }
}
89 | |
// Tries a list of guessers in order and answers with the first non-null guess.
Guesser > Chained {
  new L<Guesser> l;

  *() {}
  *(Guesser... guessers) { addAll(l, guessers); }

  // Each member is invoked through pcall; the first non-null result wins.
  // Returns null if no member produces a guess.
  IntRange getSubjectTokens(L<S> tok) {
    for (Guesser g : l) {
      IntRange result = cast pcall(g, "getSubjectTokens", tok);
      if (result != null) ret result;
    }
    null;
  }

  // Forwards the training material to every member. Without this override
  // the inherited learn(L<Example>) only reaches the no-op learn(Example),
  // so the wrapped guessers would stay untrained when a Chained is trained.
  void learn(L<Example> material) {
    for (Guesser g : l)
      g.learn(material);
  }
}
104 | |
// Memorizing guesser: stores the subject range of every training sentence
// and only answers for sentences whose tokens it has seen (null otherwise).
Guesser > GCheater {
  new Map<S, IntRange> map;

  // Lookup key for a sentence: its tokens joined by joinWithSpace.
  S keyFor(L<S> tok) {
    return joinWithSpace(tok);
  }

  IntRange getSubjectTokens(L<S> tok) {
    S key = keyFor(tok);
    return map.get(key);
  }

  void learn(Example e) {
    S key = keyFor(e.tok);
    map.put(key, intRange(e.start, e.end));
  }
}
116 | |
// Main program: loads the sentence concepts, evaluates the individual
// guessers, then lets bestLearner pick a winner that is stored in
// best / bestScore.
p {
  loadConceptsFrom(#1008607);
  L<Example> material = learningMaterial();
  pnlStruct(material);

  // The untrained first-word baseline is checked first.
  G1 g1 = new G1;
  checkGuesser(material, g1);

  // From here on, correctly solved examples are printed too.
  printSuccesses = true;

  G2 g2 = new G2;
  checkGuesserAfterFullLearn(material, g2);
  G3 g3 = new G3;
  checkGuesserAfterFullLearn(material, g3);
  checkGuesserAfterFullLearn(material, new GCheater);
  checkGuesserAfterPartialLearn(material, new GCheater, 50);

  // Chain of the guessers trained above; checked without further learning.
  Chained chained = new Chained(g1, g2, g3);
  checkGuesser(material, chained);

  // Dump what G2 and G3 have learned.
  printStruct(g2);
  printUnrolledStringTree(g2.skipTree);
  print();
  printUnrolledStringTree(g3.continuationTree);
  //printStruct(bestLearner(material, allNew(G1, G2, G3, GCheater), 50, 3, false));

  // Compare fresh guessers: 50 percent randomized training material,
  // 3 repetitions, hard mode on.
  Pair<Guesser, Double> winner = bestLearner(material,
    listPlus(allNew(G1, G2, G3, GCheater),
      new Chained(new G2, new G1),
      new SkipFirst(new G2, new Chained(new G3, new G1))),
    50, 3, true);
  printStruct(reversePair(winner));
  best = winner.a;
  bestScore = winner.b;
}
149 | |
// When set, checkGuesser prints the outcome (OK or FAIL) of every example.
static boolean printDetails;
// When set, checkGuesser prints the examples that were guessed correctly.
static boolean printSuccesses;
151 | |
// Runs guesser g over all examples in testMaterial without training it,
// prints the score and returns ratioToPercent(correct, total).
// A guess counts as correct when it equals (via eq) the example's range.
static double checkGuesser(L<Example> testMaterial, Guesser g) {
  print();
  int correct = 0, total = 0;
  for (Example example : testMaterial) {
    // The guesser is invoked through pcall.
    IntRange guess = (IntRange) pcall(g, "getSubjectTokens", example.tok);
    IntRange expected = new IntRange(example.start, example.end);
    ++total;
    if (eq(expected, guess)) {
      ++correct;
      if (printDetails || printSuccesses)
        print("OK " + example);
    } else if (printDetails) {
      // Show the tokens that were guessed instead, or "-" when there was no guess.
      print("FAIL " + (guess == null ? "-" : joinWithSpaces(subList(example.tok, guess.start, guess.end))) + " for " + example);
    }
  }
  printScore(shortClassName(g), correct, total);
  return ratioToPercent(correct, total);
}
169 | |
// Trains g on the complete material, then scores it on that same material.
static double checkGuesserAfterFullLearn(L<Example> testMaterial, Guesser g) {
  g.learn(testMaterial);
  double percentSolved = checkGuesser(testMaterial, g);
  return percentSolved;
}
174 | |
// Trains g on the leading percentToLearn percent of the material (as chosen
// by getFirstPercent), then scores it on the complete material.
static double checkGuesserAfterPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn) {
  L<Example> trainingPart = getFirstPercent(testMaterial, percentToLearn);
  g.learn(trainingPart);
  return checkGuesser(testMaterial, g);
}
179 | |
// Splits the material with getRandomPercent2, trains g on the first part of
// the split and scores it. In hard mode only the second part of the split is
// scored; otherwise the complete material is.
static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) {
  Pair<L<Example>> split = getRandomPercent2(testMaterial, percentToLearn);
  g.learn(split.a);
  if (hardMode)
    return checkGuesser(split.b, g);
  return checkGuesser(testMaterial, g);
}
185 | |
// Picks the best learner using randomized x% training material.
// Every candidate is evaluated 'repetitions' times on a clone of itself
// (cloneObject) and rated by the average of the resulting scores.
// Returns the winning guesser together with its percentage solved.
// hardMode = only count scores on untrained examples
static Pair<Guesser, Double> bestLearner(final L<Example> material, L<Guesser> guessers, final double percent, int repetitions, final bool hardMode) {
  Best<Guesser> ranking = new Best<Guesser>();
  for (final Guesser g : guessers) {
    double total = repeatAndAdd_double(repetitions, func {
      checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode)
    });
    ranking.put(g, total / repetitions);
  }
  return ranking.pair();
}
197 | |
// Builds the list of examples from all Sentence concepts. Sentences without
// an action, or whose action yields no range from ai_parseSubjectAction,
// are left out.
static L<Example> learningMaterial() {
  L<Example> examples = new ArrayList;
  for (Sentence s) {
    if (s.action == null) continue;
    IntRange charRange = ai_parseSubjectAction(s.action);
    if (charRange == null) continue;

    L<S> fullTok = nlTok5(s.text);
    IntRange tokRange = charRangeToTokenRange(fullTok, charRange);
    // Map indices in the full token list to indices in the code-token list.
    // NOTE(review): presumably code tokens sit at the odd indices of fullTok - confirm.
    int first = (tokRange.start | 1) / 2;
    int last = tokRange.end / 2;
    examples.add(new Example(codeTokens(fullTok), new IntRange(first, last)));
  }
  return examples;
}
213 | |
// To be called from applications - works on character level.
// Tokenizes the sentence, asks g for the subject token range and converts
// it to a character range. Returns null when the guesser has no guess.
static IntRange callGuesser(Guesser g, S sentence) {
  L<S> tok = codeTokens(nlTok5(sentence));
  IntRange guess = g.getSubjectTokens(tok);
  // Guessers in this file answer null for "no guess"; do not pass that on for conversion.
  if (guess == null) null;
  // NOTE(review): tok holds code tokens only here, so the resulting offsets may
  // not account for the text between tokens - confirm against tokenRangeToCharRange.
  ret tokenRangeToCharRange(tok, guess);
}
Began life as a copy of #1008643
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008653 |
Snippet name: | Learn parsing sentences 2 [dev.] |
Eternal ID of this version: | #1008653/44 |
Text MD5: | 7ac4b1a486b207f189091e31bc7c5e3a |
Transpilation MD5: | 07529db319c149fb6d4e903db0698f9a |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-27 14:54:38 |
Source code size: | 5926 bytes / 218 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 596 / 1104 |
Version history: | 43 change(s) |
Referenced in: | [show references] |