Libraryless. Click here for Pure Java version (8182L/54K/184K).
1 | !7 |
2 | |
3 | static Guesser best; |
4 | static double bestScore; |
5 | |
6 | concept Sentence { |
7 | S text; |
8 | SS data; |
9 | } |
10 | |
11 | sclass Example { |
12 | L<S> tok; |
13 | int start, end; |
14 | |
15 | *() {} |
16 | *(L<S> *tok, IntRange subjectTokens) { |
17 | start = subjectTokens.start; |
18 | end = subjectTokens.end; |
19 | } |
20 | |
21 | toString { |
22 | ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subList(tok, start, end)); |
23 | } |
24 | } |
25 | |
// Training half of a guesser: bulk learning built on a per-example hook.
abstract sclass GuesserBase {
  // Feeds every example, in list order, to learn(Example).
  void learn(L<Example> material) {
    for (Example example : material) {
      learn(example);
    }
  }

  // Per-example training hook; does nothing unless a subclass overrides it.
  void learn(Example e) {
  }
}
33 | |
// A trainable guesser that proposes which tokens of a sentence are its subject.
abstract sclass Guesser extends GuesserBase {
  // Returns the guessed subject as a token range over tok.
  // Callers in this file (checkGuesser, callGuesser, GCombine) tolerate a null result.
  abstract IntRange getSubjectTokens(L<S> tok);
}
37 | |
38 | Guesser > GLengthOfSubject { |
39 | new MultiSet<S> pos; // words to end on |
40 | new MultiSet<S> neg; // words not to end on |
41 | |
42 | IntRange getSubjectTokens(L<S> tok) { |
43 | ret getSubjectTokens(tok, 0); |
44 | } |
45 | |
46 | IntRange getSubjectTokens(L<S> tok, int startAt) { |
47 | int i = startAt; |
48 | while (i < l(tok)) { |
49 | S t = lower(tok.get(i)); |
50 | if (pos.get(t) <= neg.get(t)) // also stop if unknown word |
51 | break; |
52 | ++i; |
53 | } |
54 | ret intRange(startAt, min(l(tok), i+1)); |
55 | } |
56 | |
57 | void learn(Example e) { |
58 | L<S> subjectTokens = allToLower(subList(e.tok, e.start, e.end)); |
59 | for (S word : dropLast(subjectTokens)) |
60 | pos.add(word); |
61 | addIfNotNull(neg, last(subjectTokens)); |
62 | } |
63 | } |
64 | |
65 | Guesser > GSkip1 { // returns first word or second word |
66 | new MultiSet<S> pos; // words to skip |
67 | new MultiSet<S> neg; // words not to skip |
68 | |
69 | void learn(Example e) { |
70 | (e.start > 0 ? pos : neg).add(lower(first(e.tok)); |
71 | } |
72 | |
73 | IntRange getSubjectTokens(L<S> tok) { |
74 | S t = lower(first(tok)); |
75 | ret intRangeFromStartAndLength(pos.get(t) > neg.get(t) ? 1 : 0, 1); |
76 | } |
77 | } |
78 | |
79 | Guesser > GSkip2 { // can skip multiple words |
80 | new MultiSet<S> pos; // words to skip |
81 | new MultiSet<S> neg; // words not to skip |
82 | |
83 | void learn(Example e) { |
84 | (e.start > 0 ? pos : neg).add(lower(first(e.tok)); |
85 | } |
86 | |
87 | IntRange getSubjectTokens(L<S> tok) { |
88 | int i = 0; |
89 | while (i < l(tok)) { |
90 | S t = lower(tok.get(i)); |
91 | if (pos.get(t) <= neg.get(t)) // also stop if unknown word |
92 | break; |
93 | ++i; |
94 | } |
95 | ret intRangeFromStartAndLength(i, i+1); |
96 | } |
97 | } |
98 | |
99 | Guesser > GCombine { |
100 | Guesser a; |
101 | new GLengthOfSubject b; |
102 | |
103 | *() {} |
104 | *(Guesser *a) {} |
105 | |
106 | IntRange getSubjectTokens(L<S> tok) { |
107 | IntRange r = a.getSubjectTokens(tok); |
108 | int skip = r == null ? 0 : r.start; |
109 | ret b.getSubjectTokens(tok, skip); |
110 | } |
111 | |
112 | void learn(L<Example> material) { |
113 | a.learn(material); |
114 | b.learn(material); |
115 | } |
116 | } |
117 | |
118 | p { |
119 | loadConceptsFrom(#1008692); |
120 | L<Example> material = learningMaterial(); |
121 | //pnlStruct(material); |
122 | |
123 | // This yields the empty learner |
124 | Pair<Guesser, Double> p = bestLearner(material, |
125 | //ll(new GSkip1), |
126 | ll(new GCombine(new GSkip1), new GCombine(new GSkip2)), |
127 | 50, 3, true); |
128 | |
129 | // Now we train it with all data for in-program use |
130 | p.a.learn(material); |
131 | |
132 | // Print and store |
133 | print("Best learner: " + formatDouble(p.b, 1) + "% - " + struct(p.a)); |
134 | best = p.a; |
135 | bestScore = p.b; |
136 | } |
137 | |
138 | sbool printDetails, printSuccesses; |
139 | |
140 | static double checkGuesser(L<Example> testMaterial, Guesser g) { |
141 | print(); |
142 | int score = 0, n = 0; |
143 | for (Example e : testMaterial) { |
144 | IntRange r = cast pcall(g, "getSubjectTokens", e.tok); |
145 | bool ok = eq(IntRange(e.start, e.end), r); |
146 | if (ok) ++score; |
147 | ++n; |
148 | if (printDetails || ok && printSuccesses) |
149 | if (ok) |
150 | print("OK " + e); |
151 | else |
152 | print("FAIL " + (r == null ? "-" : joinWithSpaces(subList(e.tok, r.start, r.end))) + " for " + e); |
153 | } |
154 | printScore(shortClassName(g), score, n); |
155 | ret ratioToPercent(score, n); |
156 | } |
157 | |
158 | static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) { |
159 | Pair<L<Example>> p = getRandomPercent2(testMaterial, percentToLearn); |
160 | g.learn(p.a); |
161 | ret checkGuesser(hardMode ? p.b : testMaterial, g); |
162 | } |
163 | |
164 | // best learner with randomized x% training material |
165 | // returns guesser, percentage solved |
166 | // hardMode = only count scores on untrained examples |
167 | static Pair<Guesser, Double> bestLearner(final L<Example> material, L<? extends Guesser> guessers, final double percent, int repetitions, final bool hardMode) { |
168 | new Best<Guesser> best; |
169 | for (final Guesser g : guessers) |
170 | best.put(g, repeatAndAdd_double(repetitions, func { |
171 | checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) |
172 | })/repetitions); |
173 | ret best.pair(); |
174 | } |
175 | |
176 | static L<Example> learningMaterial() { |
177 | L<Example> out = new L; |
178 | for (Sentence s) { |
179 | S action = s.data.get("subject"); |
180 | if (action == null) continue; |
181 | IntRange r = ai_parseAction(action); |
182 | if (r != null) { |
183 | L<S> tok = nlTok5(s.text); |
184 | r = charRangeToTokenRange(tok, r); |
185 | r = tokenRangeToCodeTokens(r); |
186 | tok = codeTokens(tok); |
187 | out.add(Example(tok, r)); |
188 | } |
189 | } |
190 | ret out; |
191 | } |
192 | |
193 | // to be called from applications - works on character level |
194 | // modifies data |
195 | static void callGuesser(Guesser g, S sentence, SS data) { |
196 | L<S> tok = nlTok5(sentence); |
197 | IntRange r = g.getSubjectTokens(codeTokens(tok)); |
198 | if (r == null) ret; |
199 | data.put("subject", ai_renderAction(sentence, codeTokenRangeToChars(tok, r))); |
200 | } |
Began life as a copy of #1008669
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008696 |
Snippet name: | Find Subject (map version): Learner 1 [dev.] |
Eternal ID of this version: | #1008696/10 |
Text MD5: | 4642540bbd34ab5fdad468ff9ba185ec |
Transpilation MD5: | 9a8b3ac56722d34386d1ded2c58730d9 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-29 03:00:33 |
Source code size: | 5390 bytes / 200 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 508 / 926 |
Version history: | 9 change(s) |
Referenced in: | [show references] |