Libraryless. Click here for Pure Java version (7920L/52K/179K).
1 | !7 |
2 | |
3 | static Guesser best; |
4 | static double bestScore; |
5 | sS key = "verb"; |
6 | |
7 | concept Sentence { |
8 | S text; |
9 | SS data; |
10 | |
11 | S get(S s) { ret data.get(s); } |
12 | S subject() { ret get("subject"); } |
13 | S verb() { ret get(key); } |
14 | } |
15 | |
16 | sclass Input { |
17 | L<S> tok; |
18 | IntRange subject; |
19 | |
20 | *() {} |
21 | *(L<S> *tok, IntRange *subject) {} |
22 | } |
23 | |
24 | Input > Example { |
25 | new L<IntRange> verbs; |
26 | |
27 | *() {} |
28 | *(L<S> *tok, IntRange *subject, L<IntRange> *verbs) {} |
29 | |
30 | toString { |
31 | ret quote(joinWithSpaces(tok)) + " => " + map(verbs, func(IntRange r) { joinWithSpaces(subList(tok, r.start, r.end)) }); |
32 | } |
33 | } |
34 | |
35 | abstract sclass GuesserBase { |
36 | void learn(L<Example> material) { |
37 | for (Example e : material) learn(e); |
38 | } |
39 | void learn(Example e) {} |
40 | } |
41 | |
42 | abstract sclass Guesser extends GuesserBase { |
43 | abstract L<IntRange> getVerbTokens(Input input); |
44 | } |
45 | |
46 | Guesser > GWordAfterSubject { |
47 | L<IntRange> getVerbTokens(Input input) { |
48 | IntRange r = input.subject; |
49 | ret r == null ? null : ll(intRange(r.end, r.end+1)); |
50 | } |
51 | } |
52 | |
53 | p { |
54 | loadConceptsFrom(#1008692); |
55 | L<Example> material = learningMaterial(); |
56 | pnlStruct(material); |
57 | |
58 | // This yields the empty learner |
59 | Pair<Guesser, Double> p = bestLearner(material, |
60 | ll(new GWordAfterSubject), |
61 | 50, 3, true); |
62 | |
63 | // Now we train it with all data for in-program use |
64 | if (p.a != null) p.a.learn(material); |
65 | |
66 | // Print and store |
67 | print("Best learner: " + formatDouble(p.b, 1) + "% - " + struct(p.a)); |
68 | best = p.a; |
69 | bestScore = p.b; |
70 | } |
71 | |
72 | sbool printDetails, printSuccesses; |
73 | |
74 | static double checkGuesser(L<Example> testMaterial, Guesser g) { |
75 | print(); |
76 | int score = 0, n = 0; |
77 | for (final Example e : testMaterial) { |
78 | L<IntRange> r = cast pcall(g, "getVerbTokens", e.tok); |
79 | bool ok = eq(r, e.verbs); |
80 | if (ok) ++score; |
81 | ++n; |
82 | if (printDetails || ok && printSuccesses) |
83 | if (ok) |
84 | print("OK " + e); |
85 | else |
86 | print("FAIL " + (r == null ? "-" : map(r, func(IntRange r) { joinWithSpaces(subList(e.tok, r)) })) + " for " + e); |
87 | } |
88 | printScore(shortClassName(g), score, n); |
89 | ret ratioToPercent(score, n); |
90 | } |
91 | |
92 | static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) { |
93 | Pair<L<Example>> p = getRandomPercent2(testMaterial, percentToLearn); |
94 | g.learn(p.a); |
95 | ret checkGuesser(hardMode ? p.b : testMaterial, g); |
96 | } |
97 | |
98 | // best learner with randomized x% training material |
99 | // returns guesser, percentage solved |
100 | // hardMode = only count scores on untrained examples |
101 | static Pair<Guesser, Double> bestLearner(final L<Example> material, L<? extends Guesser> guessers, final double percent, int repetitions, final bool hardMode) { |
102 | new Best<Guesser> best; |
103 | for (final Guesser g : guessers) |
104 | best.put(g, repeatAndAdd_double(repetitions, func { |
105 | checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) |
106 | })/repetitions); |
107 | ret best.pair(); |
108 | } |
109 | |
110 | static L<Example> learningMaterial() { |
111 | L<Example> out = new L; |
112 | for (Sentence s) { |
113 | if (s.verb() == null) continue; |
114 | L<IntRange> r = ai_parseVerbAction(s.verb()); |
115 | if (r != null) { |
116 | L<S> tok = nlTok5(s.text); |
117 | IntRange subject = ai_parseSubjectAction(s.subject()); |
118 | subject = charRangeToCodeTokens(tok, subject); |
119 | r = charRangeToCodeTokens(tok, r); |
120 | tok = codeTokens(tok); |
121 | out.add(Example(tok, subject, r)); |
122 | } |
123 | } |
124 | ret out; |
125 | } |
126 | |
127 | // to be called from applications - works on character level |
128 | static void callGuesser(Guesser g, S sentence, SS data) { |
129 | L<S> tok = nlTok5(sentence); |
130 | L<IntRange> r = g.getVerbTokens(new Input(codeTokens(tok), |
131 | charRangeToCodeTokens(tok, ai_parseAction(data.get("subject"))))); |
132 | if (r == null) ret; |
133 | data.put(key, ai_renderAction(sentence, codeTokenRangeToChars(tok, first/*XX*/(r)))); |
134 | } |
Began life as a copy of #1008680
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008704 |
Snippet name: | Find Verb (map version): Learner 1 [dev.] |
Eternal ID of this version: | #1008704/12 |
Text MD5: | 71a2659f6c36ebc9cd7b87b20cfcf0de |
Transpilation MD5: | 3bebdae0e1e24d24e2ebde13aa918b82 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-30 17:07:27 |
Source code size: | 3887 bytes / 134 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 434 / 838 |
Version history: | 11 change(s) |
Referenced in: | [show references] |