Libraryless. Click here for Pure Java version (7859L/52K/178K).
1 | !7 |
2 | |
3 | static Guesser best; |
4 | static double bestScore; |
5 | |
6 | concept Sentence { |
7 | S text; |
8 | S action, verb; |
9 | } |
10 | |
11 | sclass Input { |
12 | L<S> tok; |
13 | IntRange subject; |
14 | |
15 | *() {} |
16 | *(L<S> *tok, IntRange *subject) {} |
17 | } |
18 | |
19 | Input > Example { |
20 | new L<IntRange> verbs; |
21 | |
22 | *() {} |
23 | *(L<S> *tok, IntRange *subject, L<IntRange> *verbs) {} |
24 | |
25 | toString { |
26 | ret quote(joinWithSpaces(tok)) + " => " + map(verbs, func(IntRange r) { joinWithSpaces(subList(tok, r.start, r.end)) }); |
27 | } |
28 | } |
29 | |
30 | abstract sclass GuesserBase { |
31 | void learn(L<Example> material) { |
32 | for (Example e : material) learn(e); |
33 | } |
34 | void learn(Example e) {} |
35 | } |
36 | |
37 | abstract sclass Guesser extends GuesserBase { |
38 | abstract L<IntRange> getVerbTokens(Input input); |
39 | } |
40 | |
41 | Guesser > GWordAfterSubject { |
42 | L<IntRange> getVerbTokens(Input input) { |
43 | IntRange r = input.subject; |
44 | ret r == null ? null : ll(intRange(r.end, r.end+1)); |
45 | } |
46 | } |
47 | |
48 | p { |
49 | loadConceptsFrom(#1008607); |
50 | L<Example> material = learningMaterial(); |
51 | pnlStruct(material); |
52 | |
53 | // This yields the empty learner |
54 | Pair<Guesser, Double> p = bestLearner(material, |
55 | ll(new GWordAfterSubject), |
56 | 50, 3, true); |
57 | |
58 | // Now we train it with all data for in-program use |
59 | if (p.a != null) p.a.learn(material); |
60 | |
61 | // Print and store |
62 | print("Best learner: " + formatDouble(p.b, 1) + "% - " + struct(p.a)); |
63 | best = p.a; |
64 | bestScore = p.b; |
65 | } |
66 | |
67 | sbool printDetails, printSuccesses; |
68 | |
69 | static double checkGuesser(L<Example> testMaterial, Guesser g) { |
70 | print(); |
71 | int score = 0, n = 0; |
72 | for (final Example e : testMaterial) { |
73 | L<IntRange> r = cast pcall(g, "getVerbTokens", e.tok); |
74 | bool ok = eq(r, e.verbs); |
75 | if (ok) ++score; |
76 | ++n; |
77 | if (printDetails || ok && printSuccesses) |
78 | if (ok) |
79 | print("OK " + e); |
80 | else |
81 | print("FAIL " + (r == null ? "-" : map(r, func(IntRange r) { joinWithSpaces(subList(e.tok, r)) })) + " for " + e); |
82 | } |
83 | printScore(shortClassName(g), score, n); |
84 | ret ratioToPercent(score, n); |
85 | } |
86 | |
87 | static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) { |
88 | Pair<L<Example>> p = getRandomPercent2(testMaterial, percentToLearn); |
89 | g.learn(p.a); |
90 | ret checkGuesser(hardMode ? p.b : testMaterial, g); |
91 | } |
92 | |
93 | // best learner with randomized x% training material |
94 | // returns guesser, percentage solved |
95 | // hardMode = only count scores on untrained examples |
96 | static Pair<Guesser, Double> bestLearner(final L<Example> material, L<? extends Guesser> guessers, final double percent, int repetitions, final bool hardMode) { |
97 | new Best<Guesser> best; |
98 | for (final Guesser g : guessers) |
99 | best.put(g, repeatAndAdd_double(repetitions, func { |
100 | checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) |
101 | })/repetitions); |
102 | ret best.pair(); |
103 | } |
104 | |
105 | static L<Example> learningMaterial() { |
106 | L<Example> out = new L; |
107 | for (Sentence s) { |
108 | if (s.action == null) continue; |
109 | L<IntRange> r = ai_parseVerbAction(s.verb); |
110 | if (r != null) { |
111 | L<S> tok = nlTok5(s.text); |
112 | IntRange subject = ai_parseSubjectAction(s.action); |
113 | subject = charRangeToCodeTokens(tok, subject); |
114 | r = charRangeToCodeTokens(tok, r); |
115 | tok = codeTokens(tok); |
116 | out.add(Example(tok, subject, r)); |
117 | } |
118 | } |
119 | ret out; |
120 | } |
121 | |
122 | // to be called from applications - works on character level |
123 | static L<IntRange> callGuesser(Guesser g, S sentence, IntRange subject) { |
124 | L<S> tok = nlTok5(sentence); |
125 | L<IntRange> r = g.getVerbTokens(new Input(codeTokens(tok), |
126 | charRangeToCodeTokens(tok, subject))); |
127 | if (r == null) null; |
128 | ret codeTokenRangeToChars(tok, r); |
129 | } |
Began life as a copy of #1008669
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008680 |
Snippet name: | Find Verb: Learner 1 [dev.] |
Eternal ID of this version: | #1008680/13 |
Text MD5: | c5d2f3c10d3251ab894175830e1d4544 |
Transpilation MD5: | 44f3c6792d496a0ab1cbfefaf6c30685 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-28 18:29:33 |
Source code size: | 3701 bytes / 129 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 445 / 743 |
Version history: | 12 change(s) |
Referenced in: | [show references] |