Libraryless. Click here for Pure Java version (8182L/54K/184K).
!7 static Guesser best; static double bestScore; concept Sentence { S text; SS data; } sclass Example { L<S> tok; int start, end; *() {} *(L<S> *tok, IntRange subjectTokens) { start = subjectTokens.start; end = subjectTokens.end; } toString { ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subList(tok, start, end)); } } abstract sclass GuesserBase { void learn(L<Example> material) { for (Example e : material) learn(e); } void learn(Example e) {} } abstract sclass Guesser extends GuesserBase { abstract IntRange getSubjectTokens(L<S> tok); } Guesser > GLengthOfSubject { new MultiSet<S> pos; // words to end on new MultiSet<S> neg; // words not to end on IntRange getSubjectTokens(L<S> tok) { ret getSubjectTokens(tok, 0); } IntRange getSubjectTokens(L<S> tok, int startAt) { int i = startAt; while (i < l(tok)) { S t = lower(tok.get(i)); if (pos.get(t) <= neg.get(t)) // also stop if unknown word break; ++i; } ret intRange(startAt, min(l(tok), i+1)); } void learn(Example e) { L<S> subjectTokens = allToLower(subList(e.tok, e.start, e.end)); for (S word : dropLast(subjectTokens)) pos.add(word); addIfNotNull(neg, last(subjectTokens)); } } Guesser > GSkip1 { // returns first word or second word new MultiSet<S> pos; // words to skip new MultiSet<S> neg; // words not to skip void learn(Example e) { (e.start > 0 ? pos : neg).add(lower(first(e.tok)); } IntRange getSubjectTokens(L<S> tok) { S t = lower(first(tok)); ret intRangeFromStartAndLength(pos.get(t) > neg.get(t) ? 1 : 0, 1); } } Guesser > GSkip2 { // can skip multiple words new MultiSet<S> pos; // words to skip new MultiSet<S> neg; // words not to skip void learn(Example e) { (e.start > 0 ? pos : neg).add(lower(first(e.tok)); } IntRange getSubjectTokens(L<S> tok) { int i = 0; while (i < l(tok)) { S t = lower(tok.get(i)); if (pos.get(t) <= neg.get(t)) // also stop if unknown word break; ++i; } ret intRangeFromStartAndLength(i, i+1); } } Guesser > GCombine { Guesser a; new GLengthOfSubject b; *() {} *(Guesser *a) {} IntRange getSubjectTokens(L<S> tok) { IntRange r = a.getSubjectTokens(tok); int skip = r == null ? 0 : r.start; ret b.getSubjectTokens(tok, skip); } void learn(L<Example> material) { a.learn(material); b.learn(material); } } p { loadConceptsFrom(#1008692); L<Example> material = learningMaterial(); //pnlStruct(material); // This yields the empty learner Pair<Guesser, Double> p = bestLearner(material, //ll(new GSkip1), ll(new GCombine(new GSkip1), new GCombine(new GSkip2)), 50, 3, true); // Now we train it with all data for in-program use p.a.learn(material); // Print and store print("Best learner: " + formatDouble(p.b, 1) + "% - " + struct(p.a)); best = p.a; bestScore = p.b; } sbool printDetails, printSuccesses; static double checkGuesser(L<Example> testMaterial, Guesser g) { print(); int score = 0, n = 0; for (Example e : testMaterial) { IntRange r = cast pcall(g, "getSubjectTokens", e.tok); bool ok = eq(IntRange(e.start, e.end), r); if (ok) ++score; ++n; if (printDetails || ok && printSuccesses) if (ok) print("OK " + e); else print("FAIL " + (r == null ? "-" : joinWithSpaces(subList(e.tok, r.start, r.end))) + " for " + e); } printScore(shortClassName(g), score, n); ret ratioToPercent(score, n); } static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) { Pair<L<Example>> p = getRandomPercent2(testMaterial, percentToLearn); g.learn(p.a); ret checkGuesser(hardMode ? p.b : testMaterial, g); } // best learner with randomized x% training material // returns guesser, percentage solved // hardMode = only count scores on untrained examples static Pair<Guesser, Double> bestLearner(final L<Example> material, L<? extends Guesser> guessers, final double percent, int repetitions, final bool hardMode) { new Best<Guesser> best; for (final Guesser g : guessers) best.put(g, repeatAndAdd_double(repetitions, func { checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) })/repetitions); ret best.pair(); } static L<Example> learningMaterial() { L<Example> out = new L; for (Sentence s) { S action = s.data.get("subject"); if (action == null) continue; IntRange r = ai_parseAction(action); if (r != null) { L<S> tok = nlTok5(s.text); r = charRangeToTokenRange(tok, r); r = tokenRangeToCodeTokens(r); tok = codeTokens(tok); out.add(Example(tok, r)); } } ret out; } // to be called from applications - works on character level // modifies data static void callGuesser(Guesser g, S sentence, SS data) { L<S> tok = nlTok5(sentence); IntRange r = g.getSubjectTokens(codeTokens(tok)); if (r == null) ret; data.put("subject", ai_renderAction(sentence, codeTokenRangeToChars(tok, r))); }
Began life as a copy of #1008669
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008696 |
Snippet name: | Find Subject (map version): Learner 1 [dev.] |
Eternal ID of this version: | #1008696/10 |
Text MD5: | 4642540bbd34ab5fdad468ff9ba185ec |
Transpilation MD5: | 9a8b3ac56722d34386d1ded2c58730d9 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-29 03:00:33 |
Source code size: | 5390 bytes / 200 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 509 / 926 |
Version history: | 9 change(s) |
Referenced in: | #1008692 - Sentence analysis with map [WORKS] |