!7 static Guesser best; static double bestScore; concept Sentence { S text; S action; } sclass Example { L tok; int start, end; *() {} *(L *tok, IntRange subjectTokens) { start = subjectTokens.start; end = subjectTokens.end; } toString { ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subList(tok, start, end)); } } abstract sclass Guesser { abstract IntRange getSubjectTokens(L tok); void learn(L material) { for (Example e : material) learn(e); } void learn(Example e) {} } Guesser > GSkip1 { // returns first word or second word new MultiSet pos; // words to skip new MultiSet neg; // words not to skip void learn(Example e) { (e.start > 0 ? pos : neg).add(lower(first(e.tok)); } IntRange getSubjectTokens(L tok) { S t = lower(first(tok)); ret intRangeFromStartAndLength(pos.get(t) > neg.get(t), 1); } } p { loadConceptsFrom(#1008607); L material = learningMaterial(); pnlStruct(material); Pair p = bestLearner(material, ll(new GSkip1), 50, 3, true); printStruct(reversePair(p)); best = p.a; bestScore = p.b; } sbool printDetails, printSuccesses; static double checkGuesser(L testMaterial, Guesser g) { print(); int score = 0, n = 0; for (Example e : testMaterial) { IntRange r = cast pcall(g, "getSubjectTokens", e.tok); bool ok = eq(IntRange(e.start, e.end), r); if (ok) ++score; ++n; if (printDetails || ok && printSuccesses) if (ok) print("OK " + e); else print("FAIL " + (r == null ? "-" : joinWithSpaces(subList(e.tok, r.start, r.end))) + " for " + e); } printScore(shortClassName(g), score, n); ret ratioToPercent(score, n); } static double checkGuesserAfterFullLearn(L testMaterial, Guesser g) { g.learn(testMaterial); ret checkGuesser(testMaterial, g); } static double checkGuesserAfterPartialLearn(L testMaterial, Guesser g, double percentToLearn) { g.learn(getFirstPercent(testMaterial, percentToLearn)); ret checkGuesser(testMaterial, g); } static double checkGuesserAfterRandomizedPartialLearn(L testMaterial, Guesser g, double percentToLearn, bool hardMode) { Pair> p = getRandomPercent2(testMaterial, percentToLearn); g.learn(p.a); ret checkGuesser(hardMode ? p.b : testMaterial, g); } // best learner with randomized x% training material // returns guesser, percentage solved // hardMode = only count scores on untrained examples static Pair bestLearner(final L material, L guessers, final double percent, int repetitions, final bool hardMode) { new Best best; for (final Guesser g : guessers) best.put(g, repeatAndAdd_double(repetitions, func { checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) })/repetitions); ret best.pair(); } static L learningMaterial() { L out = new L; for (Sentence s) { if (s.action == null) continue; IntRange r = ai_parseSubjectAction(s.action); if (r != null) { L tok = nlTok5(s.text); r = charRangeToTokenRange(tok, r); r = IntRange((r.start | 1)/2, r.end/2); tok = codeTokens(tok); out.add(Example(tok, r)); } } ret out; } // to be called from applications - works on character level static IntRange callGuesser(Guesser g, S sentence) { L tok = codeTokens(nlTok5(sentence)); ret tokenRangeToCharRange(tok, g.getSubjectTokens(tok)); }