Libraryless. Click here for Pure Java version (8065L/53K/181K).
!7 static Guesser best; static double bestScore; concept Sentence { S text; S action; } sclass Example { L<S> tok; int start, end; *() {} *(L<S> *tok, IntRange subjectTokens) { start = subjectTokens.start; end = subjectTokens.end; } toString { ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subList(tok, start, end)); } } abstract sclass Guesser { abstract IntRange getSubjectTokens(L<S> tok); void learn(L<Example> material) { for (Example e : material) learn(e); } void learn(Example e) {} } Guesser > G1 { // just returns first word IntRange getSubjectTokens(L<S> tok) { ret new IntRange(0, 1); } } Guesser > G2 { // skips first words new StringTree1 skipTree; IntRange getSubjectTokens(L<S> tok) { int n = walkStringTreeToLeaf(skipTree, allToLower(tok)); if (n >= 0) ret new IntRange(n, n+1); null; } void learn(Example e) { if (e.start > 0) addToStringTree(skipTree, allToLower(takeFirst(e.tok, e.start))); } } Guesser > G3 { // continues expanding subject depending on words new StringTree1 continuationTree; IntRange getSubjectTokens(L<S> tok) { int n = walkStringTreeToLeaf(continuationTree, allToLower(tok)); if (n >= 0) ret new IntRange(0, n+1); null; } void learn(Example e) { L<S> l = allToLower(subList(e.tok, e.start, e.end-1)); if (nempty(l)) addToStringTree(continuationTree, l); } } Guesser > SkipFirst { Guesser a, b; *() {} *(Guesser *a, Guesser *b) {} IntRange getSubjectTokens(L<S> tok) { IntRange r = a.getSubjectTokens(tok); int skip = r == null ? 0 : r.start; ret shiftIntRange(skip, b.getSubjectTokens(dropFirst(skip, tok))); } void learn(L<Example> material) { a.learn(material); b.learn(material); } } Guesser > Chained { new L<Guesser> l; *() {} *(Guesser... guessers) { addAll(l, guessers); } IntRange getSubjectTokens(L<S> tok) { for (Guesser g : l) { IntRange result = cast pcall(g, "getSubjectTokens", tok); if (result != null) ret result; } null; } } Guesser > GCheater { new Map<S, IntRange> map; IntRange getSubjectTokens(L<S> tok) { ret map.get(joinWithSpace(tok)); } void learn(Example e) { map.put(joinWithSpace(e.tok), intRange(e.start, e.end)); } } p { loadConceptsFrom(#1008607); L<Example> material = learningMaterial(); pnlStruct(material); G1 g1; G2 g2; G3 g3; Chained chained; checkGuesser(material, g1 = new G1); set printSuccesses; checkGuesserAfterFullLearn(material, g2 = new G2); checkGuesserAfterFullLearn(material, g3 = new G3); checkGuesserAfterFullLearn(material, new GCheater); checkGuesserAfterPartialLearn(material, new GCheater, 50); checkGuesser(material, chained = new Chained(g1, g2, g3)); printStruct(g2); printUnrolledStringTree(g2.skipTree); print(); printUnrolledStringTree(g3.continuationTree); //printStruct(bestLearner(material, allNew(G1, G2, G3, GCheater), 50, 3, false)); Pair<Guesser, Double> p = bestLearner(material, listPlus(allNew(G1, G2, G3, GCheater), new Chained(new G2, new G1), new SkipFirst(new G2, new Chained(new G3, new G1))), 50, 3, true); printStruct(reversePair(p)); best = p.a; bestScore = p.b; } sbool printDetails, printSuccesses; static double checkGuesser(L<Example> testMaterial, Guesser g) { print(); int score = 0, n = 0; for (Example e : testMaterial) { IntRange r = cast pcall(g, "getSubjectTokens", e.tok); bool ok = eq(IntRange(e.start, e.end), r); if (ok) ++score; ++n; if (printDetails || ok && printSuccesses) if (ok) print("OK " + e); else print("FAIL " + (r == null ? "-" : joinWithSpaces(subList(e.tok, r.start, r.end))) + " for " + e); } printScore(shortClassName(g), score, n); ret ratioToPercent(score, n); } static double checkGuesserAfterFullLearn(L<Example> testMaterial, Guesser g) { g.learn(testMaterial); ret checkGuesser(testMaterial, g); } static double checkGuesserAfterPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn) { g.learn(getFirstPercent(testMaterial, percentToLearn)); ret checkGuesser(testMaterial, g); } static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) { Pair<L<Example>> p = getRandomPercent2(testMaterial, percentToLearn); g.learn(p.a); ret checkGuesser(hardMode ? p.b : testMaterial, g); } // best learner with randomized x% training material // returns guesser, percentage solved // hardMode = only count scores on untrained examples static Pair<Guesser, Double> bestLearner(final L<Example> material, L<Guesser> guessers, final double percent, int repetitions, final bool hardMode) { new Best<Guesser> best; for (final Guesser g : guessers) best.put(g, repeatAndAdd_double(repetitions, func { checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode) })/repetitions); ret best.pair(); } static L<Example> learningMaterial() { L<Example> out = new L; for (Sentence s) { if (s.action == null) continue; IntRange r = ai_parseSubjectAction(s.action); if (r != null) { L<S> tok = nlTok5(s.text); r = charRangeToTokenRange(tok, r); r = IntRange((r.start | 1)/2, r.end/2); tok = codeTokens(tok); out.add(Example(tok, r)); } } ret out; } // to be called from applications - works on character level static IntRange callGuesser(Guesser g, S sentence) { L<S> tok = codeTokens(nlTok5(sentence)); ret tokenRangeToCharRange(tok, g.getSubjectTokens(tok)); }
Began life as a copy of #1008643
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008653 |
Snippet name: | Learn parsing sentences 2 [dev.] |
Eternal ID of this version: | #1008653/44 |
Text MD5: | 7ac4b1a486b207f189091e31bc7c5e3a |
Transpilation MD5: | 07529db319c149fb6d4e903db0698f9a |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-05-27 14:54:38 |
Source code size: | 5926 bytes / 218 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 594 / 1101 |
Version history: | 43 change(s) |
Referenced in: | [show references] |