Uses 911K of libraries. Click here for Pure Java version (10259L/51K).
/*
  Syntactic learning spike: "what is a number?"

  Input:
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number

  task 1: tokenize well (in this case, ignore quotes)
  task 2: discover patterns "* is a number" and "* is not a number"
  task 3: call pattern 1 positive, pattern 2 negative
  task 4: make set of positive/negative example strings for argument
  task 5a: learn description for positive examples, output new examples
  task 5b: learn description for negative examples, output new examples

  Generates new examples for numbers, e.g.:
    351 is a number
    73 is a number
*/

cprint {
  // Example statements shown in the input text area, one statement per line.
  // calc() reads this field through tlft(input) and maps over the result.
  S input = autoUnindent_mls([[
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number
  ]]);

  // Text shown in the output text area; calc() sets it to the generated statements.
  S output;

  // Candidate character predicates (Char -> Bool). calc() builds its theories
  // from these plus whatever plusNegation adds for each of them.
  transient L<F1> someFunctions = ll(
    f<Char, Bool> isDigit,
    f<Char, Bool> isLetter,
    f<Char, Bool> isLetterOrDigit,
    f<Char, Bool> isSpace);

  // UI: a split of the input and output text areas, combined with the
  // superclass's component (presumably the print console - TODO confirm).
  visual jvsplit(jhsplit(dm_textAreaAsSection input(), dm_textAreaAsSection output()), super);

  // Registers calc for the input field (judging by the helper's name, it runs
  // on every change of the field and once right away - TODO confirm).
  start { dm_onFieldChangeAndNow input(r calc); }

  // Runs the whole pipeline on the current input: tokenize, find the two
  // one-argument patterns, collect the argument examples per pattern, build and
  // test theories over the examples' characters, then write newly generated
  // statements to the output field. Intermediate results are printed.
  void calc {
    // task 1: tokenize well (in this case, ignore quotes)
    ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
    LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input));
    pnlStruct(+tokenizedInput);

    // task 2: discover patterns "* is a number" and "* is not a number"
    Set<S> patterns = asSet(findOneArgumentPatterns(tokenizedInput));
    // The rest of the method relies on exactly two patterns being found.
    assertEquals(2, l(patterns));
    print(+patterns);

    // task 4: make set of positive/negative example strings for argument
    // (pattern -> matched arguments; lines for which mapNotNulls gets null are dropped)
    Map<S, LS> examples = mapToValues_linkedHashMap(patterns, pat ->
      mapNotNulls(tok -> firstMatch(getFlexMatchIC(pat, tok)), tokenizedInput));
    print(+examples);

    // task 5: make theories
    // Transpiler directive: Xmp ("example") stands for L<Char> below.
    replace Xmp with L<Char>.

    // Convert each example string with `characters` so theories can inspect
    // the individual elements (the element type is Char per the Xmp alias).
    Map<S, L<Xmp>> examplesAsLists = mapValues(l -> lambdaMap characters(l), examples);
    print(examplesAsLists);
    //examplesAsLists = reverseKeys(examplesAsLists); // test reversing to see if theory gets reversed

    // task 3: call pattern 1 positive, pattern 2 negative
    // (the first map entry supplies the positive examples, the second the negative ones)
    L<Xmp> posExamples = firstValue(examplesAsLists), negExamples = secondValue(examplesAsLists);

    // for functions that are applicable to the elements of the examples:
    // create an "all elements satisfy" and an "any element satisfies" theory each
    new LinkedHashSet<ITheoryOn<Xmp>> theories;
    for (O function : concatAsOrderedSet(lambdaMap plusNegation(someFunctions))) {
      //if (!functionCallableOnAll_nonSynthetic(function, concatLists(values(examplesAsLists)))) continue;
      print("Testing function " + function);
      theories.add(new AllElementsSatisfy(function));
      theories.add(new AnyElementSatisfies(function));
    }
    // Add whatever theoryPlusInverse yields for each theory collected so far.
    theories.addAll(concatLists(lambdaMap theoryPlusInverse(theories)));
    pnl(theories);

    // test theories: for every example, record whether the theory's verdict
    // equals the expected value paired with it by trueFalseMap
    Map<ITheoryOn<Xmp>, PosNeg<Xmp>> theoryResults = mapToValues(theories, theory ->
      PosNeg(mapToValues(trueFalseMap(posExamples, negExamples),
        (example, x) -> theory.check(example) == x)));
    pnl(theoryResults);

    // make new examples
    // Transpiler directive: Theory stands for ITheoryOn<Xmp> below.
    replace Theory with ITheoryOn<Xmp>.

    new LS newStatements;

    // theories whose PosNeg result reports perfect(), ordered by structure complexity
    L<Theory> perfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.perfect()));
    print(+perfectTheories);
    newStatements.addAll(makeExamplesFor(perfectTheories, posExamples, firstKey(examplesAsLists)));

    // perfect theories for negative examples
    L<Theory> antiPerfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.antiPerfect()));
    print(+antiPerfectTheories);
    newStatements.addAll(makeExamplesFor(antiPerfectTheories, negExamples, secondKey(examplesAsLists)));

    // publish the generated statements to the output text area
    setField(output := lines(newStatements));
  }

  // Generates new statements for one pattern.
  //
  // theories: candidate theories, tried in the given order
  // examples: existing example arguments; only used to derive sizeRange
  // pattern:  pattern that each generated argument is formatted into
  //
  // Only AllElementsSatisfy theories are used. The first such theory that
  // yields a generator produces 3 statements, then the loop stops.
  // Returns the generated statements (empty if no theory qualified).
  LS makeExamplesFor(L<ITheoryOn<L<Char>>> theories, Cl<L<Char>> examples, S pattern) {
    new LS out;
    // presumably the smallest..largest example length - TODO confirm against helper
    IntRange sizeRange = sizeRangeOfCollections(examples);
    print(+sizeRange);
    for (ITheoryOn<L<Char>> t : theories)
      if (t cast AllElementsSatisfy) {
        IF0<Char> generator = predicateToGenerator(t.pred);
        // NOTE(review): predicateToGenerator below always returns a lambda,
        // so this null branch cannot fire as the code stands.
        if (generator == null) continue with print("No generator for " + t.pred);
        repeat 3 {
          // random length from sizeRange, each element supplied by the generator
          S example = charactersToString(repF(random(sizeRange), generator));
          print("> ", addAndReturn(out, format_noQuote(pattern, example)));
        }
        break;
      }
    ret out;
  }

  // Turns a character predicate into a generator: each call filters
  // printableASCIICharsList() by f and returns a random element of the result.
  // NOTE(review): what random() does when no character satisfies f is not
  // visible here - confirm before relying on the result being non-null.
  IF0<Char> predicateToGenerator(O f) {
    ret () -> (Char) random(filter(f, printableASCIICharsList()));
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028694 |
Snippet name: | Syntactic Learning [previously: What is a number Spike, OK] |
Eternal ID of this version: | #1028694/77 |
Text MD5: | 92ec7d851652b6ec517e0039459762d9 |
Transpilation MD5: | 6cb4095093efdc9b013899d5f4ab6fb4 |
Author: | stefan |
Category: | javax |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-06 16:38:22 |
Source code size: | 4533 bytes / 126 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 266 / 1345 |
Version history: | 76 change(s) |
Referenced in: | #1028761 - Input Type Detector [dev.] |