/*
  123 is a number
  blubb is not a number
  5$"%! is not a number
  45 is a number

  task 1: tokenize well (in this case, ignore quotes)
  task 2: discover patterns "* is a number" and "* is not a number"
  task 3: call pattern 1 positive, pattern 2 negative
  task 4: make set of positive/negative example strings for argument
  task 5a: learn description for positive examples, output new examples
  task 5b: learn description for negative examples, output new examples
*/

// Script overview: tokenizes the example sentences, derives the patterns
// they share, collects the variable part of every sentence per pattern,
// tests simple character-level theories against those examples and, if a
// theory classifies all of them correctly, prints newly generated examples.
//
// NOTE(review): this file had been collapsed onto two physical lines and
// several generic type arguments were lost (e.g. "Map> examplesAsLists",
// "replace Xmp with L."). Line breaks and type arguments below were
// reconstructed from how the values are used - confirm against the
// original snippet.

cprint {
  // One example sentence per line.
  // presumably tlft() splits this into trimmed lines - TODO confirm
  S input = [[
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number
  ]];

  start-thread {
    // task 1: tokenize well (in this case, ignore quotes)

    ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
    LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input));
    pnlStruct(+tokenizedInput);

    // task 2: discover patterns "* is a number" and "* is not a number"
    // (by finding best split point)

    /*int minTokens = max(lambdaMap countCodeTokens(tokenizedInput));
    print(+minTokens);
    for (int i = 1; i <= minTokens; i++) {
      Set<S> patterns = ciSet();
      for (LS tok : tokenizedInput)
        patterns.add("* " + joinSubList(tok, indexOfCodeToken(i)));
      print("Patterns for split point " + i + ": " + patterns);
    }*/

    // The split point is currently fixed: the first token is the variable part.
    int splitAtToken = 1;
    Set<S> patterns = patternsTreatingFirstNTokensAsVariable(tokenizedInput, splitAtToken);
    print(+patterns);
    assertEquals(2, l(patterns));

    // task 3/4: make set of positive/negative example strings for argument
    // (pattern -> matched variable parts; the first pattern is later taken
    // as positive and the second as negative, see posExamples/negExamples)

    Map<S, LS> examples = mapToValues_linkedHashMap(patterns, pat ->
      mapNotNulls(tok -> firstMatch(getFlexMatchIC(pat, tok)), tokenizedInput));
    print(+examples);

    // task 5a: make theories

    // An example ("Xmp") is handled as a list of its characters.
    replace Xmp with L<Char>.

    Map<S, L<Xmp>> examplesAsLists = mapValues(l -> lambdaMap characters(l), examples);
    print(examplesAsLists);

    //examplesAsLists = reverseKeys(examplesAsLists); // test reversing to see if theory gets reversed

    L<Xmp> posExamples = firstValue(examplesAsLists),
           negExamples = secondValue(examplesAsLists);

    // Candidate per-character predicates to build theories from.
    L<F1> someFunctions = ll(
      f<Char, Bool> isDigit,
      f<Char, Bool> isLetter,
      f<Char, Bool> isLetterOrDigit,
      f<Char, Bool> isSpace);

    // for functions that are applicable to the elements of the examples:
    new LinkedHashSet<ITheoryOn<Xmp>> theories;
    for (F1 function : someFunctions) {
      // NOTE(review): this skips a function when the helper returns true for
      // the example lists, which reads opposite to the comment above. Left
      // unchanged - confirm the helper's semantics before touching it.
      if (functionCallableOnAll_nonSynthetic(function, concatLists(values(examplesAsLists)))) continue;
      print("Testing function " + function);
      theories.add(new AllElementsSatisfy(function));
      theories.add(new AnyElementSatisfies(function));
    }

    // Extend the set with whatever theoryPlusInverse yields for each theory.
    theories.addAll(concatLists(lambdaMap theoryPlusInverse(theories)));
    pnl(theories);

    // test theories
    // (each positive example is expected to check as true, each negative as false)

    Map<ITheoryOn<Xmp>, PosNeg<Xmp>> theoryResults = mapToValues(theories, theory ->
      PosNeg(mapToValues(trueFalseMap(posExamples, negExamples),
        (example, x) -> theory.check(example) == x)));
    pnl(theoryResults);

    L<ITheoryOn<Xmp>> perfectTheories = keysWhereValue(theoryResults, pn -> pn.perfect());
    print(+perfectTheories);

    // Stop when no theory was reported as perfect.
    ITheoryOn<Xmp> bestTheory = first(perfectTheories);
    if (bestTheory == null) ret;

    // make new examples

    IntRange sizeRange = sizeRangeOfCollections(posExamples);
    print(+sizeRange);

    // Only AllElementsSatisfy theories are turned into generators here;
    // any other kind of best theory produces no output.
    if (bestTheory cast AllElementsSatisfy) {
      IF0<Char> generator = predicateToGenerator(bestTheory.pred);
      if (generator == null) ret with print("No generator for " + bestTheory.pred);
      repeat 10 {
        S example = charactersToString(repF(random(sizeRange), generator));
        // The first pattern (the one used for posExamples) serves as the output template.
        print(format(firstKey(examplesAsLists), example));
      }
    }
  }

  // Returns a generator of random characters for predicate f, or null when
  // none is known. The predicate is identified by its string form; only
  // "isDigit(x)" is supported.
  IF0<Char> predicateToGenerator(O f) {
    if (eq(str(f), "isDigit(x)")) ret () -> randomDigit();
    null;
  }
}