/*
Example input:

  123 is a number
  blubb is not a number
  5$"%! is not a number
  45 is a number

task 1:  tokenize well (in this case, ignore quotes)
task 2:  discover patterns "* is a number" and "* is not a number"
task 3:  call pattern 1 positive, pattern 2 negative
task 4:  make set of positive/negative example strings for argument
task 5a: learn description for positive examples, output new examples
task 5b: learn description for negative examples, output new examples

Only tasks 1 and 2 are implemented below; tasks 3 to 5b are placeholders.
*/

cprint {
  // One example sentence per line.
  S input = [[
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number
  ]];

  start-thread {
    // task 1: tokenize well (in this case, ignore quotes)
    ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
    LS inputLines = tlft(input);
    LLS tokenizedInput = map(line -> simpleSpacesAndTrim(tokenizer.tokenize(line)), inputLines);
    pnlStruct(+tokenizedInput);

    // task 2: discover patterns "* is a number" and "* is not a number"
    // (by finding best split point)

    // NOTE(review): despite its name, minTokens is assigned the result of
    // max(...) over the per-line counts, so the loop below runs up to the
    // largest count -- confirm whether min was intended.
    int minTokens = max(lambdaMap countCodeTokens(tokenizedInput));
    print(+minTokens);

    // For every candidate split point, replace the part of each line before
    // the split with "*" and collect the distinct resulting patterns.
    for (int splitPoint = 1; splitPoint <= minTokens; splitPoint++) {
      Set patterns = ciSet();
      for (LS lineTokens : tokenizedInput) {
        S suffix = joinSubList(lineTokens, indexOfCodeToken(splitPoint));
        patterns.add("* " + suffix);
      }
      print("Patterns for split point " + splitPoint + ": " + patterns);
    }

    // task 3: call pattern 1 positive, pattern 2 negative

    // task 4: make set of positive/negative example strings for argument

    // task 5a: learn description for positive examples, output new examples

    // task 5b: learn description for negative examples, output new examples
  }
}