/* 123 is a number blubb is not a number 5$"%! is not a number 45 is a number task 1: tokenize well (in this case, ignore quotes) task 2: discover patterns "* is a number" and "* is not a number" task 3: call pattern 1 positive, pattern 2 negative task 4: make set of positive/negative example strings for argument task 5a: learn description for positive examples, output new examples task 5b: learn description for negative examples, output new examples */ cprint { S input = [[ 123 is a number blubb is not a number 5$"%! is not a number 45 is a number ]]; start-thread { // task 1: tokenize well (in this case, ignore quotes) ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer; LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input)); pnlStruct(+tokenizedInput); // task 2: discover patterns "* is a number" and "* is not a number" // (by finding best split point) /*int minTokens = max(lambdaMap countCodeTokens(tokenizedInput)); print(+minTokens); for (int i = 1; i <= minTokens; i++) { Set patterns = ciSet(); for (LS tok : tokenizedInput) patterns.add("* " + joinSubList(tok, indexOfCodeToken(i))); print("Patterns for split point " + i + ": " + patterns); }*/ int splitAtToken = 1; Set patterns = patternsTreatingFirstNTokensAsVariable(tokenizedInput, splitAtToken); print(+patterns); assertEquals(2, l(patterns)); // task 3: call pattern 1 positive, pattern 2 negative S posPattern = first(patterns), negPattern = second(patterns); // task 4: make set of positive/negative example strings for argument LS posExamples = mapNotNulls(tok -> firstMatch(getFlexMatchIC(posPattern, tok)), tokenizedInput); LS negExamples = mapNotNulls(tok -> firstMatch(getFlexMatchIC(negPattern, tok)), tokenizedInput); print(+posExamples); print(+negExamples); //task 5a: learn description for positive examples, output new examples //task 5b: learn description for negative examples, output new examples } }