/*
123 is a number
blubb is not a number
5$"%! is not a number
45 is a number

task 1: tokenize well (in this case, ignore quotes)
task 2: discover patterns "* is a number" and "* is not a number"
task 3: call pattern 1 positive, pattern 2 negative
task 4: make set of positive/negative example strings for argument
task 5a: learn description for positive examples, output new examples
task 5b: learn description for negative examples, output new examples
*/

// Experiment script: takes four example sentences, derives the sentence
// patterns they share, and collects the argument strings matched by each
// pattern. Tasks 1-4 are implemented below; tasks 5a/5b are not yet.
cprint {
  // Example sentences, one per line of the literal.
  S input = [[
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number
  ]];
  
  // The whole experiment runs inside a start-thread block
  // (presumably on its own thread - confirm against the translator).
  start-thread {
    // task 1: tokenize well (in this case, ignore quotes)
    
    // A space-splitting tokenizer is used, so the quote character in the
    // third sentence gets no special treatment.
    ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
    // One token list per entry of tlft(input); presumably tlft yields the
    // trimmed lines of the input and simpleSpacesAndTrim normalizes
    // whitespace in the tokenizer output - TODO confirm against the helpers.
    LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input));
    pnlStruct(+tokenizedInput);
    
    // task 2: discover patterns "* is a number" and "* is not a number"
    // (by finding best split point)
    
    // Disabled draft of an automatic split-point search, kept for reference.
    // NOTE(review): the variable is named minTokens but is assigned
    // max(...) - confirm which one was intended before re-enabling.
    /*int minTokens = max(lambdaMap countCodeTokens(tokenizedInput));
    print(+minTokens);
    
    for (int i = 1; i <= minTokens; i++) {
      Set<S> patterns = ciSet();
      for (LS tok : tokenizedInput)
        patterns.add("* " + joinSubList(tok, indexOfCodeToken(i)));
      print("Patterns for split point " + i + ": " + patterns);
    }*/
    
    // The split point is hard-coded for now: only the first token of each
    // sentence is treated as the variable part.
    int splitAtToken = 1;
    Set<S> patterns = patternsTreatingFirstNTokensAsVariable(tokenizedInput, splitAtToken);
    print(+patterns);
    // Sanity check: the code below takes exactly a first and a second pattern.
    assertEquals(2, l(patterns));
    
    // task 3: call pattern 1 positive, pattern 2 negative
    
    // NOTE(review): which pattern ends up "positive" depends on the
    // iteration order of the Set returned above - verify that order is
    // deterministic (e.g. a sorted set) before relying on it.
    S posPattern = first(patterns), negPattern = second(patterns);
    
    // task 4: make set of positive/negative example strings for argument
    
    // Every tokenized sentence is tried against each pattern; mapNotNulls
    // presumably drops the sentences for which no match is produced, leaving
    // the first matched argument of each matching sentence.
    LS posExamples = mapNotNulls(tok -> firstMatch(getFlexMatchIC(posPattern, tok)), tokenizedInput);
    LS negExamples = mapNotNulls(tok -> firstMatch(getFlexMatchIC(negPattern, tok)), tokenizedInput);
    print(+posExamples);
    print(+negExamples);

    // Not implemented yet:
    // task 5a: learn description for positive examples, output new examples
    // task 5b: learn description for negative examples, output new examples
  }
}