!7

/* 
This pattern maker takes a fixed list of examples and counterexamples and searches for PhraseCache patterns (also called theories or solutions) which distinguish positive from negative examples.

Each pattern has a "complexity class" which is the number of words its literal form contains.

Ultimately, we are looking for the best pattern (highest score) with the lowest complexity class. These two requirements can contradict each other, so we usually output one best pattern for each complexity class. 
Let's call the number of examples a pattern solves its "score" and the actual set of examples solved the "exact score".

During the computation, we usually keep additional patterns as partial solutions which can then eventually be combined or extended to form a final solution. Thus two patterns which solve distinct sets of examples are good to keep around for possibly combining them.

Solutions with no advantage over another known solution, such as a longer pattern with the same complexity class and the same exact score, can probably be discarded right away.

There should also be procedures that simplify patterns without changing their operation.
*/

// A single labelled input line the pattern search works against.
concept Example {
  // The example's text; patterns are matched against this.
  S text;
  // true for entries from the "Examples" list, false for entries
  // from the "Counterexamples" list.
  bool pos;
}

// A candidate pattern together with (cached) information on how well it
// separates the positive from the negative examples stored in the same
// Concepts container.
concept Theory {
  // The pattern text, evaluated against example texts via mmo2_match.
  S pattern;
  
  // Examples this theory classifies correctly: positive examples the pattern
  // matches and negative examples it does not match.
  // (Previously every matched example was counted, so a pattern that also
  // matched all counterexamples received the highest score.)
  // Cached; the result reflects the examples present at the first call.
  transient simplyCached Cl<Example> solvedExamples() {
    ret filter(list(_concepts, Example), e -> mmo2_match(pattern, e.text) == e.pos);
  }
  
  // All examples not contained in solvedExamples(). Cached as well.
  transient simplyCached Cl<Example> unsolvedExamples() {
    ret setMinusSet(list(_concepts, Example), solvedExamples());
  }
  
  // Score = number of solved examples.
  int score() { ret l(solvedExamples()); }
  
  // Complexity class = word count of the pattern as computed by numberOfWords2.
  transient simplyCached int complexityClass() { ret numberOfWords2(pattern); }
}

// Module that reads examples and counterexamples from two text areas,
// generates candidate patterns (Theory concepts) from the positive examples
// and shows the best-scoring theory per complexity class plus all theories.
cmodule PatternMaker {
  // When true, think() is run automatically on module start.
  switchable bool enabled = true;
  switchable S caseID = aGlobalID();
  // Raw contents of the two input text areas (one example per line).
  S examplesText, counterexamplesText;
  transient JTable theoriesTable, resultsTable;
  // In-memory concept store holding the Example and Theory objects.
  transient Concepts cc;
  
  // collectors
  // Maps complexity class -> highest-scoring theory seen for that class.
  transient new TreeMap<Int, Theory> bestByComplexity;

  start {
    //cc = dm_handleCaseIDField();
    cc = new Concepts;
    indexConceptFieldCI(cc, Example, 'text);
    indexConceptFieldCI(cc, Theory, 'pattern);
    //for (Theory t : list(cc, Theory)) addToCollectors(t);
    if (enabled) thread { think(); }
  }

  // Rewrites the first caseID entry in the module's struct text so that a
  // duplicated module gets a freshly generated ID.
  S _modifyStructForDuplication(S struct) {
    ret jreplace_first(struct, "caseID=*", "caseID=" + quote(aGlobalID()));
  }
  
  // Builds the UI: the two input areas on top, result tabs and print log
  // below, and a "Think" button plus the "enabled" check box at the bottom.
  visualize {
    JComponent c = withCenteredButtons(jvsplit(
      jhgrid(
        jCenteredSection("Examples", dm_textArea examplesText()),
        jCenteredSection("Counterexamples", dm_textArea counterexamplesText())),
      jhsplit(
        jCenteredSection("Results",
          jtabs(
            "Best", resultsTable = sexyTable(),
            "All theories", theoriesTable = sexyTable())),
        dm_printLogComponent())),
      jThreadedButton("Think", rEnter think),
      dm_checkBox enabled());
    // Fill the freshly created tables with whatever results already exist.
    thread { resultsToTables(); }
    ret c;
  }

  // Rebuilds the example set from the two text fields, generates theories
  // from every positive example and refreshes the tables if the module is
  // currently visualized.
  void think enter {
    Set<S> l1 = asLinkedHashSet(tlftj(examplesText));
    Set<S> l2 = asLinkedHashSet(tlftj(counterexamplesText));
    // An entry cannot be both a positive and a negative example.
    Set<S> intersection = setIntersection(l1, l2);
    if (nempty(intersection))
      ret with infoBox("Error: Examples appear in both lists, e.g. " + first(intersection));

    deleteConcepts(cc, Example);
    // Start from a clean slate: theories cache their solved/unsolved example
    // lists, so theories kept from an earlier run would keep reporting
    // results computed against Example objects that were just deleted, and
    // they would never be re-ranked because addPattern only forwards newly
    // created theories to the collectors.
    deleteConcepts(cc, Theory);
    bestByComplexity.clear();

    for (S s : l1) uniqCI(cc, Example, text := s, pos := true);
    for (S s : l2) uniqCI(cc, Example, text := s, pos := false);
    int nExamples = countConcepts(cc, Example);
    print("Have " + nExamples(nExamples));

    // Candidate patterns are derived from the positive examples only.
    for (S s : l1)
      addPatterns(ai_inputExampleToPossibleMMOPatterns1(s));

    print("Have " + nTheories(countConcepts(cc, Theory)));

    if (dm_vis() != null) resultsToTables();
  }

  // Converts a theory into one table row (column name -> value);
  // returns null for a null theory.
  Map theoryToMap(Theory t) {  
    ret t == null ? null :
      litorderedmap(
        "Pattern" := t.pattern,
        "Complexity class" := t.complexityClass(),
        "Solved examples" := l(t.solvedExamples()) + " of " + countConcepts(cc, Example),
        "Unsolved" := collect text(t.unsolvedExamples()));
  }
  
  // Pushes the current results into the two tables: the per-complexity best
  // theories and the complete theory list.
  void resultsToTables enter {
    dataToTable_uneditable_ifHasTable(resultsTable, mapValuesToList theoryToMap(bestByComplexity));
    dataToTable_uneditable_ifHasTable(theoriesTable, lambdaMap theoryToMap(list(cc, Theory)));
  }

  // Registers every pattern of l as a theory.
  void addPatterns(Iterable<S> l) { fOr (S s : l) addPattern(s); }

  // Registers a single pattern. Presumably uniqCI_returnIfNew yields null
  // when a theory with this pattern already exists (addToCollectors
  // tolerates null) - verify against the helper.
  void addPattern(S pattern) {
    addToCollectors(uniqCI_returnIfNew(cc, Theory, +pattern));
  }
  
  // Score used to rank theories in the collectors; a null theory scores 0.
  int theoryScore(Theory t) { ret t == null ? 0 : t.score(); }
  
  // Offers t to bestByComplexity under its complexity class, ranked by
  // theoryScore. Null theories are ignored.
  void addToCollectors(Theory t) {
    if (t == null) ret;
    putIfHigherByCalculatedField theoryScore(bestByComplexity, t.complexityClass(), t);
  }
}