Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

169
LINES

< > BotCompany Repo | #1028089 // Pattern Maker v1 [OK]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 911K of libraries. Click here for Pure Java version (16794L/91K).

!7

/* 
This pattern maker takes a fixed list of examples and counterexamples and searches for PhraseCache patterns (also called theories or solutions) which distinguish positive from negative examples.

Each pattern has a "complexity class" which is the number of words its literal form contains.

Ultimately, we are looking for the best pattern (highest "score", i.e. the number of examples the pattern solves) with the lowest complexity class. These two requirements can contradict each other, so we usually output one best pattern for each complexity class.

During the computation, we usually keep additional patterns as partial solutions which can then eventually be combined or extended to form a final solution. Thus two patterns which solve distinct sets of examples are good to keep around for possibly combining them.

Solutions with no advantage over another known solution, such as a longer pattern with the same complexity class and exactly the same score, can probably be discarded right away.

There should also be procedures that simplify patterns without changing their operation.
*/

// A single labelled input line used to train and evaluate patterns.
concept Example {
  // the example text a pattern is matched against
  String text;
  // true for a positive example, false for a counterexample
  boolean pos;
}

// A candidate pattern together with derived, lazily cached facts about how
// it performs against all Example concepts stored in the same Concepts db.
concept Theory {
  S pattern;

  // Examples whose label agrees with the match result of this pattern
  // (positive and matched, or negative and not matched).
  transient simplyCached Cl<Example> solvedExamples() {
    return filter(list(_concepts, Example),
      ex -> mmo2_match(pattern, ex.text) == ex.pos);
  }

  // All examples that are not in solvedExamples().
  transient simplyCached Cl<Example> unsolvedExamples() {
    return setMinusSet(list(_concepts, Example), solvedExamples());
  }

  // True when no example is left unsolved.
  bool isFullSolution() {
    return empty(unsolvedExamples());
  }

  // Number of solved examples.
  int score() {
    return l(solvedExamples());
  }

  // Complexity class as computed by numberOfWords2 on the pattern
  // (presumably its word count, per the header comment of this file).
  transient simplyCached int complexityClass() {
    return numberOfWords2(pattern);
  }
}

// Dynamic module that searches for patterns separating the user's examples
// from the counterexamples and shows the results in two tables.
cmodule PatternMaker {
  // when true, start() launches think() in a background thread
  // (exposed in the UI as the "Think on load" checkbox)
  switchable bool enabled = true;
  // ID for this case; replaced with a fresh global ID by
  // _modifyStructForDuplication. Only referenced from commented-out code in start().
  switchable S caseID = aGlobalID();
  // user-entered texts bound to the UI: a free comment and the two example lists
  S comment, examplesText, counterexamplesText;
  // tables created in visualize(): all theories / best theory per complexity class
  transient JTable theoriesTable, resultsTable;
  // concept store holding the Example and Theory objects of the current run
  // (re-created by think())
  transient Concepts cc;
  // example / counterexample sets derived from the two texts in think()
  transient Set<S> examples, counterexamples;
  // fed in addToCollectors() with full solutions, keyed by complexity class
  transient new Lowest<Theory> simplestFullSolutionCollector;
  // persisted summary written at the end of think(): complexity class -> scored pattern
  TreeMap<Int, Scored<S>> bestPatternsByClass;
  S simplestFullSolution; // simplest pattern solving all examples
  
  // collectors (filled by addToCollectors)
  // complexity class -> theory kept by putIfHigherByCalculatedField using theoryScore
  transient new TreeMap<Int, Theory> bestByComplexity;
  // complexity class -> all theories of that class; the per-class sets are
  // created on demand by the auto map and are built over theoryScore
  transient Map<Int, TreeSetWithDuplicates<Theory>> allByComplexity
    = autoTreeMap(() -> treeSetWithDuplicatesOverCalculatedField theoryScore());

  // Module start hook: optionally kicks off a search in the background.
  start {
    //cc = dm_handleCaseIDField();
    //for (Theory t : list(cc, Theory)) addToCollectors(t);
    if (enabled) {
      // run think() off the calling thread
      thread { think(); }
    }
  }

  // Rewrites the first "caseID=*" occurrence in the given struct text so that
  // it carries a newly generated global ID, and returns the modified text.
  S _modifyStructForDuplication(S struct) {
    S freshCaseID = quote(aGlobalID());
    S replacement = "caseID=" + freshCaseID;
    ret jreplace_first(struct, "caseID=*", replacement);
  }
  
  // Builds the module's UI and returns it.
  // Layout, as composed below:
  //  - north: the "comment" field with its label
  //  - center, upper part: "Examples" and "Counterexamples" text areas side by side
  //  - center, lower part: a "Results" section (label showing the simplest full
  //    solution plus tabs "Best" / "All theories") next to the print log
  //  - buttons: "Think" (runs think()), an import/export pop-down menu, and the
  //    "Think on load" checkbox bound to the 'enabled field
  // Side effect: assigns resultsTable and theoriesTable.
  visualize {
    JComponent c = withCenteredButtons(
      northAndCenterWithMargins(dm_fieldWithLabel comment(),
      jvsplit(
        jhgrid(
          jCenteredSection("Examples", dm_textArea examplesText()),
          jCenteredSection("Counterexamples", dm_textArea counterexamplesText())),
        jhsplit(
          jCenteredSection("Results",
            // label text is empty until a full solution has been found
            northAndCenterWithMargin(dm_calculatedCenteredLabel(() -> empty(simplestFullSolution) ? "" : "Simplest solution pattern: " + quote(simplestFullSolution)),
            jtabs(
              "Best", resultsTable = sexyTable(),
              "All theories", theoriesTable = sexyTable()))),
          dm_printLogComponent()))),
      jThreadedButton("Think", rEnter think),
      jPopDownButton_noText(dm_importAndExportAllDataMenuItems()),
      dm_checkBox("Think on load", 'enabled));
    // fill the freshly created tables in a separate thread
    thread { resultsToTables(); }
    ret c;
  }

  // Runs one complete search from scratch:
  //  1. resets the concept store and all collectors,
  //  2. reads examples and counterexamples from the two text fields
  //     (tlftj presumably splits the text into lines - TODO confirm),
  //  3. aborts with an info box if a text appears in both lists,
  //  4. stores every text as an Example concept (pos = true / false),
  //  5. runs Strategy2 to generate and collect theories,
  //  6. publishes the best pattern per complexity class and refreshes the
  //     tables if the module is currently visualized.
  void think enter {
    cc = new Concepts;
    indexConceptFieldCI(cc, Example, 'text);
    indexConceptFieldCI(cc, Theory, 'pattern);
    setField(simplestFullSolution := null);
    simplestFullSolutionCollector.clear();

    // Reset the per-complexity collectors as well. Without this, Theory
    // objects from a previous run (which belong to the previous Concepts
    // instance and were scored against the previous examples) would stay in
    // the collectors and could shadow theories found in this run.
    bestByComplexity.clear();
    // same construction as the field initializer of allByComplexity
    allByComplexity = autoTreeMap(() -> treeSetWithDuplicatesOverCalculatedField theoryScore());
    
    examples = asLinkedHashSet(tlftj(examplesText));
    counterexamples = asLinkedHashSet(tlftj(counterexamplesText));
    // a text cannot be both a positive and a negative example
    Set<S> intersection = setIntersection(examples, counterexamples);
    if (nempty(intersection))
      ret with infoBox("Error: Examples appear in both lists, e.g. " + first(intersection));

    deleteConcepts(cc, Example);
    for (S s : examples) uniqCI(cc, Example, text := s, pos := true);
    for (S s : counterexamples) uniqCI(cc, Example, text := s, pos := false);
    int nExamples = countConcepts(cc, Example);
    print("Have " + nExamples(nExamples));

    new Strategy2().run();

    print("Have " + nTheories(countConcepts(cc, Theory)));
    
    // persistable summary: complexity class -> (pattern, score)
    setField(bestPatternsByClass := mapValues(t -> scoredNonPercent(t.score(), t.pattern), bestByComplexity));

    if (dm_vis() != null) resultsToTables();
  }
  
  // First-stage strategy: derive candidate patterns from the positive
  // examples, then OR-combine low-complexity theories.
  runnable class Strategy {
    // every positive example contributes its possible patterns
    for (S example : examples) {
      addPatterns(ai_inputExampleToPossibleMMOPatterns1(example));
    }

    // two passes: pair each class-1 theory with each theory of class 1 and 2
    for (int pass = 0; pass < 2; pass++) {
      for (Theory left : allByComplexity.get(1)) {
        for (int cls = 1; cls <= 2; cls++) {
          for (Theory right : allByComplexity.get(cls)) {
            S combined = mmo2_combineWithOr(left.pattern, right.pattern);
            addPattern(combined);
          }
        }
      }
    }
  }
  
  // Second-stage strategy: runs Strategy first, then takes the best theory of
  // each complexity class and, for every negative example it fails on, tries
  // the theory combined with a negated pattern derived from that example.
  class Strategy2 extends Strategy { run {
    super.run();
    // Iterate over a snapshot of the current best theories. addPattern()
    // below reaches addToCollectors(), which can insert new complexity
    // classes into bestByComplexity; structurally modifying a TreeMap while
    // iterating its values view (which values() presumably returns) makes the
    // iterator throw ConcurrentModificationException.
    for (Theory t : new ArrayList<Theory>(values(bestByComplexity))) {
      print("Have theory: " + t.pattern + ", unsolved: " + t.unsolvedExamples());
      // only counterexamples that the theory does not yet handle correctly
      for (Example e : filterWhere(t.unsolvedExamples(), pos := false))
        for (S negPat : ai_inputExampleToPossibleMMOPatterns1(e.text))
          addPattern(print("Trying pattern", "(" + t.pattern + ") + !(" + negPat + ")"));
    }
  }}

  // Converts a theory into an ordered map with one entry per table column.
  // Returns null for a null theory.
  Map theoryToMap(Theory t) {
    if (t == null) ret null;

    // "<solved> of <total number of examples>"
    S solvedSummary = l(t.solvedExamples()) + " of " + countConcepts(cc, Example);
    // quoted texts of all unsolved examples, comma-joined
    S unsolvedTexts = joinWithComma(quoteAll(collect text(t.unsolvedExamples())));

    ret litorderedmap(
      "Pattern" := t.pattern,
      "Complexity class" := t.complexityClass(),
      "Solved examples" := solvedSummary,
      "Unsolved" := unsolvedTexts);
  }
  
  // Pushes the current results into the two tables and updates each table's
  // tab name with its row count. The *_ifHasTable helper name suggests that a
  // missing table is tolerated - TODO confirm.
  void resultsToTables enter {
    // "Best" tab: one row per entry of bestByComplexity
    dataToTable_uneditable_ifHasTable(resultsTable, mapValuesToList theoryToMap(bestByComplexity));
    updateTabNameWithTableCount(resultsTable);
    // "All theories" tab: one row per Theory concept in cc
    dataToTable_uneditable_ifHasTable(theoriesTable, lambdaMap theoryToMap(list(cc, Theory)));
    updateTabNameWithTableCount(theoriesTable);
  }

  // Registers each pattern of the given collection via addPattern.
  // (fOr is kept as in the original; presumably JavaX's null-tolerant for loop.)
  void addPatterns(Iterable<S> l) {
    fOr (S candidate : l) {
      addPattern(candidate);
    }
  }
  void addPattern(S pattern) {
    addToCollectors(uniqCI_returnIfNew(cc, Theory, +pattern));
  }
  
  int theoryScore(Theory t) { ret t == null ? 0 : t.score(); }
  
  // Files a theory into all collectors; a null theory is ignored.
  void addToCollectors(Theory t) {
    if (t == null) ret;

    int cls = t.complexityClass();

    // best theory per complexity class, compared by theoryScore
    putIfHigherByCalculatedField theoryScore(bestByComplexity, cls, t);
    // complete list of theories for that class
    allByComplexity.get(cls).add(t);

    if (!t.isFullSolution()) ret;
    // publish the pattern only when the collector accepts this theory
    if (simplestFullSolutionCollector.put(t, cls))
      setField(simplestFullSolution := t.pattern);
  }
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028089
Snippet name: Pattern Maker v1 [OK]
Eternal ID of this version: #1028089/65
Text MD5: 9b2e74282403c6bac1b9cc7bebcb5fcb
Transpilation MD5: 897b57a33d85ae67ccda39c14e6688db
Author: stefan
Category:
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-05-21 13:58:52
Source code size: 6847 bytes / 169 lines
Pitched / IR pitched: No / No
Views / Downloads: 315 / 2221
Version history: 64 change(s)
Referenced in: #1028122 - PatternMaker1 [OK]