Uses 911K of libraries. Click here for Pure Java version (16794L/91K).
1 | !7 |
2 | |
3 | /* |
4 | This pattern maker takes a fixed list of examples and counterexamples and searches for PhraseCache patterns (also called theories or solutions) which distinguish positive from negative examples. |
5 | |
6 | Each pattern has a "complexity class" which is the number of words its literal form contains. |
7 | |
8 | Ultimately, we are looking for the best pattern (highest score) with the lowest complexity class. These two requirements can contradict each other, so we usually output one best pattern for each complexity class. Let's call the number of examples a pattern solves its "score". |
9 | |
10 | During the computation, we usually keep additional patterns as partial solutions which can then eventually be combined or extended to form a final solution. Thus two patterns which solve distinct sets of examples are good to keep around for possibly combining them. |
11 | |
12 | Solutions with no advantage over another known solution, such as a longer pattern with the same complexity class and exactly the same score, can probably be discarded right away. |
13 | |
14 | There should also be procedures that simplify patterns without changing their operation. |
15 | */ |
16 | |
// One labeled input text for the pattern search.
concept Example {
  String text;  // the input text a pattern is tested against
  boolean pos;  // true for examples (pattern should match), false for counterexamples
}
21 | |
// A candidate pattern plus its evaluation against every Example stored in
// the same Concepts container. The evaluation results are computed on first
// use and then cached (simplyCached).
concept Theory {
  String pattern;

  // Examples this pattern classifies correctly: the pattern matches the text
  // exactly when the example is marked positive.
  transient simplyCached Cl<Example> solvedExamples() {
    return filter(list(_concepts, Example),
      ex -> mmo2_match(pattern, ex.text) == ex.pos);
  }

  // All examples that are not in solvedExamples().
  transient simplyCached Cl<Example> unsolvedExamples() {
    return setMinusSet(list(_concepts, Example), solvedExamples());
  }

  // True when there is no unsolved example left.
  boolean isFullSolution() {
    return empty(unsolvedExamples());
  }

  // The score is the number of solved examples.
  int score() {
    return l(solvedExamples());
  }

  // Complexity class of the pattern, as computed by numberOfWords2.
  transient simplyCached int complexityClass() {
    return numberOfWords2(pattern);
  }
}
39 | |
// Dynamic module combining the UI and the search driver of the pattern maker.
// think() parses the example/counterexample texts, generates Theory concepts
// through the strategies below and records the best theory per complexity class.
cmodule PatternMaker {
  switchable bool enabled = true; // if set, think() is started in a thread when the module starts
  switchable S caseID = aGlobalID(); // replaced with a fresh global ID when the module is duplicated
  S comment, examplesText, counterexamplesText; // user inputs edited in the UI
  transient JTable theoriesTable, resultsTable;
  transient Concepts cc; // Example and Theory concepts of the current run (recreated by think())
  transient Set<S> examples, counterexamples; // parsed from examplesText / counterexamplesText
  // fed with every full solution and its complexity class
  // (presumably keeps the entry with the lowest value - confirm against Lowest)
  transient new Lowest<Theory> simplestFullSolutionCollector;
  TreeMap<Int, Scored<S>> bestPatternsByClass; // per complexity class: best pattern with its score
  S simplestFullSolution; // simplest pattern solving all examples

  // collectors
  // best-scoring theory per complexity class
  transient new TreeMap<Int, Theory> bestByComplexity;
  // all theories per complexity class, organized by theoryScore()
  transient Map<Int, TreeSetWithDuplicates<Theory>> allByComplexity
    = autoTreeMap(() -> treeSetWithDuplicatesOverCalculatedField theoryScore());

  start {
    //cc = dm_handleCaseIDField();
    //for (Theory t : list(cc, Theory)) addToCollectors(t);
    if (enabled) thread { think(); }
  }

  // Gives a duplicated module its own case ID by rewriting the caseID entry
  // in the serialized module structure.
  S _modifyStructForDuplication(S struct) {
    ret jreplace_first(struct, "caseID=*", "caseID=" + quote(aGlobalID()));
  }

  // Builds the module UI: comment field, example/counterexample text areas,
  // result tables, print log and the button row.
  visualize {
    JComponent c = withCenteredButtons(
      northAndCenterWithMargins(dm_fieldWithLabel comment(),
        jvsplit(
          jhgrid(
            jCenteredSection("Examples", dm_textArea examplesText()),
            jCenteredSection("Counterexamples", dm_textArea counterexamplesText())),
          jhsplit(
            jCenteredSection("Results",
              northAndCenterWithMargin(dm_calculatedCenteredLabel(() -> empty(simplestFullSolution) ? "" : "Simplest solution pattern: " + quote(simplestFullSolution)),
                jtabs(
                  "Best", resultsTable = sexyTable(),
                  "All theories", theoriesTable = sexyTable()))),
            dm_printLogComponent()))),
      jThreadedButton("Think", rEnter think),
      jPopDownButton_noText(dm_importAndExportAllDataMenuItems()),
      dm_checkBox("Think on load", 'enabled));
    // fill the freshly created tables with whatever results already exist
    thread { resultsToTables(); }
    ret c;
  }

  // Runs one complete search: resets all per-run state, loads the examples,
  // runs the strategy and publishes the results.
  void think enter {
    cc = new Concepts;
    indexConceptFieldCI(cc, Example, 'text);
    indexConceptFieldCI(cc, Theory, 'pattern);
    setField(simplestFullSolution := null);
    simplestFullSolutionCollector.clear();

    // Theories collected by an earlier run belong to the previous Concepts
    // container and carry scores cached against the previous examples.
    // Drop them so they cannot shadow or mix with the theories of this run.
    bestByComplexity.clear();
    allByComplexity.clear();

    examples = asLinkedHashSet(tlftj(examplesText));
    counterexamples = asLinkedHashSet(tlftj(counterexamplesText));
    // a text cannot be an example and a counterexample at the same time
    Set<S> intersection = setIntersection(examples, counterexamples);
    if (nempty(intersection))
      ret with infoBox("Error: Examples appear in both lists, e.g. " + first(intersection));

    deleteConcepts(cc, Example);
    for (S s : examples) uniqCI(cc, Example, text := s, pos := true);
    for (S s : counterexamples) uniqCI(cc, Example, text := s, pos := false);
    int nExamples = countConcepts(cc, Example);
    print("Have " + nExamples(nExamples));

    new Strategy2().run();

    print("Have " + nTheories(countConcepts(cc, Theory)));

    setField(bestPatternsByClass := mapValues(t -> scoredNonPercent(t.score(), t.pattern), bestByComplexity));

    if (dm_vis() != null) resultsToTables();
  }

  // Base strategy: derive patterns from the positive examples, then
  // or-combine pairs of the theories found so far.
  runnable class Strategy {
    // positive examples to pattern
    for (S s : examples)
      addPatterns(ai_inputExampleToPossibleMMOPatterns1(s));

    // combine some pattern pairs from complexity classes 1-2
    twice {
      for (Theory a : allByComplexity.get(1))
        for (int n = 1; n <= 2; n++)
          for (Theory b : allByComplexity.get(n))
            addPattern(mmo2_combineWithOr(a.pattern, b.pattern));
    }
  }

  // Extended strategy: after the base strategy, tries to repair each best
  // theory by excluding patterns derived from the counterexamples it gets wrong.
  class Strategy2 extends Strategy { run {
    super.run();
    // Iterate over a snapshot: addPattern() below can insert new complexity
    // classes into bestByComplexity, and modifying the TreeMap while iterating
    // its live values() view can throw ConcurrentModificationException.
    for (Theory t : cloneList(values(bestByComplexity))) {
      print("Have theory: " + t.pattern + ", unsolved: " + t.unsolvedExamples());
      for (Example e : filterWhere(t.unsolvedExamples(), pos := false))
        for (S negPat : ai_inputExampleToPossibleMMOPatterns1(e.text))
          addPattern(print("Trying pattern", "(" + t.pattern + ") + !(" + negPat + ")"));
    }
  }}

  // Converts a theory into one table row (column name -> value); null stays null.
  Map theoryToMap(Theory t) {
    ret t == null ? null :
      litorderedmap(
        "Pattern" := t.pattern,
        "Complexity class" := t.complexityClass(),
        "Solved examples" := l(t.solvedExamples()) + " of " + countConcepts(cc, Example),
        "Unsolved" := joinWithComma(quoteAll(collect text(t.unsolvedExamples()))));
  }

  // Refreshes both result tables ("Best" and "All theories") and their tab names.
  void resultsToTables enter {
    dataToTable_uneditable_ifHasTable(resultsTable, mapValuesToList theoryToMap(bestByComplexity));
    updateTabNameWithTableCount(resultsTable);
    dataToTable_uneditable_ifHasTable(theoriesTable, lambdaMap theoryToMap(list(cc, Theory)));
    updateTabNameWithTableCount(theoriesTable);
  }

  // Adds every pattern of the given collection.
  // NOTE(review): "fOr" (capital O) is kept exactly as written - presumably the
  // JavaX null-tolerant for loop rather than a typo; confirm before changing.
  void addPatterns(Iterable<S> l) { fOr (S s : l) addPattern(s); }

  // Registers a pattern as a Theory in cc. Only a newly created theory is
  // passed to the collectors (uniqCI_returnIfNew; addToCollectors ignores null).
  void addPattern(S pattern) {
    addToCollectors(uniqCI_returnIfNew(cc, Theory, +pattern));
  }

  // Null-safe score accessor used as the ordering key of the collectors.
  int theoryScore(Theory t) { ret t == null ? 0 : t.score(); }

  // Files a theory into the per-complexity collectors and updates the
  // simplest full solution if the collector accepts it.
  void addToCollectors(Theory t) {
    if (t == null) ret;
    putIfHigherByCalculatedField theoryScore(bestByComplexity, t.complexityClass(), t);
    allByComplexity.get(t.complexityClass()).add(t);
    if (t.isFullSolution() && simplestFullSolutionCollector.put(t, t.complexityClass()))
      setField(simplestFullSolution := t.pattern);
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028089 |
Snippet name: | Pattern Maker v1 [OK] |
Eternal ID of this version: | #1028089/65 |
Text MD5: | 9b2e74282403c6bac1b9cc7bebcb5fcb |
Transpilation MD5: | 897b57a33d85ae67ccda39c14e6688db |
Author: | stefan |
Category: | |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-21 13:58:52 |
Source code size: | 6847 bytes / 169 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 314 / 2220 |
Version history: | 64 change(s) |
Referenced in: | [show references] |