Uses 911K of libraries. Click here for Pure Java version (10259L/51K).
/*
Input:
  123 is a number
  blubb is not a number
  5$"%! is not a number
  45 is a number

task 1: tokenize well (in this case, ignore quotes)
task 2: discover patterns "* is a number" and "* is not a number"
task 3: call pattern 1 positive, pattern 2 negative
task 4: make set of positive/negative example strings for argument
task 5a: learn description for positive examples, output new examples
task 5b: learn description for negative examples, output new examples

Generates new examples for numbers, e.g.:
  351 is a number
  73 is a number
*/
19 | |
20 | cprint { |
21 | S input = autoUnindent_mls([[ |
22 | 123 is a number |
23 | blubb is not a number |
24 | 5$"%! is not a number |
25 | 45 is a number |
26 | ]]); |
27 | S output; |
28 | |
29 | transient L<F1> someFunctions = ll( |
30 | f<Char, Bool> isDigit, |
31 | f<Char, Bool> isLetter, |
32 | f<Char, Bool> isLetterOrDigit, |
33 | f<Char, Bool> isSpace); |
34 | |
35 | visual jvsplit(jhsplit(dm_textAreaAsSection input(), dm_textAreaAsSection output()), super); |
36 | |
37 | start { dm_onFieldChangeAndNow input(r calc); } |
38 | |
39 | void calc { |
40 | // task 1: tokenize well (in this case, ignore quotes) |
41 | |
42 | ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer; |
43 | LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input)); |
44 | pnlStruct(+tokenizedInput); |
45 | |
46 | // task 2: discover patterns "* is a number" and "* is not a number" |
47 | |
48 | Set<S> patterns = asSet(findOneArgumentPatterns(tokenizedInput)); |
49 | assertEquals(2, l(patterns)); |
50 | print(+patterns); |
51 | |
52 | // task 3: make set of positive/negative example strings for argument |
53 | |
54 | Map<S, LS> examples = mapToValues_linkedHashMap(patterns, pat -> |
55 | mapNotNulls(tok -> firstMatch(getFlexMatchIC(pat, tok)), tokenizedInput)); |
56 | print(+examples); |
57 | |
58 | // task 4: make theories |
59 | |
60 | replace Xmp with L<Char>. |
61 | |
62 | Map<S, L<Xmp>> examplesAsLists = mapValues(l -> lambdaMap characters(l), examples); |
63 | print(examplesAsLists); |
64 | //examplesAsLists = reverseKeys(examplesAsLists); // test reversing to see if theory gets reversed |
65 | L<Xmp> posExamples = firstValue(examplesAsLists), negExamples = secondValue(examplesAsLists); |
66 | |
67 | // for functions that are applicable to the elements of the examples: |
68 | new LinkedHashSet<ITheoryOn<Xmp>> theories; |
69 | for (O function : concatAsOrderedSet(lambdaMap plusNegation(someFunctions))) { |
70 | //if (!functionCallableOnAll_nonSynthetic(function, concatLists(values(examplesAsLists)))) continue; |
71 | print("Testing function " + function); |
72 | theories.add(new AllElementsSatisfy(function)); |
73 | theories.add(new AnyElementSatisfies(function)); |
74 | } |
75 | |
76 | theories.addAll(concatLists(lambdaMap theoryPlusInverse(theories))); |
77 | pnl(theories); |
78 | |
79 | // test theories |
80 | Map<ITheoryOn<Xmp>, PosNeg<Xmp>> theoryResults = mapToValues(theories, theory -> |
81 | PosNeg(mapToValues(trueFalseMap(posExamples, negExamples), (example, x) -> |
82 | theory.check(example) == x))); |
83 | pnl(theoryResults); |
84 | |
85 | // make new examples |
86 | |
87 | replace Theory with ITheoryOn<Xmp>. |
88 | |
89 | new LS newStatements; |
90 | |
91 | L<Theory> perfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.perfect())); |
92 | print(+perfectTheories); |
93 | newStatements.addAll(makeExamplesFor(perfectTheories, posExamples, firstKey(examplesAsLists))); |
94 | |
95 | // perfect theories for negative examples |
96 | L<Theory> antiPerfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.antiPerfect())); |
97 | print(+antiPerfectTheories); |
98 | newStatements.addAll(makeExamplesFor(antiPerfectTheories, negExamples, secondKey(examplesAsLists))); |
99 | |
100 | setField(output := lines(newStatements)); |
101 | } |
102 | |
103 | LS makeExamplesFor(L<ITheoryOn<L<Char>>> theories, Cl<L<Char>> examples, S pattern) { |
104 | new LS out; |
105 | |
106 | IntRange sizeRange = sizeRangeOfCollections(examples); |
107 | print(+sizeRange); |
108 | |
109 | for (ITheoryOn<L<Char>> t : theories) |
110 | if (t cast AllElementsSatisfy) { |
111 | IF0<Char> generator = predicateToGenerator(t.pred); |
112 | if (generator == null) continue with print("No generator for " + t.pred); |
113 | repeat 3 { |
114 | S example = charactersToString(repF(random(sizeRange), generator)); |
115 | print("> ", addAndReturn(out, format_noQuote(pattern, example))); |
116 | } |
117 | break; |
118 | } |
119 | |
120 | ret out; |
121 | } |
122 | |
123 | IF0<Char> predicateToGenerator(O f) { |
124 | ret () -> (Char) random(filter(f, printableASCIICharsList())); |
125 | } |
126 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028694 |
Snippet name: | Syntactic Learning [previously: What is a number Spike, OK] |
Eternal ID of this version: | #1028694/77 |
Text MD5: | 92ec7d851652b6ec517e0039459762d9 |
Transpilation MD5: | 6cb4095093efdc9b013899d5f4ab6fb4 |
Author: | stefan |
Category: | javax |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-06 16:38:22 |
Source code size: | 4533 bytes / 126 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 267 / 1347 |
Version history: | 76 change(s) |
Referenced in: | [show references] |