Syntactic Learning [previously: What is a number Spike, OK] [1028694]

/*
Input:
  123 is a number
  blubb is not a number
  5$"%! is not a number
  45 is a number

task 1: tokenize well (in this case, ignore quotes)
task 2: discover patterns "* is a number" and "* is not a number"
task 3: call pattern 1 positive, pattern 2 negative
task 4: make set of positive/negative example strings for argument
task 5a: learn description for positive examples, output new examples
task 5b: learn description for negative examples, output new examples

Generates new examples for numbers, e.g.:
  351 is a number
  73 is a number
*/

// Dynamic module: reads statements like "X is a number" / "X is not a number"
// from the input field, derives character-level theories that separate the
// two groups of arguments, and writes freshly generated statements to the
// output field.
cprint {
  // Default input; calc() is re-run through the field-change hook in start {}.
  S input = autoUnindent_mls([[
    123 is a number
    blubb is not a number
    5$"%! is not a number
    45 is a number
  ]]);
  // Newline-joined generated statements (set at the end of calc()).
  S output;

  // Candidate per-character predicates; calc() also adds their negations
  // (via plusNegation) before building theories from them.
  transient L<F1> someFunctions = ll(
    f<Char, Bool> isDigit,
    f<Char, Bool> isLetter,
    f<Char, Bool> isLetterOrDigit,
    f<Char, Bool> isSpace);

  // Input and output text areas combined with the inherited visualization.
  visual jvsplit(jhsplit(dm_textAreaAsSection input(), dm_textAreaAsSection output()), super);
  
  start { dm_onFieldChangeAndNow input(r calc); }

  // Runs the whole pipeline (tasks 1-5 from the header comment) on the
  // current input and stores the generated statements in `output`.
  void calc {
    // task 1: tokenize well (in this case, ignore quotes)
    
    ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
    LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input));
    pnlStruct(+tokenizedInput);
    
    // task 2: discover patterns "* is a number" and "* is not a number"

    Set<S> patterns = asSet(findOneArgumentPatterns(tokenizedInput));
    // Everything below assumes exactly two patterns; fail loudly otherwise.
    assertEquals(2, l(patterns));
    print(+patterns);
    
    // task 4: make set of positive/negative example strings for argument
    // (for each pattern, collect the first match of every input line that fits it)
    
    Map<S, LS> examples = mapToValues_linkedHashMap(patterns, pat ->
      mapNotNulls(tok -> firstMatch(getFlexMatchIC(pat, tok)), tokenizedInput));
    print(+examples);
    
    // task 5: make theories

    replace Xmp with L<Char>.
    
    // Each example string becomes a list of characters.
    Map<S, L<Xmp>> examplesAsLists = mapValues(l -> lambdaMap characters(l), examples);
    print(examplesAsLists);
    //examplesAsLists = reverseKeys(examplesAsLists); // test reversing to see if theory gets reversed
    // task 3: the first pattern's examples are taken as positive, the second pattern's as negative
    L<Xmp> posExamples = firstValue(examplesAsLists), negExamples = secondValue(examplesAsLists);

    // for functions that are applicable to the elements of the examples:
    new LinkedHashSet<ITheoryOn<Xmp>> theories;
    for (O function : concatAsOrderedSet(lambdaMap plusNegation(someFunctions))) {
      //if (!functionCallableOnAll_nonSynthetic(function, concatLists(values(examplesAsLists)))) continue;
      print("Testing function " + function);
      theories.add(new AllElementsSatisfy(function));
      theories.add(new AnyElementSatisfies(function));
    }

    // Extend the set with what theoryPlusInverse yields for each theory.
    theories.addAll(concatLists(lambdaMap theoryPlusInverse(theories)));
    pnl(theories);

    // test theories: for every example, record whether the theory's verdict
    // equals the example's expected label (true = positive, false = negative)
    Map<ITheoryOn<Xmp>, PosNeg<Xmp>> theoryResults = mapToValues(theories, theory -> 
      PosNeg(mapToValues(trueFalseMap(posExamples, negExamples), (example, x) ->
        theory.check(example) == x)));
    pnl(theoryResults);

    // make new examples

    replace Theory with ITheoryOn<Xmp>.

    new LS newStatements;

    // task 5a: theories whose PosNeg result reports perfect() describe the positive examples
    L<Theory> perfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.perfect()));
    print(+perfectTheories);
    newStatements.addAll(makeExamplesFor(perfectTheories, posExamples, firstKey(examplesAsLists)));

    // task 5b: perfect theories for negative examples
    L<Theory> antiPerfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.antiPerfect()));
    print(+antiPerfectTheories);
    newStatements.addAll(makeExamplesFor(antiPerfectTheories, negExamples, secondKey(examplesAsLists)));

    setField(output := lines(newStatements));
  }

  // Generates 3 new statements for `pattern` from the first theory in
  // `theories` that is an AllElementsSatisfy and has a usable generator.
  // Each generated argument has a length drawn from the size range of
  // `examples` and consists of characters produced by the generator.
  // Returns the generated statements (empty if no theory qualified).
  LS makeExamplesFor(L<ITheoryOn<L<Char>>> theories, Cl<L<Char>> examples, S pattern) {
    new LS out;
    
    IntRange sizeRange = sizeRangeOfCollections(examples);
    print(+sizeRange);

    for (ITheoryOn<L<Char>> t : theories)
      if (t cast AllElementsSatisfy) {
        IF0<Char> generator = predicateToGenerator(t.pred);
        // null means the predicate matches no printable ASCII character
        if (generator == null) continue with print("No generator for " + t.pred);
        repeat 3 {
          S example = charactersToString(repF(random(sizeRange), generator));
          print("> ", addAndReturn(out, format_noQuote(pattern, example)));
        }
        // one theory is enough
        break;
      }

    ret out;
  }

  // Turns a character predicate into a generator of random printable ASCII
  // characters satisfying it. The candidate list is computed once here
  // rather than on every generator call. Returns null when no printable
  // ASCII character satisfies the predicate, so that callers can skip it.
  IF0<Char> predicateToGenerator(O f) {
    L<Char> candidates = filter(f, printableASCIICharsList());
    if (empty(candidates)) ret null;
    ret () -> (Char) random(candidates);
  }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

1	/*
2	Input:
3	123 is a number
4	blubb is not a number
5	5$"%! is not a number
6	45 is a number
7
8	task 1: tokenize well (in this case, ignore quotes)
9	task 2: discover patterns "* is a number" and "* is not a number"
10	task 3: call pattern 1 positive, pattern 2 negative
11	task 4: make set of positive/negative example strings for argument
12	task 5a: learn description for positive examples, output new examples
13	task 5b: learn description for negative examples, output new examples
14
15	Generates new examples for numbers, e.g.:
16	351 is a number
17	73 is a number
18	*/
19
20	cprint {
21	S input = autoUnindent_mls([[
22	123 is a number
23	blubb is not a number
24	5$"%! is not a number
25	45 is a number
26	]]);
27	S output;
28
29	transient L<F1> someFunctions = ll(
30	f<Char, Bool> isDigit,
31	f<Char, Bool> isLetter,
32	f<Char, Bool> isLetterOrDigit,
33	f<Char, Bool> isSpace);
34
35	visual jvsplit(jhsplit(dm_textAreaAsSection input(), dm_textAreaAsSection output()), super);
36
37	start { dm_onFieldChangeAndNow input(r calc); }
38
39	void calc {
40	// task 1: tokenize well (in this case, ignore quotes)
41
42	ITokenizer tokenizer = lambda1 splitAtSpaceTokenizer;
43	LLS tokenizedInput = map(s -> simpleSpacesAndTrim(tokenizer.tokenize(s)), tlft(input));
44	pnlStruct(+tokenizedInput);
45
46	// task 2: discover patterns "* is a number" and "* is not a number"
47
48	Set<S> patterns = asSet(findOneArgumentPatterns(tokenizedInput));
49	assertEquals(2, l(patterns));
50	print(+patterns);
51
52	// task 3: make set of positive/negative example strings for argument
53
54	Map<S, LS> examples = mapToValues_linkedHashMap(patterns, pat ->
55	mapNotNulls(tok -> firstMatch(getFlexMatchIC(pat, tok)), tokenizedInput));
56	print(+examples);
57
58	// task 4: make theories
59
60	replace Xmp with L<Char>.
61
62	Map<S, L<Xmp>> examplesAsLists = mapValues(l -> lambdaMap characters(l), examples);
63	print(examplesAsLists);
64	//examplesAsLists = reverseKeys(examplesAsLists); // test reversing to see if theory gets reversed
65	L<Xmp> posExamples = firstValue(examplesAsLists), negExamples = secondValue(examplesAsLists);
66
67	// for functions that are applicable to the elements of the examples:
68	new LinkedHashSet<ITheoryOn<Xmp>> theories;
69	for (O function : concatAsOrderedSet(lambdaMap plusNegation(someFunctions))) {
70	//if (!functionCallableOnAll_nonSynthetic(function, concatLists(values(examplesAsLists)))) continue;
71	print("Testing function " + function);
72	theories.add(new AllElementsSatisfy(function));
73	theories.add(new AnyElementSatisfies(function));
74	}
75
76	theories.addAll(concatLists(lambdaMap theoryPlusInverse(theories)));
77	pnl(theories);
78
79	// test theories
80	Map<ITheoryOn<Xmp>, PosNeg<Xmp>> theoryResults = mapToValues(theories, theory ->
81	PosNeg(mapToValues(trueFalseMap(posExamples, negExamples), (example, x) ->
82	theory.check(example) == x)));
83	pnl(theoryResults);
84
85	// make new examples
86
87	replace Theory with ITheoryOn<Xmp>.
88
89	new LS newStatements;
90
91	L<Theory> perfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.perfect()));
92	print(+perfectTheories);
93	newStatements.addAll(makeExamplesFor(perfectTheories, posExamples, firstKey(examplesAsLists)));
94
95	// perfect theories for negative examples
96	L<Theory> antiPerfectTheories = sortByMetaTransformerStructureComplexity(keysWhereValue(theoryResults, pn -> pn.antiPerfect()));
97	print(+antiPerfectTheories);
98	newStatements.addAll(makeExamplesFor(antiPerfectTheories, negExamples, secondKey(examplesAsLists)));
99
100	setField(output := lines(newStatements));
101	}
102
103	LS makeExamplesFor(L<ITheoryOn<L<Char>>> theories, Cl<L<Char>> examples, S pattern) {
104	new LS out;
105
106	IntRange sizeRange = sizeRangeOfCollections(examples);
107	print(+sizeRange);
108
109	for (ITheoryOn<L<Char>> t : theories)
110	if (t cast AllElementsSatisfy) {
111	IF0<Char> generator = predicateToGenerator(t.pred);
112	if (generator == null) continue with print("No generator for " + t.pred);
113	repeat 3 {
114	S example = charactersToString(repF(random(sizeRange), generator));
115	print("> ", addAndReturn(out, format_noQuote(pattern, example)));
116	}
117	break;
118	}
119
120	ret out;
121	}
122
123	IF0<Char> predicateToGenerator(O f) {
124	ret () -> (Char) random(filter(f, printableASCIICharsList()));
125	}
126	}

Snippet ID:	#1028694
Snippet name:	Syntactic Learning [previously: What is a number Spike, OK]
Eternal ID of this version:	#1028694/77
Text MD5:	92ec7d851652b6ec517e0039459762d9
Transpilation MD5:	6cb4095093efdc9b013899d5f4ab6fb4
Author:	stefan
Category:	javax
Type:	JavaX source code (Dynamic Module)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-07-06 16:38:22
Source code size:	4533 bytes / 126 lines
Pitched / IR pitched:	No / No
Views / Downloads:	690 / 1800
Version history:	76 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1028694 // Syntactic Learning [previously: What is a number Spike, OK]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS