Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

200
LINES

< > BotCompany Repo | #1008696 // Find Subject (map version): Learner 1 [dev.]

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (8182L/54K/184K).

1  
!7
2  
3  
// Guesser selected by the main program (trained on the full material there).
static Guesser best;
// Score that bestLearner reported for that guesser.
static double bestScore;
5  
6  
// A stored sentence together with its annotations.
concept Sentence {
  // Raw sentence text (tokenized with nlTok5 in learningMaterial).
  S text;

  // Annotation map; learningMaterial reads its "subject" entry.
  SS data;
}
10  
11  
// One sample: a token list plus the bounds of the tokens marked as its subject.
sclass Example {
  L<S> tok;   // tokens of the sentence
  int start;  // subject start, taken from the IntRange given to the constructor
  int end;    // subject end, taken from the same IntRange (used as subList's end argument)

  // No-arg constructor.
  *() {}

  // Keeps the token list and copies both bounds out of subjectTokens.
  *(L<S> tok, IntRange subjectTokens) {
    this.tok = tok;
    this.start = subjectTokens.start;
    this.end = subjectTokens.end;
  }

  // Renders the quoted full sentence, then " => ", then the subject tokens.
  toString {
    S sentence = quote(joinWithSpaces(tok));
    S subject = joinWithSpaces(subList(tok, start, end));
    ret sentence + " => " + subject;
  }
}
25  
26  
// Shared learning interface of all guessers.
abstract sclass GuesserBase {
  // Passes each example of the material to learn(Example), in list order.
  void learn(L<Example> material) {
    for (Example example : material) {
      learn(example);
    }
  }

  // Per-example hook; a no-op here, meant to be overridden by subclasses.
  void learn(Example e) {}
}
33  
34  
// A trainable guesser that proposes which tokens of a sentence form the subject.
abstract sclass Guesser extends GuesserBase {
  // Returns the guessed subject as a token range over tok.
  // Callers in this file check the result for null before using it.
  abstract IntRange getSubjectTokens(L<S> tok);
}
37  
38  
// Guesses where a subject ends, given the token it starts at.
// Training counts, per lower-cased word, how often it occurred before the
// end of a subject (pos) versus as the last word of a subject (neg).
Guesser > GLengthOfSubject {
  new MultiSet<S> pos; // words seen before the last subject word (continue past these)
  new MultiSet<S> neg; // words seen as the last subject word (end on these)

  // Guesses a subject that starts at the first token.
  IntRange getSubjectTokens(L<S> tok) {
    ret getSubjectTokens(tok, 0);
  }

  // Guesses a subject that starts at token index startAt.
  // Walks forward while the current word has more pos than neg counts; the
  // word that stops the walk is still included in the range (hence i+1),
  // capped at the token count.
  IntRange getSubjectTokens(L<S> tok, int startAt) {
    int i = startAt;
    while (i < l(tok)) {
      S t = lower(tok.get(i));
      if (pos.get(t) <= neg.get(t)) // also stop if unknown word
        break;
      ++i;
    }
    ret intRange(startAt, min(l(tok), i+1));
  }

  // Records the lower-cased subject words of one example:
  // all but the last go to pos, the last one (if any) goes to neg.
  void learn(Example e) {
    L<S> subjectTokens = allToLower(subList(e.tok, e.start, e.end));
    for (S word : dropLast(subjectTokens))
      pos.add(word);
    addIfNotNull(neg, last(subjectTokens));
  }
}
64  
65  
// Decides whether the subject starts at the first or at the second token,
// based on how often the (lower-cased) first word preceded a subject in training.
Guesser > GSkip1 { // returns first word or second word
  new MultiSet<S> pos; // words to skip
  new MultiSet<S> neg; // words not to skip

  // Counts the first word as "skip" when the example's subject starts
  // after token 0, otherwise as "don't skip".
  void learn(Example e) {
    // Fixed: the original line lacked the closing parenthesis of add(...).
    (e.start > 0 ? pos : neg).add(lower(first(e.tok)));
  }

  // Returns a one-token range starting at index 1 if the first word was
  // skipped more often than not, else at index 0.
  IntRange getSubjectTokens(L<S> tok) {
    S t = lower(first(tok));
    ret intRangeFromStartAndLength(pos.get(t) > neg.get(t) ? 1 : 0, 1);
  }
}
78  
79  
// Like GSkip1, but keeps skipping leading words as long as each one was
// counted as "skip" more often than "don't skip".
Guesser > GSkip2 { // can skip multiple words
  new MultiSet<S> pos; // words to skip
  new MultiSet<S> neg; // words not to skip

  // Counts the first word as "skip" when the example's subject starts
  // after token 0, otherwise as "don't skip".
  // NOTE(review): only the first token is recorded, even when several
  // leading tokens precede the subject - confirm this is intended.
  void learn(Example e) {
    // Fixed: the original line lacked the closing parenthesis of add(...).
    (e.start > 0 ? pos : neg).add(lower(first(e.tok)));
  }

  // Returns a one-token range starting at the first token that is not skipped.
  IntRange getSubjectTokens(L<S> tok) {
    int i = 0;
    while (i < l(tok)) {
      S t = lower(tok.get(i));
      if (pos.get(t) <= neg.get(t)) // also stop if unknown word
        break;
      ++i;
    }
    // Fixed: the length was i+1, which made the range grow with the number
    // of skipped words; a single token is meant, as in GSkip1.
    ret intRangeFromStartAndLength(i, 1);
  }
}
98  
99  
// Chains two guessers: a decides where the subject starts,
// b (a GLengthOfSubject) decides where it ends.
Guesser > GCombine {
  Guesser a;
  GLengthOfSubject b = new GLengthOfSubject();

  // No-arg constructor; leaves a unset.
  *() {}

  // Uses the given guesser as the start-finder.
  *(Guesser a) {
    this.a = a;
  }

  // Takes the start index from a's guess (0 when a returns null)
  // and lets b extend the range from there.
  IntRange getSubjectTokens(L<S> tok) {
    IntRange startGuess = a.getSubjectTokens(tok);
    int skip;
    if (startGuess == null)
      skip = 0;
    else
      skip = startGuess.start;
    ret b.getSubjectTokens(tok, skip);
  }

  // Trains both parts on the same material, a first.
  void learn(L<Example> material) {
    a.learn(material);
    b.learn(material);
  }
}
117  
118  
// Main program: loads the sentence concepts, picks the best-scoring
// guesser, trains it on everything and stores it in best/bestScore.
p {
  loadConceptsFrom(#1008692);
  L<Example> material = learningMaterial();
  //pnlStruct(material);

  // bestLearner scores clones of the candidates, so the guesser
  // it hands back has not been trained yet
  Pair<Guesser, Double> winner = bestLearner(material,
    //ll(new GSkip1),
    ll(new GCombine(new GSkip1), new GCombine(new GSkip2)),
    50, 3, true);

  // Train the winner on the complete material for in-program use
  Guesser chosen = winner.a;
  chosen.learn(material);

  // Report, then publish through the static fields
  print("Best learner: " + formatDouble(winner.b, 1) + "% - " + struct(chosen));
  best = chosen;
  bestScore = winner.b;
}
137  
138  
// Output switches read by checkGuesser:
// printDetails   - print a line for every checked example (OK and FAIL)
// printSuccesses - print the OK lines even when printDetails is off
sbool printDetails, printSuccesses;
139  
140  
// Runs g on every example and counts exact matches between the guessed
// range and the example's (start, end) range.
// Prints per-example lines depending on printDetails/printSuccesses,
// prints the score, and returns ratioToPercent(score, n).
static double checkGuesser(L<Example> testMaterial, Guesser g) {
  print();
  int score = 0;
  int n = 0;
  for (Example e : testMaterial) {
    // called through pcall, so r may be null
    IntRange r = cast pcall(g, "getSubjectTokens", e.tok);
    bool ok = eq(IntRange(e.start, e.end), r);
    ++n;
    if (ok) {
      ++score;
      if (printDetails || printSuccesses)
        print("OK " + e);
    } else if (printDetails) {
      print("FAIL " + (r == null ? "-" : joinWithSpaces(subList(e.tok, r.start, r.end))) + " for " + e);
    }
  }
  printScore(shortClassName(g), score, n);
  ret ratioToPercent(score, n);
}
157  
158  
// Splits the material with getRandomPercent2, trains g on the first part
// and scores it with checkGuesser.
// hardMode: score on the second part of the split only; otherwise on
// the whole material.
static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) {
  Pair<L<Example>> split = getRandomPercent2(testMaterial, percentToLearn);
  g.learn(split.a);
  L<Example> toCheck;
  if (hardMode)
    toCheck = split.b;
  else
    toCheck = testMaterial;
  ret checkGuesser(toCheck, g);
}
163  
164  
// Picks the best guesser, training each on a random x% of the material.
// Every repetition trains and scores a fresh clone of the candidate; the
// candidate is rated by its average score over all repetitions.
// Returns the guesser and its percentage solved.
// hardMode = only count scores on untrained examples
static Pair<Guesser, Double> bestLearner(final L<Example> material, L<? extends Guesser> guessers, final double percent, int repetitions, final bool hardMode) {
  new Best<Guesser> ranking;
  for (final Guesser g : guessers) {
    double total = repeatAndAdd_double(repetitions, func {
      checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode)
    });
    ranking.put(g, total/repetitions);
  }
  ret ranking.pair();
}
175  
176  
// Builds one Example per Sentence whose "subject" entry parses to a range.
// The parsed character range is converted to a token range and then to a
// code-token range; the example stores the sentence's code tokens.
static L<Example> learningMaterial() {
  L<Example> examples = new L;
  for (Sentence s) {
    S subject = s.data.get("subject");
    if (subject == null) continue;

    IntRange range = ai_parseAction(subject);
    if (range == null) continue;

    L<S> tokens = nlTok5(s.text);
    range = charRangeToTokenRange(tokens, range);
    range = tokenRangeToCodeTokens(range);
    tokens = codeTokens(tokens);
    examples.add(Example(tokens, range));
  }
  ret examples;
}
192  
193  
// Application entry point - works on character level.
// Tokenizes the sentence, asks g for the subject over the code tokens and,
// if it returns a range, stores the rendered result under "subject".
// Modifies data.
static void callGuesser(Guesser g, S sentence, SS data) {
  L<S> tokens = nlTok5(sentence);
  IntRange guess = g.getSubjectTokens(codeTokens(tokens));
  if (guess != null) {
    IntRange chars = codeTokenRangeToChars(tokens, guess);
    data.put("subject", ai_renderAction(sentence, chars));
  }
}

Author comment

Began life as a copy of #1008669

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1008696
Snippet name: Find Subject (map version): Learner 1 [dev.]
Eternal ID of this version: #1008696/10
Text MD5: 4642540bbd34ab5fdad468ff9ba185ec
Transpilation MD5: 9a8b3ac56722d34386d1ded2c58730d9
Author: stefan
Category: javax / a.i.
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-05-29 03:00:33
Source code size: 5390 bytes / 200 lines
Pitched / IR pitched: No / No
Views / Downloads: 508 / 926
Version history: 9 change(s)
Referenced in: [show references]