Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

218
LINES

< > BotCompany Repo | #1008653 // Learn parsing sentences 2 [dev.]

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (8065L/53K/181K).

1  
!7
2  
3  
// Guesser selected by bestLearner(); assigned at the end of the main program.
static Guesser best;
4  
// Score that bestLearner() returned together with `best`; assigned at the end of the main program.
static double bestScore;
5  
6  
// Concept record for one stored sentence; learningMaterial() iterates over all instances.
concept Sentence {
7  
  S text; // sentence text; tokenized with nlTok5 in learningMaterial()
8  
  S action; // passed to ai_parseSubjectAction in learningMaterial(); sentences where this is null are skipped there
9  
}
10  
11  
// One training/test sample: a token list plus the token range
// (start, end) that marks the expected subject within it.
sclass Example {
  L<S> tok;
  int start, end;

  // No-arg constructor.
  *() {}

  // Stores the token list and copies both bounds out of subjectTokens.
  *(L<S> tok, IntRange subjectTokens) {
    this.tok = tok;
    this.start = subjectTokens.start;
    this.end = subjectTokens.end;
  }

  // Renders as: <quoted tokens joined by spaces> => <subject tokens joined by spaces>
  public S toString() {
    L<S> subject = subList(tok, start, end);
    ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subject);
  }
}
25  
26  
// Base class of all subject guessers in this file.
abstract sclass Guesser {
  // Returns the guessed subject token range for tok.
  // Several implementations in this file return null when they have no guess.
  abstract IntRange getSubjectTokens(L<S> tok);

  // Per-example training hook; does nothing unless a subclass overrides it.
  void learn(Example e) {}

  // Trains on a whole list by handing each example to learn(Example), in order.
  void learn(L<Example> material) {
    for (Example example : material) {
      learn(example);
    }
  }
}
34  
35  
// Baseline guesser: always answers with the first token, i.e. the range (0, 1).
// It never learns anything (inherits the no-op learn).
Guesser > G1 {
  IntRange getSubjectTokens(L<S> tok) {
    IntRange firstWord = new IntRange(0, 1);
    ret firstWord;
  }
}
40  
41  
// Guesser that skips leading words: it remembers the (lower-cased) token
// prefixes that preceded the subject in training examples.
Guesser > G2 {
  StringTree1 skipTree = new StringTree1;

  // Walks skipTree along the lower-cased tokens. A non-negative result n
  // is turned into the single-token range (n, n+1); otherwise null.
  IntRange getSubjectTokens(L<S> tok) {
    L<S> lowered = allToLower(tok);
    int idx = walkStringTreeToLeaf(skipTree, lowered);
    ret idx < 0 ? null : new IntRange(idx, idx+1);
  }

  // Records the tokens before the subject; examples whose subject
  // starts at token 0 contribute nothing.
  void learn(Example e) {
    if (e.start <= 0) ret;
    L<S> prefix = takeFirst(e.tok, e.start);
    addToStringTree(skipTree, allToLower(prefix));
  }
}
55  
56  
// Guesser that extends the subject over several words: it remembers the
// (lower-cased) subject tokens of training examples, minus the last one.
Guesser > G3 {
  StringTree1 continuationTree = new StringTree1;

  // Walks continuationTree along the lower-cased tokens. A non-negative
  // result n yields the range (0, n+1); otherwise null.
  IntRange getSubjectTokens(L<S> tok) {
    L<S> lowered = allToLower(tok);
    int idx = walkStringTreeToLeaf(continuationTree, lowered);
    ret idx < 0 ? null : new IntRange(0, idx+1);
  }

  // Stores tokens start..end-1 (exclusive) of the example, if there are any.
  void learn(Example e) {
    L<S> head = subList(e.tok, e.start, e.end-1);
    L<S> lowered = allToLower(head);
    if (!nempty(lowered)) ret;
    addToStringTree(continuationTree, lowered);
  }
}
71  
72  
// Combines two guessers: a decides how many leading tokens to skip,
// b then guesses on the remaining tokens.
Guesser > SkipFirst {
  Guesser a, b;

  *() {}
  *(Guesser a, Guesser b) {
    this.a = a;
    this.b = b;
  }

  // Uses the start of a's guess as the number of tokens to drop (0 if a
  // has no guess), asks b about the rest and shifts b's range back by
  // that amount.
  // NOTE(review): b's result is handed to shiftIntRange unchecked - confirm
  // that shiftIntRange accepts null.
  IntRange getSubjectTokens(L<S> tok) {
    IntRange skipped = a.getSubjectTokens(tok);
    int skip = 0;
    if (skipped != null) skip = skipped.start;
    L<S> rest = dropFirst(skip, tok);
    ret shiftIntRange(skip, b.getSubjectTokens(rest));
  }

  // Both parts are trained on the same, unmodified material.
  void learn(L<Example> material) {
    a.learn(material);
    b.learn(material);
  }
}
89  
90  
// Tries its member guessers in order and answers with the first
// non-null guess.
Guesser > Chained {
  new L<Guesser> l;

  *() {}
  *(Guesser... guessers) { addAll(l, guessers); }

  // Returns the first non-null result among the members, or null if no
  // member produced one. Members are invoked through pcall, the same way
  // checkGuesser invokes a guesser.
  IntRange getSubjectTokens(L<S> tok) {
    for (Guesser g : l) {
      IntRange result = cast pcall(g, "getSubjectTokens", tok);
      if (result != null) ret result;
    }
    null;
  }

  // Forwards training to every member, like SkipFirst.learn does.
  // Without this override, the inherited learn(L<Example>) only reaches the
  // no-op learn(Example), so members stayed untrained whenever a Chained
  // was trained (e.g. the composite candidates handed to bestLearner).
  // NOTE(review): bestLearner copies candidates with cloneObject - verify
  // that members are not shared between repetitions.
  void learn(L<Example> material) {
    for (Guesser g : l)
      g.learn(material);
  }
}
104  
105  
// "Cheating" guesser: memorizes the exact answer for every sentence it
// was trained on and can only answer for those sentences.
Guesser > GCheater {
  new Map<S, IntRange> map;

  // Map key for a token list: its tokens joined by joinWithSpace.
  S key(L<S> tok) {
    ret joinWithSpace(tok);
  }

  // Looks the sentence up; yields whatever the map holds for it.
  IntRange getSubjectTokens(L<S> tok) {
    S k = key(tok);
    ret map.get(k);
  }

  // Stores the example's subject range under its sentence key.
  void learn(Example e) {
    IntRange answer = intRange(e.start, e.end);
    map.put(key(e.tok), answer);
  }
}
116  
117  
p {
  // Load the stored concepts and derive the examples from them.
  loadConceptsFrom(#1008607);
  L<Example> material = learningMaterial();
  pnlStruct(material);

  // Baseline without training; successes are not printed yet.
  G1 g1 = new G1;
  checkGuesser(material, g1);

  // From here on checkGuesser also prints every correctly guessed example.
  printSuccesses = true;

  // Train on everything, then test on the same material.
  G2 g2 = new G2;
  checkGuesserAfterFullLearn(material, g2);
  G3 g3 = new G3;
  checkGuesserAfterFullLearn(material, g3);
  checkGuesserAfterFullLearn(material, new GCheater);

  // GCheater again, this time trained on the first 50 percent only.
  checkGuesserAfterPartialLearn(material, new GCheater, 50);

  // Chain of the three guessers from above (g2 and g3 are already trained).
  Chained chained = new Chained(g1, g2, g3);
  checkGuesser(material, chained);

  // Dump what g2 and g3 have learned.
  printStruct(g2);
  printUnrolledStringTree(g2.skipTree);
  print();
  printUnrolledStringTree(g3.continuationTree);
  //printStruct(bestLearner(material, allNew(G1, G2, G3, GCheater), 50, 3, false));

  // Competition: 50 percent training material, 3 repetitions, hard mode.
  L<Guesser> candidates = listPlus(allNew(G1, G2, G3, GCheater),
    new Chained(new G2, new G1),
    new SkipFirst(new G2, new Chained(new G3, new G1)));
  Pair<Guesser, Double> winner = bestLearner(material, candidates, 50, 3, true);
  printStruct(reversePair(winner));
  best = winner.a;
  bestScore = winner.b;
}
149  
150  
// Output switches read by checkGuesser: printDetails prints a line for every example (OK or FAIL),
// printSuccesses prints a line only for correctly guessed examples.
sbool printDetails, printSuccesses;
151  
152  
// Runs g on every example of testMaterial, counts exact matches of the
// expected range, prints the score and returns ratioToPercent(hits, total).
// Per-example output is controlled by printDetails / printSuccesses.
static double checkGuesser(L<Example> testMaterial, Guesser g) {
  print();
  int correct = 0, total = 0;
  for (Example e : testMaterial) {
    // invoked through pcall; a null guess simply counts as a miss
    IntRange guess = cast pcall(g, "getSubjectTokens", e.tok);
    bool ok = eq(IntRange(e.start, e.end), guess);
    ++total;
    if (ok) {
      ++correct;
      if (printDetails || printSuccesses)
        print("OK " + e);
    } else if (printDetails) {
      S guessed = guess == null ? "-" : joinWithSpaces(subList(e.tok, guess.start, guess.end));
      print("FAIL " + guessed + " for " + e);
    }
  }
  printScore(shortClassName(g), correct, total);
  ret ratioToPercent(correct, total);
}
169  
170  
// Trains g on the complete material, then scores it on that same material.
static double checkGuesserAfterFullLearn(L<Example> testMaterial, Guesser g) {
  g.learn(testMaterial);
  double percent = checkGuesser(testMaterial, g);
  ret percent;
}
174  
175  
// Trains g on the part of the material selected by
// getFirstPercent(testMaterial, percentToLearn), then scores it on the
// complete material.
static double checkGuesserAfterPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn) {
  L<Example> trainingSet = getFirstPercent(testMaterial, percentToLearn);
  g.learn(trainingSet);
  ret checkGuesser(testMaterial, g);
}
179  
180  
// Splits the material with getRandomPercent2, trains g on the first part
// of the split and scores it: on the second part of the split when
// hardMode is set, otherwise on the complete material.
static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) {
  Pair<L<Example>> split = getRandomPercent2(testMaterial, percentToLearn);
  g.learn(split.a);
  L<Example> evaluationSet = testMaterial;
  if (hardMode) evaluationSet = split.b;
  ret checkGuesser(evaluationSet, g);
}
185  
186  
// Finds the best learner using randomized training material.
// Every candidate is evaluated `repetitions` times; each run works on a
// cloneObject copy of the candidate that is trained on a random `percent`
// of the material. The candidate's score is the average over its runs.
// Returns the pair (guesser, score) taken from the Best collector.
// hardMode = only count scores on untrained examples
static Pair<Guesser, Double> bestLearner(final L<Example> material, L<Guesser> guessers, final double percent, int repetitions, final bool hardMode) {
  new Best<Guesser> ranking;
  for (final Guesser g : guessers) {
    double total = repeatAndAdd_double(repetitions, func {
      checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode)
    });
    ranking.put(g, total/repetitions);
  }
  ret ranking.pair();
}
197  
198  
// Builds the examples from all Sentence concepts. Sentences without an
// action, or whose action ai_parseSubjectAction cannot turn into a range,
// are skipped.
static L<Example> learningMaterial() {
  new L<Example> out;
  for (Sentence s) {
    if (s.action == null) continue;
    IntRange charRange = ai_parseSubjectAction(s.action);
    if (charRange == null) continue;

    L<S> fullTok = nlTok5(s.text);
    IntRange tokRange = charRangeToTokenRange(fullTok, charRange);
    // Convert indices in the full token list to indices in the code-token
    // list: start is first forced to an odd index, then both are halved.
    // (presumably code tokens sit at the odd positions of fullTok - confirm)
    IntRange codeRange = IntRange((tokRange.start | 1)/2, tokRange.end/2);
    out.add(Example(codeTokens(fullTok), codeRange));
  }
  ret out;
}
213  
214  
// Entry point for applications; works on character level.
// Tokenizes the sentence like learningMaterial does (nlTok5, then
// codeTokens), asks g for the subject and converts the token range with
// tokenRangeToCharRange.
// NOTE(review): the conversion is done against the code-token list, while
// learningMaterial converts against the full nlTok5 list and then halves the
// indices - confirm the resulting character offsets are the intended ones.
// NOTE(review): a null guess from g is passed on unchecked - confirm
// tokenRangeToCharRange accepts null.
static IntRange callGuesser(Guesser g, S sentence) {
  L<S> fullTok = nlTok5(sentence);
  L<S> tok = codeTokens(fullTok);
  IntRange subject = g.getSubjectTokens(tok);
  ret tokenRangeToCharRange(tok, subject);
}

Author comment

Began life as a copy of #1008643

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1008653
Snippet name: Learn parsing sentences 2 [dev.]
Eternal ID of this version: #1008653/44
Text MD5: 7ac4b1a486b207f189091e31bc7c5e3a
Transpilation MD5: 07529db319c149fb6d4e903db0698f9a
Author: stefan
Category: javax / a.i.
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-05-27 14:54:38
Source code size: 5926 bytes / 218 lines
Pitched / IR pitched: No / No
Views / Downloads: 596 / 1104
Version history: 43 change(s)
Referenced in: [show references]