Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

218
LINES

< > BotCompany Repo | #1008653 // Learn parsing sentences 2 [dev.]

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (8065L/53K/181K).

!7

// Guesser selected by the bestLearner() call at the end of the main program.
static Guesser best;
// Score that bestLearner() reported for that guesser.
static double bestScore;

// A stored sentence; learningMaterial() iterates over all Sentence concepts.
concept Sentence {
  // Sentence text; tokenized with nlTok5 in learningMaterial().
  S text;
  // Annotation passed to ai_parseSubjectAction; sentences where this is null are skipped.
  S action;
}

sclass Example {
  L<S> tok;
  int start, end;
  
  *() {}
  *(L<S> *tok, IntRange subjectTokens) {
    start = subjectTokens.start;
    end = subjectTokens.end;
  }
  
  toString {
    ret quote(joinWithSpaces(tok)) + " => " + joinWithSpaces(subList(tok, start, end));
  }
}

// Base class for everything that guesses which tokens form the subject.
abstract sclass Guesser {
  // Returns the guessed subject token range for tok.
  // Several subclasses in this file return null when they have no guess.
  abstract IntRange getSubjectTokens(L<S> tok);
  
  // Learns from a single example; a no-op unless a subclass overrides it.
  void learn(Example e) {}
  
  // Learns from a batch by passing each example, in list order, to learn(Example).
  void learn(L<Example> material) {
    for (Example example : material) {
      learn(example);
    }
  }
}

Guesser > G1 { // just returns first word
  IntRange getSubjectTokens(L<S> tok) {
    ret new IntRange(0, 1);
  }
}

Guesser > G2 { // skips leading words it has seen before a subject
  // Lower-cased token prefixes that preceded the subject in learned examples.
  new StringTree1 skipTree;
  
  // Walks skipTree along the lower-cased tokens; a non-negative result n
  // yields the single-token range n..n+1, anything else yields null.
  IntRange getSubjectTokens(L<S> tok) {
    L<S> lowered = allToLower(tok);
    int n = walkStringTreeToLeaf(skipTree, lowered);
    if (n < 0) ret null;
    ret new IntRange(n, n+1);
  }
  
  // Records the tokens in front of the subject; examples whose subject
  // starts at token 0 contribute nothing.
  void learn(Example e) {
    if (e.start <= 0) ret;
    L<S> skipped = allToLower(takeFirst(e.tok, e.start));
    addToStringTree(skipTree, skipped);
  }
}

Guesser > G3 { // keeps extending the subject as long as known words follow
  // Lower-cased subject tokens, minus the last one, from learned examples.
  new StringTree1 continuationTree;
  
  // Walks continuationTree along the lower-cased tokens; a non-negative
  // result n yields the range 0..n+1, anything else yields null.
  IntRange getSubjectTokens(L<S> tok) {
    L<S> lowered = allToLower(tok);
    int n = walkStringTreeToLeaf(continuationTree, lowered);
    if (n < 0) ret null;
    ret new IntRange(0, n+1);
  }
  
  // Records all subject tokens except the last; nothing is added when that
  // list comes out empty.
  void learn(Example e) {
    L<S> words = allToLower(subList(e.tok, e.start, e.end-1));
    if (!nempty(words)) ret;
    addToStringTree(continuationTree, words);
  }
}

// Combines two guessers: a decides how many leading tokens to skip,
// b then guesses on the remaining tokens.
Guesser > SkipFirst {
  Guesser a, b;
  
  *() {}
  *(Guesser a, Guesser b) {
    this.a = a;
    this.b = b;
  }
  
  // Uses the start of a's guess (0 when a has no guess) as the number of
  // tokens to drop, asks b about the rest and shifts b's answer by that amount.
  IntRange getSubjectTokens(L<S> tok) {
    IntRange prefix = a.getSubjectTokens(tok);
    int skip = 0;
    if (prefix != null) skip = prefix.start;
    L<S> rest = dropFirst(skip, tok);
    IntRange inner = b.getSubjectTokens(rest);
    ret shiftIntRange(skip, inner);
  }
  
  // Both wrapped guessers are trained on the same, unmodified material.
  void learn(L<Example> material) {
    a.learn(material);
    b.learn(material);
  }
}

// Tries a list of guessers in order and returns the first non-null guess.
Guesser > Chained {
  // Wrapped guessers, in priority order.
  new L<Guesser> l;
  
  *() {}
  *(Guesser... guessers) { addAll(l, guessers); }
  
  // Asks each wrapped guesser in turn (through pcall) and returns the first
  // non-null range; returns null when none of them produces a guess.
  IntRange getSubjectTokens(L<S> tok) {
    for (Guesser g : l) {
      IntRange result = cast pcall(g, "getSubjectTokens", tok);
      if (result != null) ret result;
    }
    null;
  }
  
  // Forwards the training material to every wrapped guesser, in the same way
  // SkipFirst forwards to its two members. Without this override the inherited
  // batch learn only reaches the empty learn(Example), so guessers wrapped in
  // a Chained would never be trained.
  void learn(L<Example> material) {
    for (Guesser g : l)
      g.learn(material);
  }
}

// Memorizes learned sentences verbatim and looks the answer up again.
Guesser > GCheater {
  // Space-joined token list -> subject range recorded for it.
  new Map<S, IntRange> map;
  
  // Returns whatever map holds for the joined tokens (null if never learned).
  IntRange getSubjectTokens(L<S> tok) {
    S key = joinWithSpace(tok);
    ret map.get(key);
  }
  
  // Stores the example's range under its joined tokens.
  void learn(Example e) {
    S key = joinWithSpace(e.tok);
    IntRange answer = intRange(e.start, e.end);
    map.put(key, answer);
  }
}

// Main program: loads the sentence concepts, builds the examples, scores the
// individual guessers and finally lets bestLearner pick a winner, which is
// stored in the static fields best / bestScore.
p {
  loadConceptsFrom(#1008607);
  L<Example> material = learningMaterial();
  pnlStruct(material);
  
  G1 g1;
  G2 g2;
  G3 g3;
  Chained chained;
  
  // G1 is scored without any learning call.
  checkGuesser(material, g1 = new G1);
  // From here on checkGuesser also prints the examples it got right.
  set printSuccesses;
  checkGuesserAfterFullLearn(material, g2 = new G2);
  checkGuesserAfterFullLearn(material, g3 = new G3);
  checkGuesserAfterFullLearn(material, new GCheater);
  checkGuesserAfterPartialLearn(material, new GCheater, 50);
  // The chain reuses the g2 and g3 instances that were trained above.
  checkGuesser(material, chained = new Chained(g1, g2, g3));
  // Dump what G2 and G3 have learned.
  printStruct(g2);
  printUnrolledStringTree(g2.skipTree);
  print();
  printUnrolledStringTree(g3.continuationTree);
  //printStruct(bestLearner(material, allNew(G1, G2, G3, GCheater), 50, 3, false));
  
  // Compare fresh guessers plus two combinations: learn on 50 percent,
  // 3 repetitions, hardMode on.
  Pair<Guesser, Double> p = bestLearner(material, 
    listPlus(allNew(G1, G2, G3, GCheater),
    new Chained(new G2, new G1),
    new SkipFirst(new G2, new Chained(new G3, new G1))),
    50, 3, true);
  printStruct(reversePair(p));
  best = p.a;
  bestScore = p.b;
}

// Output switches read by checkGuesser: printDetails prints a line for every
// example (OK and FAIL), printSuccesses prints only the OK lines.
sbool printDetails, printSuccesses;

// Scores g against testMaterial: an example counts as solved when the guessed
// range equals the example's start/end range.
// Prints an empty line first, then optional per-example lines (controlled by
// printDetails / printSuccesses), then the score line.
// Returns ratioToPercent(solved, total).
static double checkGuesser(L<Example> testMaterial, Guesser g) {
  print();
  int hits = 0, total = 0;
  for (Example e : testMaterial) {
    IntRange guess = cast pcall(g, "getSubjectTokens", e.tok);
    IntRange expected = IntRange(e.start, e.end);
    if (eq(expected, guess)) {
      ++hits;
      if (printDetails || printSuccesses)
        print("OK " + e);
    } else if (printDetails) {
      // Show the tokens that were guessed instead, or "-" when there was no guess.
      S shown = guess == null ? "-" : joinWithSpaces(subList(e.tok, guess.start, guess.end));
      print("FAIL " + shown + " for " + e);
    }
    ++total;
  }
  printScore(shortClassName(g), hits, total);
  ret ratioToPercent(hits, total);
}

static double checkGuesserAfterFullLearn(L<Example> testMaterial, Guesser g) {
  g.learn(testMaterial);
  ret checkGuesser(testMaterial, g);
}

// Trains g on the portion of the material selected by
// getFirstPercent(testMaterial, percentToLearn), then scores it on all of it.
static double checkGuesserAfterPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn) {
  L<Example> trainingPart = getFirstPercent(testMaterial, percentToLearn);
  g.learn(trainingPart);
  ret checkGuesser(testMaterial, g);
}

// Splits the material with getRandomPercent2, trains g on the first part (a)
// and scores it: on the second part (b) when hardMode is set, otherwise on
// the complete material.
static double checkGuesserAfterRandomizedPartialLearn(L<Example> testMaterial, Guesser g, double percentToLearn, bool hardMode) {
  Pair<L<Example>> split = getRandomPercent2(testMaterial, percentToLearn);
  g.learn(split.a);
  L<Example> evaluationSet = testMaterial;
  if (hardMode) evaluationSet = split.b;
  ret checkGuesser(evaluationSet, g);
}

// Picks the best learner using randomized training material of the given percentage.
// Every guesser is cloned, trained and scored `repetitions` times; the average
// score is what gets compared.
// Returns the winning guesser together with its percentage solved.
// hardMode = only count scores on untrained examples
static Pair<Guesser, Double> bestLearner(final L<Example> material, L<Guesser> guessers, final double percent, int repetitions, final bool hardMode) {
  new Best<Guesser> ranking;
  for (final Guesser g : guessers) {
    // Sum of the scores over all repetitions; a clone of g is trained each time.
    double total = repeatAndAdd_double(repetitions, func {
      checkGuesserAfterRandomizedPartialLearn(material, cloneObject(g), percent, hardMode)
    });
    ranking.put(g, total/repetitions);
  }
  ret ranking.pair();
}

static L<Example> learningMaterial() {
  L<Example> out = new L;
  for (Sentence s) {
    if (s.action == null) continue;
    IntRange r = ai_parseSubjectAction(s.action);
    if (r != null) {
      L<S> tok = nlTok5(s.text);
      r = charRangeToTokenRange(tok, r);
      r = IntRange((r.start | 1)/2, r.end/2);
      tok = codeTokens(tok);
      out.add(Example(tok, r));
    }
  }
  ret out;
}

// Entry point for applications - takes a plain sentence and answers on
// character level: the sentence is tokenized, g guesses a token range, and
// tokenRangeToCharRange converts that range back.
// NOTE(review): several guessers return null when they have no guess; confirm
// that tokenRangeToCharRange accepts a null range.
// NOTE(review): the conversion is given the code-token list, not the full
// token list - confirm this is what tokenRangeToCharRange expects.
static IntRange callGuesser(Guesser g, S sentence) {
  L<S> tok = codeTokens(nlTok5(sentence));
  IntRange tokenRange = g.getSubjectTokens(tok);
  ret tokenRangeToCharRange(tok, tokenRange);
}

Author comment

Began life as a copy of #1008643

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1008653
Snippet name: Learn parsing sentences 2 [dev.]
Eternal ID of this version: #1008653/44
Text MD5: 7ac4b1a486b207f189091e31bc7c5e3a
Transpilation MD5: 07529db319c149fb6d4e903db0698f9a
Author: stefan
Category: javax / a.i.
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-05-27 14:54:38
Source code size: 5926 bytes / 218 lines
Pitched / IR pitched: No / No
Views / Downloads: 523 / 1000
Version history: 43 change(s)
Referenced in: [show references]