ProbabilisticParser1 (before shortening) [1027937]

sclass ProbabilisticParser1 {
  // All four collections are ordered by byProbability() and are emptied by reset().

  // States that passed the cutoff and are done() (filled in addState).
  transient TreeSetWithDuplicates<State> doneStates = new(byProbability());
  // Every state that passed the cutoff in addState.
  transient TreeSetWithDuplicates<State> states = new(byProbability());
  // Work queue: states added by addState and popped one at a time by stepFirstUnstepped.
  transient TreeSetWithDuplicates<State> steppableStates = new(byProbability());
  // States rejected by addState because their probability was below cutoffPercentage.
  transient TreeSetWithDuplicates<State> droppedStates = new(byProbability());
  // Number of State objects created since the last reset(); used to number states.
  transient int stateCount;

  // Minimum probability (in percent) a state needs in order to be kept by addState.
  double cutoffPercentage = 50;
  
  // Comparator over states by probability. The arguments to cmp are
  // swapped, so the state with the higher probability sorts first.
  Comparator<State> byProbability() {
    ret (x, y) -> cmp(y.probability, x.probability);
  }
  
  // One step of a parse rule. run() is invoked by State.step() with the
  // state whose rule starts with this action.
  abstract class Action {
    // Applies this action to the given state.
    abstract void run(State state);
    
    // Creates the successor skeleton for a state: copies tok, iNextToken,
    // probability and matches, links back to the origin through prev, and
    // advances the rule to its right-hand side (null if the right-hand
    // side is not a BasicLogicRule).
    // state.remainingRule must not be null here.
    State prepareClone(State state) {
      State successor = new State();
      successor.prev = state;
      copyFields(state, successor, "tok", "iNextToken", "probability", "matches");
      successor.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs);
      ret successor;
    }
  }
  
  // Base class for actions that consume input tokens. For a given state,
  // run() registers one successor per candidate number of consumed code
  // tokens (0 up to and including state.remainingTokens()), each weighted
  // by the probability the subclass assigns to the consumed tokens.
  abstract class Consumer extends Action {
    // Probability (percent) of the consumed text, given as one string.
    // Subclasses override this method or calcProbabilityForMatchedTokens.
    double calcProbabilityForMatchedText(S s) {
      throw overrideMe();
    }

    // Probability (percent) of the consumed tokens.
    // tok is CNC starting & ending with code token.
    // By default the tokens are joined and scored as text.
    double calcProbabilityForMatchedTokens(LS tok) {
      S text = join(tok);
      ret calcProbabilityForMatchedText(text);
    }
    
    void run(State state) {
      int limit = state.remainingTokens();
      int count = 0;
      while (count <= limit) {
        State next = prepareClone(state);
        // code tokens are two list positions apart
        next.iNextToken += count*2;
        LS consumed = subList(state.tok, state.iNextToken, next.iNextToken-1);
        double fit = calcProbabilityForMatchedTokens(consumed);
        next.probability = multiplyPercentages(next.probability, fit);
        // remember which state (and thereby which action) matched these tokens
        next.matches = revChainPlus(next.matches, pair(state, consumed));
        addState(next);
        count++;
      }
    }
  }
  
  // Consumer that expects one specific token. Empty matched text scores 50;
  // any other text scores levenSimilarityIntIC(text, token).
  noeq record ConsumeToken(S token) extends Consumer {
    double calcProbabilityForMatchedText(S s) {
      if (empty(s)) ret 50;
      ret levenSimilarityIntIC(s, token);
    }
  }
  
  // Wildcard consumer (created for "*" in a pattern): whatever text is
  // matched gets the fixed score 90.
  noeq record Any extends Consumer {
    double calcProbabilityForMatchedText(S s) { ret 90; }
  }
  
  // Consumer placed at the even token positions of a pattern (see
  // patternToRule). Starts at 100 and loses 10 for every code token it
  // swallows, as counted by countCodeTokensInReversedCNC.
  noeq record Filler extends Consumer {
    double calcProbabilityForMatchedTokens(LS tok) {
      ret 100 - 10*countCodeTokensInReversedCNC(tok);
    }
  }
  
  // Final action of a rule. The successor's probability is halved when the
  // state has not reached the end of the input; the tokens from iNextToken
  // onward are recorded as this action's match.
  noeq record EndOfInput extends Action {
    void run(State state) {
      State next = prepareClone(state);
      bool leftover = !state.endOfInput();
      if (leftover) next.probability = next.probability / 2;
      LS rest = subList(next.tok, next.iNextToken);
      next.matches = revChainPlus(next.matches, pair(state, rest));
      addState(next);
    }
  }

  // One node of the parse search: a position in the input token list, the
  // part of the rule that still has to be applied, the probability
  // accumulated so far and the matches recorded on the way here.
  class State {
    int number = ++stateCount; // running number, assigned on creation
    State prev; // state this one was derived from (set by Action.prepareClone)
    double probability = 100; // percent
    LS tok; // CNC
    int iNextToken = 1; // index in tok of the next unconsumed code token
    BasicLogicRule remainingRule; // lhs: next action, rhs: rest of the rule; null when finished
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC

    toString {
      ret toStringWithFields(this, "number", "probability", "iNextToken") + stringIf(done(), " (done)" + " matches: " + matchesFromAction());
    }

    // The recorded matches, with each originating state replaced by the
    // action that state was about to run.
    LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
    
    // True when no rule is left to apply.
    bool done() { ret remainingRule == null; }
    
    // True when iNextToken is at or past the end of the token list.
    bool endOfInput() { ret iNextToken >= l(tok); }

    // Number of code tokens that have not been consumed yet. Code tokens sit
    // two list positions apart starting at iNextToken, so the count is half
    // the distance from iNextToken to the end of the list, and 0 exactly
    // when endOfInput() holds.
    // The earlier formula added 1 to this, which made Consumer.run create
    // successors whose iNextToken lies past the end of tok and whose match
    // is cut off by subList at the trailing non-code token.
    int remainingTokens() { ret Math.max(0, (l(tok)-iNextToken)/2); }

    // The token at iNextToken.
    S nextToken() { ret get(tok, iNextToken); }

    // The action at the head of the remaining rule, or null when done.
    Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
    
    // Runs the next action on this state; does nothing when done.
    void step { if (!done()) action().run(this); }
  }
  
  // Registers a newly created state. States below cutoffPercentage only go
  // to droppedStates; all others go to states and steppableStates, and
  // additionally to doneStates when their rule is used up.
  void addState(State s) {
    bool belowCutoff = s.probability < cutoffPercentage;
    if (belowCutoff)
      droppedStates.add(s);
    else {
      addToCollections(s, states, steppableStates);
      if (s.done())
        doneStates.add(s);
    }
  }
  
  // Takes the first state off steppableStates and steps it.
  // Returns false when there was no state left to take, true otherwise.
  bool stepFirstUnstepped() {
    State next = popFirst(steppableStates);
    if (next == null) ret false;
    next.step();
    ret true;
  }

  // Turns a pattern string into a rule. The pattern is tokenized with
  // javaTok; every even-indexed token becomes a Filler, "*" becomes Any,
  // every other odd-indexed token becomes a ConsumeToken, and an EndOfInput
  // action is appended. The right-hand side is formatFrag("parsed").
  BasicLogicRule patternToRule(S pattern) {
    ret curryLHS(BasicLogicRule(
      makeAnd(listPlus(
        mapWithIndex(javaTok(pattern), (i, t) -> {
          if (even(i)) ret new Filler();
          if (eq(t, "*")) ret new Any();
          ret new ConsumeToken(t);
        }),
        new EndOfInput)),
      formatFrag("parsed")));
  }

  // Discards all states from a previous parse and restarts state numbering.
  void reset {
    stateCount = 0;
    clearAll(doneStates, states, steppableStates, droppedStates);
  }

  // Parses input against pattern: resets the parser, builds and prints the
  // rule for the pattern, seeds the search with one initial state and then
  // steps states until none is left to step. Results end up in doneStates.
  // pattern e.g.: "Das * hat *."
  void parse(S pattern, S input) {
    reset();
    BasicLogicRule rule = patternToRule(pattern);
    print(rule);

    State start = new State();
    start.remainingRule = rule;
    start.tok = javaTok(input);
    addState(start);

    while ping (stepFirstUnstepped()) {}
  }

  // Returns the first n entries of doneStates (via takeFirst).
  L<State> bestStates(int n) {
    L<State> top = takeFirst(n, doneStates);
    ret top;
  }

  // Collects the joined text of every match that was produced by an Any
  // action on the way to the given state and wraps the texts with
  // matches(). Returns null for a null state.
  Matches stateToMatches(State state) {
    if (state == null) ret null;
    new LS wildcardTexts;
    for (Pair<Action, LS> entry : state.matchesFromAction()) {
      if (!(entry.a instanceof Any)) continue;
      wildcardTexts.add(join(entry.b));
    }
    ret matches(wildcardTexts);
  }
  
  // Matches of the first entry of doneStates (stateToMatches yields null
  // when it is handed null).
  Matches bestMatches() {
    State top = first(doneStates);
    ret stateToMatches(top);
  }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

Snippet ID:	#1027937
Snippet name:	ProbabilisticParser1 (before shortening)
Eternal ID of this version:	#1027937/10
Text MD5:	a13434d08ef0fa5a9650a1bf6d46537d
Transpilation MD5:	be801c469d5bed4d2a544f28694105a4
Author:	stefan
Category:	javax
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-04-20 12:02:45
Source code size:	4731 bytes / 152 lines
Pitched / IR pitched:	No / No
Views / Downloads:	263 / 390
Version history:	9 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1027937 // ProbabilisticParser1 (before shortening)

JavaX fragment (include) [tags: use-pretranspiled]