Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

183
LINES

< > BotCompany Repo | #1027944 // ProbabilisticParser1 (shortened, LIVE)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4977L/32K).

// A top-down left-to-right parser using probabilistic states
sclass ProbabilisticParser1 {
  // The probabilistic machine this parser drives: states are registered with
  // it via pm.addState, and parse() calls pm.reset() and pm.think() on it.
  new ProbabilisticMachine<State> pm;
  // When true, Consumer.run and State.runAction print trace output.
  bool verbose;

  // Base class of all parser actions. An action is run against a State and
  // can carry an optional grammar class label.
  abstract class Action {
    // optional label; stateToMatches counts actions whose label is non-empty
    S grammarClass;

    // Executes this action on the given state.
    abstract void run(State state);

    //!include #1027987 // setExtraField, getExtraField

    // Returns the label stored in the grammarClass field.
    S grammarClass() {
      ret grammarClass /*(S) getExtraField(ef_grammarClass)*/;
    }

    // Stores the label and returns this action, so calls can be chained.
    selfType setGrammarClass(S grammarClass) {
      this.grammarClass = grammarClass;
      ret this;
    }

    // extra field names
    //static final S ef_grammarClass = "grammarClass";
  }
  
  // An action that tries consuming different numbers of input tokens and
  // rates each possibility with a probability.
  abstract class Consumer extends Action {
    // Rates the matched text. Subclasses override either this method or
    // calcProbabilityForMatchedTokens.
    double calcProbabilityForMatchedText(S s) {
      throw overrideMe();
    }

    // Rates the matched tokens (tok is CNC starting & ending with code token).
    // The default joins the tokens and delegates to calcProbabilityForMatchedText.
    double calcProbabilityForMatchedTokens(LS tok) {
      S text = join(tok);
      ret calcProbabilityForMatchedText(text);
    }

    // smallest number of tokens run() will try to consume
    int minTokensToConsume = 0;

    // For every count n from minTokensToConsume up to state.remainingTokens(),
    // adds to pm a clone of the state that has advanced by n tokens, with its
    // probability scaled by the rating of the consumed tokens.
    @Override
    void run(State state) {
      int maxTokensToConsume = state.remainingTokens();

      if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume);

      int n = minTokensToConsume;
      while (n <= maxTokensToConsume) {
        State next = state.prepareClone();
        next.iNextToken += n*2; // each consumed token advances the index by two
        LS consumed = subList(state.tok, state.iNextToken, next.iNextToken-1);
        double rating = calcProbabilityForMatchedTokens(consumed);
        next.probability = multiplyPercentages(next.probability, rating);
        // remember which state (and thereby which action) matched these tokens
        next.matches = revChainPlus(next.matches, pair(state, consumed));
        pm.addState(next);
        n++;
      }
    }
  }
  
  // Consumer that rates text by how well it matches any token of a set.
  // The set is built through asCISet/litciset (presumably case-insensitive
  // sets - confirm against those helpers).
  noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
    { tokens = asCISet(tokens); }
    *(S... tokens) { this.tokens = litciset(tokens); }

    // Rating: 100 for text contained in the set (90 if that text is empty),
    // 50 for empty text not in the set, otherwise the similarity score from
    // levenSimilarityIntIC_multi.
    double calcProbabilityForMatchedText(S s) {
      double p;
      if (!tokens.contains(s)) {
        p = empty(s) ? 50 : levenSimilarityIntIC_multi(s, tokens);
      } else {
        p = empty(s) ? 90 : 100;
      }
      ifdef ConsumeOneOfTokens_debug
      print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
      endifdef
      ret p;
    }
  }
  
  // Consumer that rates text by its similarity to one expected token.
  noeq record ConsumeToken(S token) extends Consumer {
    // rating used when the matched text is empty
    double emptyProbability = 50;

    // Empty text gets emptyProbability; anything else gets the score
    // returned by levenSimilarityIntIC(s, token).
    double calcProbabilityForMatchedText(S s) {
      if (empty(s)) ret emptyProbability;
      ret levenSimilarityIntIC(s, token);
    }
  }
  
  // Consumer that accepts any text with a fixed rating of 90.
  noeq record Any extends Consumer {
    // Creates an Any labelled with the given grammar class.
    *(S grammarClass) {
      setGrammarClass(grammarClass);
    }

    double calcProbabilityForMatchedText(S s) {
      ret 90; // the same rating for every text
    }

    // "Any", followed by the grammar class when one is set.
    toString {
      ret joinNemptiesWithSpace("Any", grammarClass);
    }
  }
  
  // Consumer whose rating starts at 100 and drops by 10 for every code
  // token that countCodeTokensInReversedCNC finds in the matched tokens.
  noeq record Filler extends Consumer {
    double calcProbabilityForMatchedTokens(LS tok) {
      ret 100 - 10*countCodeTokensInReversedCNC(tok);
    }
  }
  
  // Action placed at the end of a rule. It consumes nothing; a state that
  // has not reached the end of the input has its probability halved.
  noeq record EndOfInput extends Action {
    void run(State state) {
      State next = state.prepareClone();
      bool atEnd = state.endOfInput();
      if (!atEnd)
        next.probability /= 2;
      // record whatever is left of the token list as this action's match
      LS rest = subList(next.tok, next.iNextToken);
      next.matches = revChainPlus(next.matches, pair(state, rest));
      pm.addState(next);
    }
  }

  // A parser state: the input token list, the position of the next token
  // to consume, and the chain of matches recorded so far.
  class State extends ProbabilisticMachine.State {
    LS tok; // CNC
    int iNextToken = 1; // index into tok of the next unconsumed token
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
    O userObject; // copied around from state to state, e.g. reference to production

    toString {
      ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
    }
    
    // The parser instance this state belongs to.
    ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; }

    // The recorded matches, with each recorded state replaced by its action.
    LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
    
    // The action at the head of the remaining rule, or null if there is no remaining rule.
    Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
    
    // True once iNextToken has reached or passed the length of tok.
    bool endOfInput() { ret iNextToken >= l(tok); }

    // Number of tokens still available for consumption, i.e. the number of
    // positions iNextToken, iNextToken+2, ... that lie below l(tok).
    // The earlier formula, (l(tok)-iNextToken)/2+1, returned one too many
    // for the odd-length lists javaTok produces (and 1 at end of input), so
    // Consumer.run also created states positioned past the end whose matched
    // sublist included the trailing non-code token.
    int remainingTokens() { ret Math.max(0, (l(tok)-iNextToken+1)/2); }

    // The token at iNextToken, as returned by get(tok, iNextToken).
    S nextToken() { ret get(tok, iNextToken); }

    // Fresh State of this parser, without any fields copied.
    State emptyClone() { ret new State; }
    
    // Clones via super.prepareClone() and then copies the parser-specific
    // fields (tok, iNextToken, matches, userObject) onto the clone.
    State prepareClone() {
      ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject);
    }

    // Runs the given object as an Action on this state, after asserting that
    // this state's machine is the parser's pm. Traces when verbose is set.
    void runAction(O action) {
      assertSame(machine, pm);
      if (verbose) print("Running action: " + action + ", machine: " + machine);
      ((Action) action).run(this);
      if (verbose) print("Ran action: " + className(action));
    }
  }
  
  // Turns a pattern into a rule. The pattern is tokenized with javaTok;
  // every even-indexed token becomes a Filler, an odd-indexed "*" becomes
  // an Any, every other odd-indexed token becomes a ConsumeToken for that
  // token, and an EndOfInput action is appended at the end.
  BasicLogicRule patternToRule(S pattern) {
    LS patternTok = javaTok(pattern);
    new L<Action> actions;
    for (int i = 0; i < l(patternTok); i++) {
      S t = patternTok.get(i);
      if (even(i))
        actions.add(new Filler);
      else if (eq(t, "*"))
        actions.add(new Any);
      else
        actions.add(new ConsumeToken(t));
    }
    actions.add(new EndOfInput);
    ret ruleFromActions(actions);
  }
  
  // Varargs convenience: puts the actions in a list and delegates to the
  // list-based overload.
  BasicLogicRule ruleFromActions(Action... actions) {
    L<Action> list = asList(actions);
    ret ruleFromActions(list);
  }
  
  // Builds a BasicLogicRule from makeAnd(actions) as first argument and
  // formatFrag("parsed") as second argument.
  BasicLogicRule ruleFromActions(L<Action> actions) {
    ret BasicLogicRule(
      makeAnd(actions),
      formatFrag("parsed"));
  }

  // Parses input against a pattern: resets the machine, adds an initial
  // state for the tokenized input and the pattern's rule, then lets the
  // machine think.
  // pattern e.g.: "Das * hat *.";
  void parse(S pattern, S input) {
    pm.reset();
    LS inputTok = javaTok(input);
    BasicLogicRule rule = patternToRule(pattern);
    addState(inputTok, rule);
    pm.think();
  }
  
  // Parses input against a ready-made rule: resets the machine, adds an
  // initial state for the tokenized input, then lets the machine think.
  void parse(BasicLogicRule rule, S input) {
    pm.reset();
    LS inputTok = javaTok(input);
    addState(inputTok, rule);
    pm.think();
  }
  
  // Creates a new state for the given token list whose remaining rule is
  // curryLHS(rule), adds it to pm and returns it.
  State addState(LS tok, BasicLogicRule rule) {
    State initial = new State;
    initial.tok = tok;
    initial.remainingRule = curryLHS(rule);
    pm.addState(initial);
    ret initial;
  }

  // Converts a state's recorded matches using the default action filter
  // (passes null as the predicate).
  Matches stateToMatches(State state) {
    IPred<Action> defaultFilter = null;
    ret stateToMatches(state, defaultFilter);
  }
  
  // Converts a state's recorded matches into a Matches object.
  // Returns null for a null state. A match is included when actionsToCount
  // accepts its action; with a null predicate, matches of Any actions and of
  // actions with a non-empty grammar class are included. Each included
  // match contributes its joined token text.
  Matches stateToMatches(State state, IPred<Action> actionsToCount) {
    if (state == null) ret null;
    new LS out;
    for (Pair<Action, LS> p : state.matchesFromAction()) {
      Action a = p.a;
      bool counts;
      if (actionsToCount == null)
        counts = a instanceof Any || nempty(a.grammarClass());
      else
        counts = actionsToCount.get(a);
      if (counts)
        out.add(join(p.b));
    }
    ret matches(out);
  }
  
  // Matches of the best done state (see bestDoneState).
  Matches bestMatches() {
    State best = bestDoneState();
    ret stateToMatches(best);
  }
  // The first entry of pm.doneStates, as returned by first().
  State bestDoneState() {
    ret first(pm.doneStates);
  }
  
  // Lets the underlying machine continue processing (delegates to pm.think()).
  void think() {
    pm.think();
  }
}

Author comment

Began life as a copy of #1027937

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1027944
Snippet name: ProbabilisticParser1 (shortened, LIVE)
Eternal ID of this version: #1027944/57
Text MD5: b7a199bc89b5c1c41101d4f8f9eae039
Transpilation MD5: fba0efcb5a7be3b887a79aa00427ad4a
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-25 21:21:19
Source code size: 5859 bytes / 183 lines
Pitched / IR pitched: No / No
Views / Downloads: 298 / 821
Version history: 56 change(s)
Referenced in: [show references]