Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

183
LINES

< > BotCompany Repo | #1027944 // ProbabilisticParser1 (shortened, LIVE)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4977L/32K).

1  
// A top-down left-to-right parser using probabilistic states
2  
sclass ProbabilisticParser1 {
3  
  // Machine that receives the parser's states (pm.addState) and is run via pm.think()
  new ProbabilisticMachine<State> pm;
4  
  // when set, Consumer.run and State.runAction print trace output
  bool verbose;
5  
6  
  // Base class of everything a parse rule can execute against a State.
  // Subclasses implement run(State); an optional grammarClass label can be attached.
  abstract class Action {
7  
    // Optional label. stateToMatches() includes an action's match by default
    // when this is non-empty (or when the action is an Any).
    S grammarClass;
8  
    
9  
    // Executes this action on the given state.
    abstract void run(State state);
10  
    
11  
    //!include #1027987 // setExtraField, getExtraField
12  
    
13  
    // Getter for the grammarClass label.
    S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; }
14  
    // Setter for the grammarClass label.
    // NOTE(review): the bare "this;" presumably returns this for chaining - confirm against the dialect.
    selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; }
15  
    
16  
    // extra field names
17  
    //static final S ef_grammarClass = "grammarClass";
18  
  }
19  
  
20  
  // An Action that consumes a variable number of input tokens.
  // For every candidate token count it adds one successor state to pm,
  // scored by calcProbabilityForMatchedTokens.
  abstract class Consumer extends Action {
21  
    // override this or the next method
22  
    // Scores the matched text; the default throws overrideMe().
    double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
23  
    // tok is CNC starting & ending with code token
24  
    // Default: joins the tokens and delegates to calcProbabilityForMatchedText.
    double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); }
25  
    
26  
    // smallest number of tokens run() will try to consume
    int minTokensToConsume = 0;
27  
28  
    @Override
29  
    void run(State state) {
30  
      // upper bound for the loop below
      // NOTE(review): remainingTokens() is (l(tok)-iNextToken)/2+1, which may be one more
      // than the number of code tokens left in a CNC list - confirm this bound is intended
      int maxTokensToConsume = state.remainingTokens();
31  
      
32  
      if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume);
33  
      
34  
      // one successor state per candidate token count n
      for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) {
35  
        State s = state.prepareClone();
36  
        // step is 2 per consumed token (tok is a CNC list, see State.tok)
        s.iNextToken += n*2;
37  
        // tokens from the old position up to (excluding) index s.iNextToken-1
        LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
38  
        // combine the state's probability with the score for the consumed tokens
        s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
39  
        // record which state consumed which tokens
        s.matches = revChainPlus(s.matches, pair(state, tok));
40  
        pm.addState(s);
41  
      }
42  
    }
43  
  }
44  
  
45  
  // Consumer that scores the matched text against a set of accepted tokens.
  noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
46  
    // NOTE(review): asCISet/litciset presumably produce case-insensitive sets - confirm
    { tokens = asCISet(tokens); }
47  
    *(S... tokens) { this.tokens = litciset(tokens); }
48  
    
49  
    // Score: 100 if s is in the set and non-empty, 90 if it is in the set and empty,
    // 50 if it is empty and not in the set, otherwise levenSimilarityIntIC_multi(s, tokens).
    double calcProbabilityForMatchedText(S s) {
50  
      double p;
51  
      if (tokens.contains(s)) p = empty(s) ? 90 : 100;
52  
      else if (empty(s)) p = 50;
53  
      else p = levenSimilarityIntIC_multi(s, tokens);
54  
      // debug output, only compiled in when ConsumeOneOfTokens_debug is defined
      ifdef ConsumeOneOfTokens_debug
55  
      print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
56  
      endifdef
57  
      ret p;
58  
    }
59  
  }
60  
  
61  
  noeq record ConsumeToken(S token) extends Consumer {
62  
    double emptyProbability = 50;
63  
    
64  
    double calcProbabilityForMatchedText(S s) {
65  
      ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token);
66  
    }
67  
  }
68  
  
69  
  // Consumer that accepts any text with a constant score of 90.
  noeq record Any extends Consumer {
70  
    // Constructor variant that also sets the grammarClass label.
    *(S grammarClass) { setGrammarClass(grammarClass); }
71  
72  
    // The matched text is ignored; the score is always 90.
    double calcProbabilityForMatchedText(S s) {
73  
      ret 90;
74  
    }
75  
    
76  
    // "Any" combined with grammarClass via joinNemptiesWithSpace
    toString { ret joinNemptiesWithSpace("Any", grammarClass); }
77  
  }
78  
  
79  
  // Consumer whose score drops by 10 for every code token it consumes.
  // patternToRule places a Filler at every even index of the tokenized pattern.
  noeq record Filler extends Consumer {
80  
    // Score: 100 minus 10 times countCodeTokensInReversedCNC(tok).
    double calcProbabilityForMatchedTokens(LS tok) {
81  
      ret 100-countCodeTokensInReversedCNC(tok)*10;
82  
    }
83  
  }
84  
  
85  
  // Action appended at the end of a pattern rule (see patternToRule).
  // It adds exactly one successor state and halves the probability
  // when the input has not been fully consumed.
  noeq record EndOfInput extends Action {
86  
    void run(State state) {
87  
      State s = state.prepareClone();
88  
      // penalty for leftover input
      if (!state.endOfInput()) s.probability /= 2;
89  
      // record the tokens from iNextToken onwards as this action's match
      s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
90  
      pm.addState(s);
91  
    }
92  
  }
93  
94  
  // Parser state: the machine state plus the token list, the current position,
  // the chain of matches made so far and a user object.
  class State extends ProbabilisticMachine.State {
95  
    LS tok; // CNC
96  
    // index into tok of the next token to consume; Consumer.run advances it by 2 per token
    int iNextToken = 1;
97  
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
98  
    O userObject; // copied around from state to state, e.g. reference to production
99  
100  
    // NOTE(review): the string literal appears to embed iNextToken via the dialect's
    // inline-expression syntax - confirm
    toString {
101  
      ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
102  
    }
103  
    
104  
    // The enclosing parser instance.
    ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; }
105  
106  
    // The matches chain with each recorded state replaced by that state's action().
    LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
107  
    
108  
    // Left-hand side of the remaining rule cast to Action, or null if there is no remaining rule.
    Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
109  
    
110  
    // True once iNextToken has reached or passed the length of tok.
    bool endOfInput() { ret iNextToken >= l(tok); }
111  
    // Upper bound used by Consumer.run for the number of tokens to try.
    int remainingTokens() { ret (l(tok)-iNextToken)/2+1; }
112  
    // The token at the current position, fetched with get(tok, iNextToken).
    S nextToken() { ret get(tok, iNextToken); }
113  
114  
    // Creates a fresh State of this parser.
    State emptyClone() { ret new State; }
115  
    
116  
    // Takes the result of super.prepareClone() and copies the parser-specific
    // fields tok, iNextToken, matches and userObject onto it.
    State prepareClone() {
117  
      ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject);
118  
    }
119  
    // Casts the given object to Action and runs it on this state.
    // Asserts first that this state's machine is the parser's pm.
    void runAction(O action) {
120  
      assertSame(machine, pm);
121  
      if (verbose) print("Running action: " + action + ", machine: " + machine);
122  
      ((Action) action).run(this);
123  
      if (verbose) print("Ran action: " + className(action));
124  
    }
125  
  }
126  
  
127  
  // Builds a rule from a textual pattern.
  // The pattern is tokenized with javaTok. Every even-indexed token becomes a Filler,
  // every odd-indexed token becomes an Any (if it is "*") or a ConsumeToken for that token.
  // An EndOfInput action is appended at the end.
  BasicLogicRule patternToRule(S pattern) {
128  
    ret ruleFromActions(
129  
      listPlus(
130  
        mapWithIndex(javaTok(pattern), (i, t) -> even(i)
131  
          ? new Filler
132  
          : eq(t, "*") ? new Any : new ConsumeToken(t)),
133  
        new EndOfInput));
134  
  }
135  
  
136  
  // Varargs convenience: wraps the actions in a list and delegates to the list overload.
  BasicLogicRule ruleFromActions(Action... actions) {
    L<Action> actionList = asList(actions);
    ret ruleFromActions(actionList);
  }
139  
  
140  
  // Builds a BasicLogicRule whose left side is makeAnd(actions)
  // and whose right side is formatFrag("parsed").
  BasicLogicRule ruleFromActions(L<Action> actions) {
141  
    ret BasicLogicRule(makeAnd(actions), formatFrag("parsed"));
142  
  }
143  
144  
  // pattern e.g.: "Das * hat *.";
145  
  void parse(S pattern, S input) {
146  
    pm.reset();
147  
    addState(javaTok(input), patternToRule(pattern));
148  
    pm.think();
149  
  }
150  
  
151  
  void parse(BasicLogicRule rule, S input) {
152  
    pm.reset();
153  
    addState(javaTok(input), rule);
154  
    pm.think();
155  
  }
156  
  
157  
  State addState(LS tok, BasicLogicRule rule) {
158  
    new State state;
159  
    state.tok = tok;
160  
    state.remainingRule = curryLHS(rule);
161  
    pm.addState(state);
162  
    ret state;
163  
  }
164  
165  
  // Same as the two-argument overload with no predicate, i.e. using its default selection.
  Matches stateToMatches(State state) {
166  
    ret stateToMatches(state, null);
167  
  }
168  
  
169  
  // Collects the joined token text of selected matches of a state into a Matches object.
  // An action is selected if actionsToCount accepts it. When actionsToCount is null,
  // the action is selected if it is an Any or has a non-empty grammarClass.
  Matches stateToMatches(State state, IPred<Action> actionsToCount) {
170  
    // NOTE(review): the bare "null;" is presumably the dialect's shorthand for returning null - confirm
    if (state == null) null;
171  
    new LS out;
172  
    for (Pair<Action, LS> p : state.matchesFromAction())
173  
      if (actionsToCount != null ? actionsToCount.get(p.a)
174  
        : p.a instanceof Any || nempty(p.a.grammarClass()))
175  
        out.add(join(p.b));
176  
    ret matches(out);
177  
  }
178  
  
179  
  // Matches of the state returned by bestDoneState(), using the default selection.
  Matches bestMatches() { ret stateToMatches(bestDoneState()); }
180  
  // First entry of pm.doneStates.
  // NOTE(review): presumably doneStates is ordered best-first - confirm
  State bestDoneState() { ret first(pm.doneStates); }
181  
  
182  
  // Runs the machine by delegating to pm.think().
  void think { pm.think(); }
183  
}

Author comment

Began life as a copy of #1027937

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1027944
Snippet name: ProbabilisticParser1 (shortened, LIVE)
Eternal ID of this version: #1027944/57
Text MD5: b7a199bc89b5c1c41101d4f8f9eae039
Transpilation MD5: fba0efcb5a7be3b887a79aa00427ad4a
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-25 21:21:19
Source code size: 5859 bytes / 183 lines
Pitched / IR pitched: No / No
Views / Downloads: 357 / 906
Version history: 56 change(s)
Referenced in: [show references]