Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

152
LINES

< > BotCompany Repo | #1027937 // ProbabilisticParser1 (before shortening)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4078L/26K).

1  
// Probabilistic pattern parser. parse(pattern, input) converts the pattern
// into a chain of Action objects (see patternToRule) and explores State
// objects, each carrying a probability in percent. States whose probability
// falls below cutoffPercentage are set aside instead of being explored.
sclass ProbabilisticParser1 {
2  
  // States that have no remaining rule (added by addState when s.done()).
  // All four sets are constructed with byProbability(), presumably as their sort order.
  transient TreeSetWithDuplicates<State> doneStates = new(byProbability());
3  
  // Every state accepted by addState (i.e. not below the cutoff).
  transient TreeSetWithDuplicates<State> states = new(byProbability());
4  
  // Accepted states that have not been stepped yet; stepFirstUnstepped pops from here.
  transient TreeSetWithDuplicates<State> steppableStates = new(byProbability());
5  
  // States rejected by addState because their probability was below cutoffPercentage.
  transient TreeSetWithDuplicates<State> droppedStates = new(byProbability());
6  
  // Incremented once per State created (State.number = ++stateCount); zeroed by reset().
  transient int stateCount;
7  
8  
  // Minimum probability (in percent) a state needs for addState to keep it.
  double cutoffPercentage = 50;
9  
  
10  
  // Comparator over State.probability with the arguments to cmp swapped
  // (b before a) - presumably yielding highest-probability-first order,
  // assuming cmp is an ascending comparison.
  Comparator<State> byProbability() { ret (a, b) -> cmp(b.probability, a.probability); }
11  
  
12  
  // Base class for one parsing step. Instances are used as the lhs of a
  // BasicLogicRule (see State.action()).
  abstract class Action {
13  
    // Executes this action on the given state; the subclasses in this file
    // derive successor states and hand them to addState.
    abstract void run(State state);
14  
    
15  
    // Creates a successor of `state`: a fresh State (which also bumps
    // stateCount) with tok, iNextToken, probability and matches copied over,
    // prev pointing back at `state`, and remainingRule advanced to the rhs of
    // the current rule. Dereferences state.remainingRule, so it must not be
    // called on a state that is done().
    State prepareClone(State state) {
16  
      new State s;
17  
      copyFields(state, s, 'tok, 'iNextToken, 'probability, 'matches);
18  
      s.prev = state;
19  
      // optCast presumably yields null when rhs is not a BasicLogicRule,
      // which would mark the clone as done - TODO confirm
      s.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs);
20  
      ret s;
21  
    }
22  
  }
23  
  
24  
  // An Action that consumes a variable number of input tokens. For each
  // candidate length it creates one successor state and scores it with
  // calcProbabilityForMatchedTokens.
  abstract class Consumer extends Action {
25  
    // override this or the next method
26  
    // Score (in percent) for the matched text given as a single string.
    // The default throws overrideMe().
    double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
27  
    // tok is CNC starting & ending with code token
28  
    // Score (in percent) for the matched token list; by default joins the
    // tokens and delegates to calcProbabilityForMatchedText.
    double calcProbabilityForMatchedTokens(LS tok) {
29  
 ret calcProbabilityForMatchedText(join(tok));
30  
 }
31  
    
32  
    // Adds one successor state per candidate match length n = 0..maxTokensToConsume
    // (inclusive), so the empty match (n = 0) is always tried.
    void run(State state) {
33  
      int maxTokensToConsume = state.remainingTokens();
34  
      
35  
      // NOTE(review): for n == maxTokensToConsume, s.iNextToken works out to
      // more than l(tok) given the formula in State.remainingTokens(), so the
      // subList end index below is >= l(tok). This appears to rely on subList
      // tolerating an out-of-range end - confirm.
      for (int n = 0; n <= maxTokensToConsume; n++) {
36  
        State s = prepareClone(state);
37  
        // advance two list positions per consumed token (tok is documented as
        // CNC; presumably code tokens alternate with non-code tokens - confirm)
        s.iNextToken += n*2;
38  
        // tokens between the old and the new position
        LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
39  
        s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
40  
        // record the source state together with what was matched; the source
        // state's action() is the action that produced this match
        s.matches = revChainPlus(s.matches, pair(state, tok));
41  
        addState(s);
42  
      }
43  
    }
44  
  }
45  
  
46  
  // Consumer for a literal pattern token. Empty matched text scores 50;
  // anything else is scored by levenSimilarityIntIC against `token`
  // (presumably a case-insensitive Levenshtein similarity in percent - confirm).
  noeq record ConsumeToken(S token) extends Consumer {
47  
    double calcProbabilityForMatchedText(S s) {
48  
      ret empty(s) ? 50 : levenSimilarityIntIC(s, token);
49  
    }
50  
  }
51  
  
52  
  // Consumer created for the "*" pattern token (see patternToRule). Scores any
  // matched text with a constant 90. stateToMatches collects the text matched
  // by Any actions.
  noeq record Any extends Consumer {
53  
    double calcProbabilityForMatchedText(S s) {
54  
      ret 90;
55  
    }
56  
  }
57  
  
58  
  // Consumer created for the even-indexed tokens of the pattern (see
  // patternToRule). Overrides the token-list scoring method directly:
  // 100 minus 10 for each token counted by countCodeTokensInReversedCNC.
  noeq record Filler extends Consumer {
59  
    double calcProbabilityForMatchedTokens(LS tok) {
60  
      ret 100-countCodeTokensInReversedCNC(tok)*10;
61  
    }
62  
  }
63  
  
64  
  // Final action of a pattern (appended by patternToRule). Produces a single
  // successor state; if the input is not exhausted the probability is halved
  // rather than the state being rejected outright.
  noeq record EndOfInput extends Action {
65  
    void run(State state) {
66  
      State s = prepareClone(state);
67  
      if (!state.endOfInput()) s.probability /= 2;
68  
      // record whatever tokens are left from iNextToken onwards as this action's match
      s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
69  
      addState(s);
70  
    }
71  
  }
72  
73  
  // One node in the parser's search: a position in the token list, the rule
  // still to be applied, the probability accumulated so far and the matches
  // recorded on the way here.
  class State {
74  
    // sequential id; creating a State increments the parser's stateCount
    int number = ++stateCount;
75  
    // state this one was cloned from (set by Action.prepareClone); null for the initial state
    State prev;
76  
    // probability in percent; starts at 100
    double probability = 100;
77  
    LS tok; // CNC
78  
    // index into tok of the next token to consume; starts at 1
    int iNextToken = 1;
79  
    // rule still to be applied; null means this state is done
    BasicLogicRule remainingRule;
80  
    // each pair holds the state an action was run on and the tokens it matched
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
81  
82  
    // Shows number, probability and iNextToken; for done states the " (done)"
    // marker and the matches are passed to stringIf (presumably appended only
    // when done() is true).
    toString {
83  
      ret toStringWithFields(this, "number", "probability", "iNextToken") + stringIf(done(), " (done)" + " matches: " + matchesFromAction());
84  
    }
85  
86  
    // The matches with each source state replaced by that state's action().
    LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
87  
    
88  
    // True when no rule remains to be applied.
    bool done() { ret remainingRule == null; }
89  
    
90  
    // True when iNextToken has reached or passed l(tok).
    bool endOfInput() { ret iNextToken >= l(tok); }
91  
    // Upper bound used by Consumer.run for how many tokens to try consuming.
    int remainingTokens() { ret (l(tok)-iNextToken)/2+1; }
92  
    // The token at iNextToken, fetched with get(tok, iNextToken).
    S nextToken() { ret get(tok, iNextToken); }
93  
94  
    // The lhs of the remaining rule cast to Action, or null when done.
    Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
95  
    
96  
    // Runs the next action on this state; does nothing when done.
    void step { if (!done()) action().run(this); }
97  
  }
98  
  
99  
  // Registers a newly created state. States below cutoffPercentage go to
  // droppedStates only. All others are passed to addToCollections with states
  // and steppableStates (presumably added to both), and additionally to
  // doneStates when they have no remaining rule. Done states therefore also
  // enter steppableStates; State.step is a no-op for them.
  void addState(State s) {
100  
    if (s.probability < cutoffPercentage) ret with droppedStates.add(s);
101  
    addToCollections(s, states, steppableStates);
102  
    if (s.done()) doneStates.add(s);
103  
  }
104  
  
105  
  // Takes the first state out of steppableStates via popFirst and steps it.
  // Returns false when popFirst yields null (nothing left to step), true otherwise.
  bool stepFirstUnstepped() {
106  
    State s = popFirst(steppableStates), ret false if null;
107  
    ret true with s.step();
108  
  }
109  
110  
  // Converts a pattern string into the rule that drives parsing. The pattern
  // is tokenized with javaTok; tokens at even indices become Filler, the
  // token "*" becomes Any, every other token becomes ConsumeToken(t), and an
  // EndOfInput action is appended. The action list is combined with makeAnd,
  // wrapped in a BasicLogicRule whose other argument is formatFrag("parsed"),
  // and passed through curryLHS (presumably so each rule has one action as
  // lhs and the remainder as rhs, matching how State.action() and
  // Action.prepareClone read it - confirm).
  BasicLogicRule patternToRule(S pattern) {
111  
    ret curryLHS(BasicLogicRule(
112  
      makeAnd(listPlus(
113  
        mapWithIndex(javaTok(pattern), (i, t) -> even(i)
114  
          ? new Filler
115  
          : eq(t, "*") ? new Any : new ConsumeToken(t)),
116  
        new EndOfInput)),
117  
      formatFrag("parsed")));
118  
  }
119  
120  
  // Discards all state from a previous parse: passes the four state sets to
  // clearAll and restarts state numbering at zero.
  void reset {
121  
    clearAll(doneStates, states, steppableStates, droppedStates);
122  
    stateCount = 0;
123  
  }
124  
125  
  // pattern e.g.: "Das * hat *.";
  // Parses `input` against `pattern`. Resets the parser, builds and prints the
  // rule, seeds the search with one initial State (tokenized input, full
  // rule) and then steps states until stepFirstUnstepped reports none are left.
  // Results are afterwards available through bestStates / bestMatches.
126  
  void parse(S pattern, S input) {
127  
    reset();
128  
    BasicLogicRule rule = patternToRule(pattern);
129  
    print(rule);
130  
    
131  
    new State state;
132  
    state.tok = javaTok(input);
133  
    state.remainingRule = rule;
134  
    addState(state);
135  
    // `while ping` is a JavaX loop form; presumably it calls ping() on each
    // iteration in addition to testing the condition - confirm
    while ping (stepFirstUnstepped()) {}
136  
  }
137  
138  
  // Returns takeFirst(n, doneStates) - presumably up to the first n done
  // states in the set's iteration order.
  L<State> bestStates(int n) {
139  
    ret takeFirst(n, doneStates);
140  
  }
141  
142  
  // Extracts the wildcard captures from a state: for every recorded match
  // whose action is an Any, the matched tokens are joined into one string.
  // The collected strings are wrapped with matches(out).
  Matches stateToMatches(State state) {
143  
    // `null;` is presumably JavaX shorthand for `return null;` - confirm
    if (state == null) null;
144  
    new LS out;
145  
    for (Pair<Action, LS> p : state.matchesFromAction())
146  
      if (p.a instanceof Any)
147  
        out.add(join(p.b));
148  
    ret matches(out);
149  
  }
150  
  
151  
  // Wildcard captures of first(doneStates), converted by stateToMatches
  // (which returns null for a null state).
  Matches bestMatches() { ret stateToMatches(first(doneStates)); }
152  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1027937
Snippet name: ProbabilisticParser1 (before shortening)
Eternal ID of this version: #1027937/10
Text MD5: a13434d08ef0fa5a9650a1bf6d46537d
Transpilation MD5: be801c469d5bed4d2a544f28694105a4
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-04-20 12:02:45
Source code size: 4731 bytes / 152 lines
Pitched / IR pitched: No / No
Views / Downloads: 210 / 320
Version history: 9 change(s)
Referenced in: [show references]