Libraryless. Click here for Pure Java version (4078L/26K).
sclass ProbabilisticParser1 { transient TreeSetWithDuplicates<State> doneStates = new(byProbability()); transient TreeSetWithDuplicates<State> states = new(byProbability()); transient TreeSetWithDuplicates<State> steppableStates = new(byProbability()); transient TreeSetWithDuplicates<State> droppedStates = new(byProbability()); transient int stateCount; double cutoffPercentage = 50; Comparator<State> byProbability() { ret (a, b) -> cmp(b.probability, a.probability); } abstract class Action { abstract void run(State state); State prepareClone(State state) { new State s; copyFields(state, s, 'tok, 'iNextToken, 'probability, 'matches); s.prev = state; s.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs); ret s; } } abstract class Consumer extends Action { // override this or the next method double calcProbabilityForMatchedText(S s) { throw overrideMe(); } // tok is CNC starting & ending with code token double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); } void run(State state) { int maxTokensToConsume = state.remainingTokens(); for (int n = 0; n <= maxTokensToConsume; n++) { State s = prepareClone(state); s.iNextToken += n*2; LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1); s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok)); s.matches = revChainPlus(s.matches, pair(state, tok)); addState(s); } } } noeq record ConsumeToken(S token) extends Consumer { double calcProbabilityForMatchedText(S s) { ret empty(s) ? 50 : levenSimilarityIntIC(s, token); } } noeq record Any extends Consumer { double calcProbabilityForMatchedText(S s) { ret 90; } } noeq record Filler extends Consumer { double calcProbabilityForMatchedTokens(LS tok) { ret 100-countCodeTokensInReversedCNC(tok)*10; } } noeq record EndOfInput extends Action { void run(State state) { State s = prepareClone(state); if (!state.endOfInput()) s.probability /= 2; s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken))); addState(s); } } class State { int number = ++stateCount; State prev; double probability = 100; LS tok; // CNC int iNextToken = 1; BasicLogicRule remainingRule; ReverseChain<Pair<State, LS>> matches; // values: reversed CNC toString { ret toStringWithFields(this, "number", "probability", "iNextToken") + stringIf(done(), " (done)" + " matches: " + matchesFromAction()); } LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); } bool done() { ret remainingRule == null; } bool endOfInput() { ret iNextToken >= l(tok); } int remainingTokens() { ret (l(tok)-iNextToken)/2+1; } S nextToken() { ret get(tok, iNextToken); } Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; } void step { if (!done()) action().run(this); } } void addState(State s) { if (s.probability < cutoffPercentage) ret with droppedStates.add(s); addToCollections(s, states, steppableStates); if (s.done()) doneStates.add(s); } bool stepFirstUnstepped() { State s = popFirst(steppableStates), ret false if null; ret true with s.step(); } BasicLogicRule patternToRule(S pattern) { ret curryLHS(BasicLogicRule( makeAnd(listPlus( mapWithIndex(javaTok(pattern), (i, t) -> even(i) ? new Filler : eq(t, "*") ? new Any : new ConsumeToken(t)), new EndOfInput)), formatFrag("parsed"))); } void reset { clearAll(doneStates, states, steppableStates, droppedStates); stateCount = 0; } // pattern e.g.: "Das * hat *."; void parse(S pattern, S input) { reset(); BasicLogicRule rule = patternToRule(pattern); print(rule); new State state; state.tok = javaTok(input); state.remainingRule = rule; addState(state); while ping (stepFirstUnstepped()) {} } L<State> bestStates(int n) { ret takeFirst(n, doneStates); } Matches stateToMatches(State state) { if (state == null) null; new LS out; for (Pair<Action, LS> p : state.matchesFromAction()) if (p.a instanceof Any) out.add(join(p.b)); ret matches(out); } Matches bestMatches() { ret stateToMatches(first(doneStates)); } }
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1027937 |
Snippet name: | ProbabilisticParser1 (before shortening) |
Eternal ID of this version: | #1027937/10 |
Text MD5: | a13434d08ef0fa5a9650a1bf6d46537d |
Transpilation MD5: | be801c469d5bed4d2a544f28694105a4 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-04-20 12:02:45 |
Source code size: | 4731 bytes / 152 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 208 / 316 |
Version history: | 9 change(s) |
Referenced in: | [show references] |