Libraryless. Click here for Pure Java version (4977L/32K).
// A top-down left-to-right parser using probabilistic states sclass ProbabilisticParser1 { new ProbabilisticMachine<State> pm; bool verbose; abstract class Action { S grammarClass; abstract void run(State state); //!include #1027987 // setExtraField, getExtraField S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; } selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; } // extra field names //static final S ef_grammarClass = "grammarClass"; } abstract class Consumer extends Action { // override this or the next method double calcProbabilityForMatchedText(S s) { throw overrideMe(); } // tok is CNC starting & ending with code token double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); } int minTokensToConsume = 0; @Override void run(State state) { int maxTokensToConsume = state.remainingTokens(); if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume); for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) { State s = state.prepareClone(); s.iNextToken += n*2; LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1); s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok)); s.matches = revChainPlus(s.matches, pair(state, tok)); pm.addState(s); } } } noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer { { tokens = asCISet(tokens); } *(S... tokens) { this.tokens = litciset(tokens); } double calcProbabilityForMatchedText(S s) { double p; if (tokens.contains(s)) p = empty(s) ? 90 : 100; else if (empty(s)) p = 50; else p = levenSimilarityIntIC_multi(s, tokens); ifdef ConsumeOneOfTokens_debug print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]"); endifdef ret p; } } noeq record ConsumeToken(S token) extends Consumer { double emptyProbability = 50; double calcProbabilityForMatchedText(S s) { ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token); } } noeq record Any extends Consumer { *(S grammarClass) { setGrammarClass(grammarClass); } double calcProbabilityForMatchedText(S s) { ret 90; } toString { ret joinNemptiesWithSpace("Any", grammarClass); } } noeq record Filler extends Consumer { double calcProbabilityForMatchedTokens(LS tok) { ret 100-countCodeTokensInReversedCNC(tok)*10; } } noeq record EndOfInput extends Action { void run(State state) { State s = state.prepareClone(); if (!state.endOfInput()) s.probability /= 2; s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken))); pm.addState(s); } } class State extends ProbabilisticMachine.State { LS tok; // CNC int iNextToken = 1; ReverseChain<Pair<State, LS>> matches; // values: reversed CNC O userObject; // copied around from state to state, e.g. reference to production toString { ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction(); } ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; } LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); } Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; } bool endOfInput() { ret iNextToken >= l(tok); } int remainingTokens() { ret (l(tok)-iNextToken)/2+1; } S nextToken() { ret get(tok, iNextToken); } State emptyClone() { ret new State; } State prepareClone() { ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject); } void runAction(O action) { assertSame(machine, pm); if (verbose) print("Running action: " + action + ", machine: " + machine); ((Action) action).run(this); if (verbose) print("Ran action: " + className(action)); } } BasicLogicRule patternToRule(S pattern) { ret ruleFromActions( listPlus( mapWithIndex(javaTok(pattern), (i, t) -> even(i) ? new Filler : eq(t, "*") ? new Any : new ConsumeToken(t)), new EndOfInput)); } BasicLogicRule ruleFromActions(Action... actions) { ret ruleFromActions(asList(actions)); } BasicLogicRule ruleFromActions(L<Action> actions) { ret BasicLogicRule(makeAnd(actions), formatFrag("parsed")); } // pattern e.g.: "Das * hat *."; void parse(S pattern, S input) { pm.reset(); addState(javaTok(input), patternToRule(pattern)); pm.think(); } void parse(BasicLogicRule rule, S input) { pm.reset(); addState(javaTok(input), rule); pm.think(); } State addState(LS tok, BasicLogicRule rule) { new State state; state.tok = tok; state.remainingRule = curryLHS(rule); pm.addState(state); ret state; } Matches stateToMatches(State state) { ret stateToMatches(state, null); } Matches stateToMatches(State state, IPred<Action> actionsToCount) { if (state == null) null; new LS out; for (Pair<Action, LS> p : state.matchesFromAction()) if (actionsToCount != null ? actionsToCount.get(p.a) : p.a instanceof Any || nempty(p.a.grammarClass())) out.add(join(p.b)); ret matches(out); } Matches bestMatches() { ret stateToMatches(bestDoneState()); } State bestDoneState() { ret first(pm.doneStates); } void think { pm.think(); } }
Began life as a copy of #1027937
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1027944 |
Snippet name: | ProbabilisticParser1 (shortened, LIVE) |
Eternal ID of this version: | #1027944/57 |
Text MD5: | b7a199bc89b5c1c41101d4f8f9eae039 |
Transpilation MD5: | fba0efcb5a7be3b887a79aa00427ad4a |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-25 21:21:19 |
Source code size: | 5859 bytes / 183 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 355 / 902 |
Version history: | 56 change(s) |
Referenced in: | [show references] |