// A top-down left-to-right parser using probabilistic states sclass ProbabilisticParser1 { new ProbabilisticMachine pm; bool verbose; abstract class Action { S grammarClass; abstract void run(State state); //!include #1027987 // setExtraField, getExtraField S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; } selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; } // extra field names //static final S ef_grammarClass = "grammarClass"; } abstract class Consumer extends Action { // override this or the next method double calcProbabilityForMatchedText(S s) { throw overrideMe(); } // tok is CNC starting & ending with code token double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); } int minTokensToConsume = 0; @Override void run(State state) { int maxTokensToConsume = state.remainingTokens(); if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume); for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) { State s = state.prepareClone(); s.iNextToken += n*2; LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1); s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok)); s.matches = revChainPlus(s.matches, pair(state, tok)); pm.addState(s); } } } noeq record ConsumeOneOfTokens(Set tokens) extends Consumer { { tokens = asCISet(tokens); } *(S... tokens) { this.tokens = litciset(tokens); } double calcProbabilityForMatchedText(S s) { double p; if (tokens.contains(s)) p = empty(s) ? 90 : 100; else if (empty(s)) p = 50; else p = levenSimilarityIntIC_multi(s, tokens); ifdef ConsumeOneOfTokens_debug print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]"); endifdef ret p; } } noeq record ConsumeToken(S token) extends Consumer { double emptyProbability = 50; double calcProbabilityForMatchedText(S s) { ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token); } } noeq record Any extends Consumer { *(S grammarClass) { setGrammarClass(grammarClass); } double calcProbabilityForMatchedText(S s) { ret 90; } toString { ret joinNemptiesWithSpace("Any", grammarClass); } } noeq record Filler extends Consumer { double calcProbabilityForMatchedTokens(LS tok) { ret 100-countCodeTokensInReversedCNC(tok)*10; } } noeq record EndOfInput extends Action { void run(State state) { State s = state.prepareClone(); if (!state.endOfInput()) s.probability /= 2; s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken))); pm.addState(s); } } class State extends ProbabilisticMachine.State { LS tok; // CNC int iNextToken = 1; ReverseChain> matches; // values: reversed CNC O userObject; // copied around from state to state, e.g. reference to production toString { ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction(); } ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; } LPair matchesFromAction() { ret mapPairsA(s -> s.action(), matches); } Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; } bool endOfInput() { ret iNextToken >= l(tok); } int remainingTokens() { ret (l(tok)-iNextToken)/2+1; } S nextToken() { ret get(tok, iNextToken); } State emptyClone() { ret new State; } State prepareClone() { ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject); } void runAction(O action) { assertSame(machine, pm); if (verbose) print("Running action: " + action + ", machine: " + machine); ((Action) action).run(this); if (verbose) print("Ran action: " + className(action)); } } BasicLogicRule patternToRule(S pattern) { ret ruleFromActions( listPlus( mapWithIndex(javaTok(pattern), (i, t) -> even(i) ? new Filler : eq(t, "*") ? new Any : new ConsumeToken(t)), new EndOfInput)); } BasicLogicRule ruleFromActions(Action... actions) { ret ruleFromActions(asList(actions)); } BasicLogicRule ruleFromActions(L actions) { ret BasicLogicRule(makeAnd(actions), formatFrag("parsed")); } // pattern e.g.: "Das * hat *."; void parse(S pattern, S input) { pm.reset(); addState(javaTok(input), patternToRule(pattern)); pm.think(); } void parse(BasicLogicRule rule, S input) { pm.reset(); addState(javaTok(input), rule); pm.think(); } State addState(LS tok, BasicLogicRule rule) { new State state; state.tok = tok; state.remainingRule = curryLHS(rule); pm.addState(state); ret state; } Matches stateToMatches(State state) { ret stateToMatches(state, null); } Matches stateToMatches(State state, IPred actionsToCount) { if (state == null) null; new LS out; for (Pair p : state.matchesFromAction()) if (actionsToCount != null ? actionsToCount.get(p.a) : p.a instanceof Any || nempty(p.a.grammarClass())) out.add(join(p.b)); ret matches(out); } Matches bestMatches() { ret stateToMatches(bestDoneState()); } State bestDoneState() { ret first(pm.doneStates); } void think { pm.think(); } }