cmodule PPSpike > DynPrintLog { transient new DeepZipTools tools; transient TreeSetWithDuplicates doneStates = new(byProbability()); transient TreeSetWithDuplicates states = new(byProbability()); transient TreeSetWithDuplicates steppableStates = new(byProbability()); transient TreeSetWithDuplicates droppedStates = new(byProbability()); transient int stateCount; switchable double cutoffPercentage = 50; Comparator byProbability() { ret (a, b) -> cmp(b.probability, a.probability); } abstract class Action { abstract void run(State state); State prepareClone(State state) { new State s; copyFields(state, s, 'tok, 'iNextToken, 'probability, 'matches); s.prev = state; s.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs); ret s; } } abstract class Consumer extends Action { // override this or the next method double calcProbabilityForMatchedText(S s) { throw overrideMe(); } // tok is CNC starting & ending with code token double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); } void run(State state) { int maxTokensToConsume = min(2, state.remainingTokens()); for (int n = 0; n <= maxTokensToConsume; n++) { State s = prepareClone(state); s.iNextToken += n*2; LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1); s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok)); s.matches = revChainPlus(s.matches, pair(state, tok)); addState(s); } } } noeq record ConsumeToken(S token) extends Consumer { double calcProbabilityForMatchedText(S s) { ret empty(s) ? 50 : levenSimilarityIntIC(s, token); } } noeq record Any extends Consumer { double calcProbabilityForMatchedText(S s) { ret 90; } } noeq record Filler extends Consumer { double calcProbabilityForMatchedTokens(LS tok) { ret 100-countCodeTokensInReversedCNC(tok)*10; } } noeq record EndOfInput extends Action { void run(State state) { State s = prepareClone(state); if (!state.endOfInput()) s.probability /= 2; s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken))); addState(s); } } class State { int number = ++stateCount; State prev; double probability = 100; LS tok; // CNC int iNextToken = 1; BasicLogicRule remainingRule; ReverseChain> matches; // values: reversed CNC toString { ret toStringWithFields(this, "number", "probability", "iNextToken") + stringIf(done(), " (done)" + " matches: " + matchesFromAction()); } LPair matchesFromAction() { ret mapPairsA(s -> s.action(), matches); } bool done() { ret remainingRule == null; } bool endOfInput() { ret iNextToken >= l(tok); } int remainingTokens() { ret (l(tok)-iNextToken)/2+1; } S nextToken() { ret get(tok, iNextToken); } Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; } void step { if (!done()) action().run(this); } } void addState(State s) { if (s.probability < cutoffPercentage) ret with droppedStates.add(s); addToCollections(s, states, steppableStates); if (s.done()) doneStates.add(s); } bool stepFirstUnstepped() { State s = popFirst(steppableStates), ret false if null; ret true with s.step(); } BasicLogicRule patternToRule(S pattern) { ret curryLHS(BasicLogicRule( makeAnd(listPlus( mapWithIndex(javaTok(pattern), (i, t) -> even(i) ? new Filler : eq(t, "*") ? new Any : new ConsumeToken(t)), new EndOfInput)), formatFrag("parsed"))); } start-thread { //S pattern = "Das Wohnzimmer hat auch einen Wohnbreich."; //S pattern = "Das * hat einen *."; S pattern = "Das * hat *."; BasicLogicRule rule = patternToRule(pattern); print(rule); S input = "Das Wohnzimmer hatt auch einen Wohnbereich."; //S input = "Das Wohnzimmer hat einen Wohnbereich."; new State state; state.tok = javaTok(input); state.remainingRule = rule; addState(state); time "Inference" { while ping (stepFirstUnstepped()) {} } print("States: " + l(states) + ", dropped: " + l(droppedStates)); L top = takeFirst(3, doneStates); pnl(top); print("Pattern: " + pattern); print("Input: " + input); for (State bestState : top) { LPair matches = mapPairB join(bestState.matchesFromAction()); //pnl("MATCH", matches); //pnl("ANY", filterPairA(matches, action -> action instanceof Any)); print("Cleaned", joinNemptiesWithSpace(mapPairsToList(matches, (action, s) -> { if (action instanceof Filler) ret ""; if (action cast ConsumeToken) ret action.token; ret s; }))); } } }