Uses 911K of libraries. Click here for Pure Java version (5718L/29K).
// Dynamic module that splits a run-together, all-caps string (input) into
// known words (words), leaving stretches that match no word as they are.
// It shows the tree of all possible splits, the first complete result,
// and a count of combinations.
cmodule AModule {
  // The text to split (no separators between the words).
  switchable S input = "STATICALAFLATTENLISTOFPAIRSLPAIRALLAOUTEMPTYLISTLLFORPAIRAPUNNULLLOUTADDPAOUTADDPBRETOUT";

  // Space-separated vocabulary; split with splitAtSpace in root().
  switchable S words = "ret ll ls static out list pair pairs flatten null of if else while for";

  // First complete split found, joined with spaces (set in start-thread).
  S firstResult;

  // Result of combinationsForPositionalParser (set in start-thread).
  transient BigInt totalCombinations;

  // One state of the split search.
  //
  // basically immutable (we make a new copy for every step)
  // to allow evaluating states in parallel and in any order
  //
  // get() returns either the follow-up states (A) or, once the input
  // is used up, the finished list of pieces (B).
  class GreedySplitIntoWordsCI_Multi implements IF0<Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS>> {
    S s; // the input to be split
    TreeSet<S> wordsSet; // vocabulary, built with asCISet (presumably a case-insensitive set)
    Map<Int> longestMatchMap; // position in s -> length of longest vocabulary prefix found there
    int i = 0, last = 0; // i = current position; s[last..i) is text not yet added to out
    ReverseChain<S> out; // pieces emitted so far

    *() {}

    // s: text to split; words: the vocabulary
    *(S *s, Cl<S> words) {
      wordsSet = asCISet(words);
      // NOTE(review): AutoMap presumably computes entries on first access,
      // and dontPrint presumably just passes its second argument through - confirm.
      longestMatchMap = new AutoMap<Int>(i -> dontPrint("longest match at " + i + ": ", lengthOfLongestPrefixInCISet(substring(s, i), wordsSet)));
    }

    // All vocabulary words that start at position i of s, taken from s itself.
    // Candidate lengths come from countBackwardsTo1(longestMatch)
    // (presumably longestMatch down to 1, i.e. longest word first - confirm).
    Cl<S> wordsAtPosition(int i) {
      int longestMatch = longestMatchMap.get(i);
      ret mapNonNulls(countBackwardsTo1(longestMatch), matchLength -> {
        S word = substring(s, i, i+matchLength);
        // not every prefix length up to the longest match is itself a word
        ret contains(wordsSet, word) ? word : null;
      });
    }

    // either we return some choices or a final result
    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> get() {
      if (i >= l(s)) ret done(); // done with input

      ret eitherA(listPlus(map(wordsAtPosition(i), wordMatched -> {
        // Choice: accept wordMatched at the current position.
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
        clone.flush(); // emit pending unmatched text s[last..i) first
        clone.i = clone.last = i+l(wordMatched);
        clone.out = revChainPlus(clone.out, substring(s, i, clone.i));
        ret clone;
      }), getVar(() -> {
        // Extra choice passed to listPlus after the word choices:
        // skip one character, leaving it in the unflushed stretch.
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
        clone.i++;
        ret clone;
      })));
    }

    // Final result: flushes pending text and returns all pieces as a list.
    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> done() {
      flush();
      ret eitherB(asList(out));
    }

    // Text consumed since the last emitted piece.
    S unflushed() { ret substring(s, last, i); }

    // Appends the unflushed text (if any) to out and marks it as emitted.
    void flush {
      if (i <= last) ret;
      // modifying this object in spite of convention
      out = revChainPlus(out, unflushed());
      last = i;
    }

    toString {
      ret asList(out) + prependIfNempty("|", unflushed()) + ", " + i + "/" + l(s);
    }

    // Pieces so far plus the unflushed text, joined via joinNemptiesWithSpace.
    S sentence() {
      ret joinNemptiesWithSpace(listPlus(asList(out), unflushed()));
    }
  }

  // Fresh start state for the current input and vocabulary.
  GreedySplitIntoWordsCI_Multi root() {
    ret new GreedySplitIntoWordsCI_Multi(input, splitAtSpace(words));
  }

  // UI: two labelled values on top, the tree of split choices below.
  visual northAndCenterWithMargins(
    jvstackWithSpacing(
      withLabel("Total combinations:", dm_label totalCombinations()),
      withLabel("Preferred result:", dm_label firstResult())),
    jDynamicEitherTree(root(),
      // inner nodes show the partial sentence followed by "...", leaves the final split
      valueToText := (IF1<Either<GreedySplitIntoWordsCI_Multi, LS>, S>) x ->
        isEitherA(x) ? eitherAOpt(x).sentence() + "..." : joinWithSpace(eitherBOpt(x))));

  start-thread {
    dm_reloadOnFieldChange('words, 'input);

    time {
      print("First result: " + setField(firstResult := joinWithSpace(getFirstResultOfEitherTree(root()!))));
    }

    GreedySplitIntoWordsCI_Multi root = root();

    // Possible step widths at position i: the length of every word
    // matching there, plus 1 for skipping a single character.
    // We should subtract combinationsAtPosition(i-l(word)) for each word because I think it is counted twice
    // NOTE(review): the original statement was missing one closing parenthesis;
    // it is added at the end so that debug := true stays an argument of
    // combinationsForPositionalParser - confirm against that function's signature.
    setField(totalCombinations := combinationsForPositionalParser(
      l(input),
      i -> listPlus(lambdaMap l(root.wordsAtPosition(i)), 1),
      debug := true));
  }
}
Began life as a copy of #1028153
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028159 |
Snippet name: | greedySplitIntoWordsCI with choices Spike, shortened [OK] |
Eternal ID of this version: | #1028159/12 |
Text MD5: | 0b05f6aa10b1bab2de898d163f91fe32 |
Transpilation MD5: | 973fe94cf68116abf1ed4e6a46e133b0 |
Author: | stefan |
Category: | javax / stefan's os / nlp |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-26 14:12:21 |
Source code size: | 3487 bytes / 89 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 259 / 1190 |
Version history: | 11 change(s) |
Referenced in: | -