Uses 911K of libraries. Click here for Pure Java version (5718L/29K).
1 | cmodule AModule { |
// Module inputs: `input` is the unsegmented text to split; `words` is the
// vocabulary as one space-separated string (root() splits it with splitAtSpace).
2 | switchable S input = "STATICALAFLATTENLISTOFPAIRSLPAIRALLAOUTEMPTYLISTLLFORPAIRAPUNNULLLOUTADDPAOUTADDPBRETOUT"; |
3 | switchable S words = "ret ll ls static out list pair pairs flatten null of if else while for"; |
4 | |
// Space-joined first result of the search; assigned in the start-thread block.
5 | S firstResult; |
// Result of combinationsForPositionalParser; assigned in the start-thread block.
// NOTE(review): marked transient, presumably so it is not persisted with the module; confirm.
6 | transient BigInt totalCombinations; |
7 | |
8 | // basically immutable (we make a new copy for every step) |
9 | // to allow evaluating states in parallel and in any order |
// One state of the split search over the input s. get() either expands this
// state into successor states (the "A" side of the Either) or, once i has
// reached the end of s, returns the finished list of pieces (the "B" side).
10 | class GreedySplitIntoWordsCI_Multi implements IF0<Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS>> { |
11 | S s; // the input to be split |
// the vocabulary, built by asCISet (presumably a case-insensitive set; confirm)
12 | TreeSet<S> wordsSet; |
// maps a position in s to lengthOfLongestPrefixInCISet(rest of s from there, wordsSet)
// NOTE(review): AutoMap presumably computes entries on demand; confirm
13 | Map<Int> longestMatchMap; |
// i: current position in s. last: start of the unflushed text; s[last, i) has
// been stepped over (see the i++ successor in get()) but not yet added to out.
14 | int i = 0, last = 0; |
// pieces produced so far; extended with revChainPlus, read back with asList
15 | ReverseChain<S> out; |
16 | |
// empty constructor; get() hands a fresh instance to shallowClone (presumably as the copy target)
17 | *() {} |
// Builds the root state for input s and the given vocabulary.
// NOTE(review): "S *s" is presumably JavaX shorthand that also assigns field s; confirm
18 | *(S *s, Cl<S> words) { |
19 | wordsSet = asCISet(words); |
20 | longestMatchMap = new AutoMap<Int>(i -> dontPrint("longest match at " + i + ": ", lengthOfLongestPrefixInCISet(substring(s, i), wordsSet))); |
21 | } |
22 | |
// Returns the vocabulary words that start at position i: for each candidate
// length from countBackwardsTo1(longestMatch), the substring s[i, i+length)
// is kept if wordsSet contains it. Candidates mapped to null are presumably
// dropped by mapNonNulls.
23 | Cl<S> wordsAtPosition(int i) { |
24 | int longestMatch = longestMatchMap.get(i); |
25 | ret mapNonNulls(countBackwardsTo1(longestMatch), matchLength -> { |
26 | S word = substring(s, i, i+matchLength); |
27 | ret contains(wordsSet, word) ? word : null; |
28 | }); |
29 | } |
30 | |
31 | // either we return some choices or a final result |
// Choices are: one successor per word found at position i, followed by one
// extra successor (appended via listPlus) that only advances i by one.
32 | Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> get() { |
33 | if (i >= l(s)) ret done(); // done with input |
34 | ret eitherA(listPlus(map(wordsAtPosition(i), wordMatched -> { |
// Word successor: the clone first flushes its pending text s[last, i), then
// jumps past the word and appends s[i, clone.i) to out. The unqualified i
// here is this state's i, not the clone's.
35 | GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi); |
36 | clone.flush(); |
37 | clone.i = clone.last = i+l(wordMatched); |
38 | clone.out = revChainPlus(clone.out, substring(s, i, clone.i)); |
39 | ret clone; |
// Skip successor: i moves forward by one while last stays put, so the skipped
// character becomes part of the unflushed text.
// NOTE(review): getVar presumably evaluates the lambda right away; confirm
40 | }), getVar(() -> { |
41 | GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi); |
42 | clone.i++; |
43 | ret clone; |
44 | }))); |
45 | } |
46 | |
// Final result: flushes any pending text, then returns out as a list on the "B" side.
47 | Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> done() { |
48 | flush(); |
49 | ret eitherB(asList(out)); |
50 | } |
51 | |
// The text between last and i that has not been added to out yet.
52 | S unflushed() { ret substring(s, last, i); } |
53 | |
// Appends the unflushed text to out and moves last up to i; does nothing when
// there is no pending text (i <= last).
54 | void flush { |
55 | if (i <= last) ret; |
56 | // modifying this object in spite of convention |
57 | out = revChainPlus(out, unflushed()); |
58 | last = i; |
59 | } |
60 | |
// Debug form: the pieces so far, the pending text (prefixed via prependIfNempty), then "i/length of s".
61 | toString { ret asList(out) + prependIfNempty("|", unflushed()) + ", " + i + "/" + l(s); } |
62 | |
// The pieces so far plus the pending text, joined by joinNemptiesWithSpace.
63 | S sentence() { ret joinNemptiesWithSpace(listPlus(asList(out), unflushed())); } |
64 | } |
65 | |
// Creates a fresh start state for the current `input`, using the vocabulary
// obtained by splitting `words` with splitAtSpace.
66 | GreedySplitIntoWordsCI_Multi root() { |
67 | ret new GreedySplitIntoWordsCI_Multi(input, splitAtSpace(words)); |
68 | } |
69 | |
// Module UI: a stack of two labelled fields (totalCombinations and firstResult,
// shown through dm_label) combined with a jDynamicEitherTree built from root().
// valueToText renders an "A" node as its state's sentence() followed by "..."
// and a "B" node as its pieces joined with spaces.
// NOTE(review): the north/center placement is inferred from the helper name; confirm.
70 | visual northAndCenterWithMargins( |
71 | jvstackWithSpacing( |
72 | withLabel("Total combinations:", dm_label totalCombinations()), |
73 | withLabel("Preferred result:", dm_label firstResult())), |
74 | jDynamicEitherTree(root(), |
75 | valueToText := (IF1<Either<GreedySplitIntoWordsCI_Multi, LS>, S>) x |
76 | -> isEitherA(x) ? eitherAOpt(x).sentence() + "..." : joinWithSpace(eitherBOpt(x)))); |
77 | |
// Startup work: computes firstResult and totalCombinations from the current
// `input` and `words`.
78 | start-thread { |
// NOTE(review): presumably reloads the module when `words` or `input` changes; confirm.
79 | dm_reloadOnFieldChange('words, 'input); |
// Stores the space-joined first result in firstResult and prints it.
// NOTE(review): "root()!" is presumably JavaX shorthand for calling get() on the
// root state, and "time { }" presumably reports the elapsed time; confirm both.
80 | time { |
81 | print("First result: " + setField(firstResult := joinWithSpace(getFirstResultOfEitherTree(root()!)))); |
82 | } |
83 | |
// For each position i the lambda supplies the lengths of all words found there
// plus a 1, matching the single-character skip successor in get().
84 | GreedySplitIntoWordsCI_Multi root = root(); |
85 | // We should subtract combinationsAtPosition(i-l(word)) for each word because I think it is counted twice |
86 | setField(totalCombinations := combinationsForPositionalParser( |
87 | l(input), i -> listPlus(lambdaMap l(root.wordsAtPosition(i)), 1), debug := true); |
88 | } |
89 | } |
Began life as a copy of #1028153
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028159 |
Snippet name: | greedySplitIntoWordsCI with choices Spike, shortened [OK] |
Eternal ID of this version: | #1028159/12 |
Text MD5: | 0b05f6aa10b1bab2de898d163f91fe32 |
Transpilation MD5: | 973fe94cf68116abf1ed4e6a46e133b0 |
Author: | stefan |
Category: | javax / stefan's os / nlp |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-26 14:12:21 |
Source code size: | 3487 bytes / 89 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 258 / 1190 |
Version history: | 11 change(s) |
Referenced in: | [show references] |