Uses 911K of libraries. Click here for Pure Java version (5497L/27K).
1 | cmodule AModule { |
2 | switchable S input = "STATICALAFLATTENLISTOFPAIRSLPAIRALLAOUTEMPTYLISTLLFORPAIRAPUNNULLLOUTADDPAOUTADDPBRETOUT"; |
3 | switchable S words = "ret ll ls static out list pair pairs flatten null of if else while for"; |
4 | transient BigInt totalCombinations; |
5 | |
6 | // basically immutable (we make a new copy for every step) |
7 | // to allow evaluating states in parallel and in any order |
8 | class GreedySplitIntoWordsCI_Multi implements IF0<Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS>> { |
9 | S s; // the input to be split |
10 | TreeSet<S> wordsSet; |
11 | Map<Int> longestMatchMap; |
12 | int i = 0, last = 0; |
13 | ReverseChain<S> out; |
14 | |
15 | *() {} |
16 | *(S *s, Cl<S> words) { |
17 | wordsSet = asCISet(words); |
18 | longestMatchMap = new AutoMap<Int>(i -> dontPrint("longest match at " + i + ": ", lengthOfLongestPrefixInCISet(substring(s, i), wordsSet))); |
19 | } |
20 | |
21 | Cl<S> wordsAtPosition(int i) { |
22 | int longestMatch = longestMatchMap.get(i); |
23 | ret mapNonNulls(countBackwardsTo1(longestMatch), matchLength -> { |
24 | S word = substring(s, i, i+matchLength); |
25 | ret contains(wordsSet, word) ? word : null; |
26 | }); |
27 | } |
28 | |
29 | // either we return some choices or a final result |
30 | Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> get() { |
31 | if (i >= l(s)) ret done(); // done with input |
32 | ret eitherA(listPlus(map(wordsAtPosition(i), wordMatched -> { |
33 | GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi); |
34 | clone.flush(); |
35 | clone.i = clone.last = i+l(wordMatched); |
36 | clone.out = revChainPlus(clone.out, substring(s, i, clone.i)); |
37 | ret clone; |
38 | }), getVar(() -> { |
39 | GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi); |
40 | clone.i++; |
41 | ret clone; |
42 | }))); |
43 | } |
44 | |
45 | Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> done() { |
46 | flush(); |
47 | ret eitherB(asList(out)); |
48 | } |
49 | |
50 | void flush { |
51 | if (i > last) { |
52 | // modifying this object in spite of convention |
53 | out = revChainPlus(out, substring(s, last, i)); |
54 | last = i; |
55 | } |
56 | } |
57 | |
58 | toString { ret asList(out) + prependIfNempty("|", substring(s, last, i)) + ", " + i + "/" + l(s); } |
59 | } |
60 | |
61 | GreedySplitIntoWordsCI_Multi root() { |
62 | ret new GreedySplitIntoWordsCI_Multi(input, splitAtSpace(words)); |
63 | } |
64 | |
65 | /*visual jDynamicTree( |
66 | root()!, |
67 | x -> getVars(eitherAOpt(x)));*/ |
68 | visual northAndCenterWithMargins( |
69 | withLabel("Total combinations:", dm_label totalCombinations()), |
70 | jDynamicTree( |
71 | (Either<GreedySplitIntoWordsCI_Multi, LS>) (Either) eitherA(root()), |
72 | x -> { |
73 | if (!isEitherA(x)) null; // leaves have no children |
74 | Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> result = eitherAOpt(x)!; |
75 | ret (Cl<Either<GreedySplitIntoWordsCI_Multi, LS>>) |
76 | (isEitherA(result) ? map eitherA(eitherGetA(result)) : ll(eitherBOpt(result))); |
77 | })); |
78 | |
79 | start-thread { |
80 | dm_reloadOnFieldChange('words, 'input); |
81 | time { |
82 | print("First result: " + getFirstResultOfEitherTree(root()!)); |
83 | } |
84 | |
85 | GreedySplitIntoWordsCI_Multi root = root(); |
86 | Map<Int, BigInt> combinationsFromPosition = autoTreeMap(() -> bigint(1)); |
87 | for (int i = l(input)-1; i >= 0; i--) { |
88 | // skipping a character is one combination |
89 | BigInt combinations = combinationsFromPosition.get(i+1); |
90 | |
91 | for (S word : root.wordsAtPosition(i)) |
92 | combinations = plus(combinations, combinationsFromPosition.get(i+l(word))); |
93 | |
94 | print("Combinations from " + i + ": " + combinations); |
95 | combinationsFromPosition.put(i, combinations); |
96 | } |
97 | setField(totalCombinations := combinationsFromPosition.get(0)); |
98 | } |
99 | } |
Began life as a copy of #1028147
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028153 |
Snippet name: | greedySplitIntoWordsCI with choices Spike [OK] |
Eternal ID of this version: | #1028153/62 |
Text MD5: | 160495a7c2306f4999b11c85234145ec |
Transpilation MD5: | 0993c11201e253c3d5478b6826ae6e3c |
Author: | stefan |
Category: | javax / stefan's os / nlp |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-23 18:34:47 |
Source code size: | 3815 bytes / 99 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 348 / 1484 |
Version history: | 61 change(s) |
Referenced in: | [show references] |