Not logged in.  Login/Logout/Register | List snippets | Create snippet | Upload image | Upload data

89
LINES

< > BotCompany Repo | #1028159 // greedySplitIntoWordsCI with choices Spike, shortened [OK]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 911K of libraries. Click here for Pure Java version (5718L/29K).

1  
cmodule AModule {
2  
  // Text to be split into words; handed to the parser as its input in root().
  switchable S input = "STATICALAFLATTENLISTOFPAIRSLPAIRALLAOUTEMPTYLISTLLFORPAIRAPUNNULLLOUTADDPAOUTADDPBRETOUT";
3  
  // Vocabulary as one space-separated string; root() splits it with splitAtSpace.
  switchable S words = "ret ll ls static out list pair pairs flatten null of if else while for";
4  
  
5  
  S firstResult; // set in start-thread: first result found in the either-tree, joined with spaces
6  
  transient BigInt totalCombinations; // set in start-thread from combinationsForPositionalParser
7  
8  
  // basically immutable (we make a new copy for every step)
9  
  // to allow evaluating states in parallel and in any order
10  
  // One parser state. Calling get() yields either a collection of follow-up
  // states (the "A" side) or the final list of segments (the "B" side).
  class GreedySplitIntoWordsCI_Multi implements IF0<Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS>> {
11  
    S s; // the input to be split
12  
    TreeSet<S> wordsSet; // vocabulary, built with asCISet (presumably a case-insensitive set - confirm)
13  
    Map<Int> longestMatchMap; // position -> length of the longest vocabulary prefix of s starting there
14  
    int i = 0, last = 0; // i: current position in s; last: start of the text not yet flushed to out
15  
    ReverseChain<S> out; // segments emitted so far
16  
    
17  
    *() {} // no-arg constructor; used for the clone targets in get()
18  
    // Stores s (via the *s parameter shorthand) and builds the word set and the longest-match map.
    *(S *s, Cl<S> words) {
19  
      wordsSet = asCISet(words);
20  
      // NOTE(review): AutoMap presumably computes and caches entries on first access - confirm.
      longestMatchMap = new AutoMap<Int>(i -> dontPrint("longest match at " + i + ": ", lengthOfLongestPrefixInCISet(substring(s, i), wordsSet)));
21  
    }
22  
    
23  
    // Returns the vocabulary words that start at position i of s.
    // Candidate lengths come from countBackwardsTo1(longestMatch), presumably
    // longest first (TODO confirm); substrings not in wordsSet are dropped.
    Cl<S> wordsAtPosition(int i) {
24  
      int longestMatch = longestMatchMap.get(i);
25  
      ret mapNonNulls(countBackwardsTo1(longestMatch), matchLength -> {
26  
        S word = substring(s, i, i+matchLength);
27  
        ret contains(wordsSet, word) ? word : null;
28  
      });
29  
    }
30  
      
31  
    // either we return some choices or a final result
32  
    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> get() {
33  
      if (i >= l(s)) ret done(); // done with input
34  
      // Choices: one clone per word matching at i, then one extra clone that skips a character.
      ret eitherA(listPlus(map(wordsAtPosition(i), wordMatched -> {
35  
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
36  
        clone.flush(); // emit any skipped text between last and i as its own segment first
37  
        clone.i = clone.last = i+l(wordMatched); // move past the matched word
38  
        clone.out = revChainPlus(clone.out, substring(s, i, clone.i)); // append the word as it appears in s
39  
        ret clone;
40  
      }), getVar(() -> {
41  
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
42  
        clone.i++; // skip one character; it stays unflushed (last is not moved)
43  
        ret clone;
44  
      })));
45  
    }
46  
47  
    // Final result: flushes the remaining unflushed text and returns out as a list (the "B" side).
    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> done() {
48  
      flush();
49  
      ret eitherB(asList(out));
50  
    }
51  
    
52  
    S unflushed() { ret substring(s, last, i); } // text between last and i, not yet added to out
53  
54  
    // Appends the unflushed text to out and sets last = i; does nothing when i <= last.
    void flush {
55  
      if (i <= last) ret;
56  
      // modifying this object in spite of convention
57  
      out = revChainPlus(out, unflushed());
58  
      last = i;
59  
    }
60  
    
61  
    // Renders: segments so far, "|" + unflushed text when that is non-empty, then position/length.
    toString { ret asList(out) + prependIfNempty("|", unflushed()) + ", " + i + "/" + l(s); }
62  
    
63  
    // Segments so far plus the unflushed text, joined via joinNemptiesWithSpace.
    S sentence() { ret joinNemptiesWithSpace(listPlus(asList(out), unflushed())); }
64  
  }
65  
  
66  
  // Creates a new initial parser state from the current input and words fields.
  GreedySplitIntoWordsCI_Multi root() {
67  
    ret new GreedySplitIntoWordsCI_Multi(input, splitAtSpace(words));
68  
  }
69  
  
70  
  // UI: two labels (total combinations, preferred result) on top, the either-tree below.
  visual northAndCenterWithMargins(
71  
    jvstackWithSpacing(
72  
      withLabel("Total combinations:", dm_label totalCombinations()),
73  
      withLabel("Preferred result:", dm_label firstResult())),
74  
    jDynamicEitherTree(root(),
75  
      // Tree node text: sentence() followed by "..." for a state, space-joined segments for a result.
      valueToText := (IF1<Either<GreedySplitIntoWordsCI_Multi, LS>, S>) x
76  
        -> isEitherA(x) ? eitherAOpt(x).sentence() + "..." : joinWithSpace(eitherBOpt(x))));
77  
78  
  start-thread {
79  
    dm_reloadOnFieldChange('words, 'input);
80  
    // Compute, store and print the first result inside a time { } block.
    time {
81  
      print("First result: " + setField(firstResult := joinWithSpace(getFirstResultOfEitherTree(root()!))));
82  
    }
83  
    
84  
    GreedySplitIntoWordsCI_Multi root = root();
85  
    // TODO: We should subtract combinationsAtPosition(i-l(word)) for each word because I think it is counted twice
86  
    // Step lengths offered at position i: the length of every word matching there, plus 1 (skip one character).
    setField(totalCombinations := combinationsForPositionalParser(
87  
      l(input), i -> listPlus(lambdaMap l(root.wordsAtPosition(i)), 1), debug := true);
88  
  }
89  
}

Author comment

Began life as a copy of #1028153

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028159
Snippet name: greedySplitIntoWordsCI with choices Spike, shortened [OK]
Eternal ID of this version: #1028159/12
Text MD5: 0b05f6aa10b1bab2de898d163f91fe32
Transpilation MD5: 973fe94cf68116abf1ed4e6a46e133b0
Author: stefan
Category: javax / stefan's os / nlp
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-05-26 14:12:21
Source code size: 3487 bytes / 89 lines
Pitched / IR pitched: No / No
Views / Downloads: 258 / 1190
Version history: 11 change(s)
Referenced in: [show references]