Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

99
LINES

< > BotCompany Repo | #1028153 // greedySplitIntoWordsCI with choices Spike [OK]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 911K of libraries. Click here for Pure Java version (5497L/27K).

cmodule AModule {
  switchable S input = "STATICALAFLATTENLISTOFPAIRSLPAIRALLAOUTEMPTYLISTLLFORPAIRAPUNNULLLOUTADDPAOUTADDPBRETOUT";
  switchable S words = "ret ll ls static out list pair pairs flatten null of if else while for";
  transient BigInt totalCombinations;

  // basically immutable (we make a new copy for every step)
  // to allow evaluating states in parallel and in any order
  class GreedySplitIntoWordsCI_Multi implements IF0<Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS>> {
    S s; // the input to be split
    TreeSet<S> wordsSet;
    Map<Int> longestMatchMap;
    int i = 0, last = 0;
    ReverseChain<S> out;
    
    *() {}
    *(S *s, Cl<S> words) {
      wordsSet = asCISet(words);
      longestMatchMap = new AutoMap<Int>(i -> dontPrint("longest match at " + i + ": ", lengthOfLongestPrefixInCISet(substring(s, i), wordsSet)));
    }
    
    Cl<S> wordsAtPosition(int i) {
      int longestMatch = longestMatchMap.get(i);
      ret mapNonNulls(countBackwardsTo1(longestMatch), matchLength -> {
        S word = substring(s, i, i+matchLength);
        ret contains(wordsSet, word) ? word : null;
      });
    }
      
    // either we return some choices or a final result
    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> get() {
      if (i >= l(s)) ret done(); // done with input
      ret eitherA(listPlus(map(wordsAtPosition(i), wordMatched -> {
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
        clone.flush();
        clone.i = clone.last = i+l(wordMatched);
        clone.out = revChainPlus(clone.out, substring(s, i, clone.i));
        ret clone;
      }), getVar(() -> {
        GreedySplitIntoWordsCI_Multi clone = shallowClone(this, new GreedySplitIntoWordsCI_Multi);
        clone.i++;
        ret clone;
      })));
    }

    Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> done() {
      flush();
      ret eitherB(asList(out));
    }

    void flush {
      if (i > last) {
        // modifying this object in spite of convention
        out = revChainPlus(out, substring(s, last, i));
        last = i;
      }
    }
    
    toString { ret asList(out) + prependIfNempty("|", substring(s, last, i)) + ", " + i + "/" + l(s); }
  }
  
  GreedySplitIntoWordsCI_Multi root() {
    ret new GreedySplitIntoWordsCI_Multi(input, splitAtSpace(words));
  }
  
  /*visual jDynamicTree(
    root()!,
    x -> getVars(eitherAOpt(x)));*/
  visual northAndCenterWithMargins(
    withLabel("Total combinations:", dm_label totalCombinations()),
    jDynamicTree(
      (Either<GreedySplitIntoWordsCI_Multi, LS>) (Either) eitherA(root()),
      x -> {
        if (!isEitherA(x)) null; // leaves have no children
        Either<Iterable<GreedySplitIntoWordsCI_Multi>, LS> result = eitherAOpt(x)!;
        ret (Cl<Either<GreedySplitIntoWordsCI_Multi, LS>>)
          (isEitherA(result) ? map eitherA(eitherGetA(result)) : ll(eitherBOpt(result)));
      }));

  start-thread {
    dm_reloadOnFieldChange('words, 'input);
    time {
      print("First result: " + getFirstResultOfEitherTree(root()!));
    }
    
    GreedySplitIntoWordsCI_Multi root = root();
    Map<Int, BigInt> combinationsFromPosition = autoTreeMap(() -> bigint(1));
    for (int i = l(input)-1; i >= 0; i--) {
      // skipping a character is one combination
      BigInt combinations = combinationsFromPosition.get(i+1);
      
      for (S word : root.wordsAtPosition(i))
        combinations = plus(combinations, combinationsFromPosition.get(i+l(word)));

      print("Combinations from " + i + ": " + combinations);
      combinationsFromPosition.put(i, combinations);
    }
    setField(totalCombinations := combinationsFromPosition.get(0));
  }
}

Author comment

Began life as a copy of #1028147

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028153
Snippet name: greedySplitIntoWordsCI with choices Spike [OK]
Eternal ID of this version: #1028153/62
Text MD5: 160495a7c2306f4999b11c85234145ec
Transpilation MD5: 0993c11201e253c3d5478b6826ae6e3c
Author: stefan
Category: javax / stefan's os / nlp
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-05-23 18:34:47
Source code size: 3815 bytes / 99 lines
Pitched / IR pitched: No / No
Views / Downloads: 294 / 1404
Version history: 61 change(s)
Referenced in: [show references]