Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

167
LINES

< > BotCompany Repo | #1029107 // LCStreamingCompressor [seems to work]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (4367L/28K).

!include once #1027304 // Eclipse Collections

import org.eclipse.collections.impl.map.mutable.primitive.*;

// compressor that takes input byte by byte and compresses
// as soon as possible.
// byte mode+case-sensitive only
// We now may have literals and pairs in any order
final sclass LCStreamingCompressor {
  LineComp_PairIndex.Ch chain;
  
  abstract sclass Chunk {
    abstract S text(L<Chunk> chunks);
    IntPair getPair() { null; }
  }
  
  srecord CPair(int i1, int i2) > Chunk {
    CPair(IntPair p) { i1 = p.a; i2 = p.b; }
    
    S text(L<Chunk> chunks) {
      ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks));
    }
    
    IntPair getPair() {
      ret IntPair(i1, i2);
    }
  }
  
  srecord CPrim(char c) > Chunk {
    S text(L<Chunk> chunks) { ret str(c); }
  }

  bool verbose = false, verboseCompressionSteps = false;

  new LineComp_PairIndex pairIndex;
  new L<Chunk> chunks;
  new Map<Char, Int> literalIndex;
  new LongIntHashMap existingPairsIndex; // formerly Map<IntPair, Int>
  //new Map<Int, OptimizedMultiSet<Int>> popularRightPartners;
  
  *() {
    // start with a single empty chain
    pairIndex.existingPairsIndex = existingPairsIndex;
    chain = pairIndex.newChain("main", null);
    pairIndex.haveChains();
    //pairIndex.onAddingPair = ll(lambda1 registerPartners);
    //pairIndex.onRemovingPair = ll(lambda1 unregisterPartners);
  }
  
  /*void registerPartners(LineComp_PairIndex.Node node) {
    IntPair p = node.pair();
    OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a);
    if (ms == null) popularRightPartners.put(p.a, ms = new OptimizedMultiSet);
    ms.add(p.b);
    print("Added partners: " + p + " => " + popularRightPartners);
  }
  
  void unregisterPartners(LineComp_PairIndex.Node node) {
    IntPair p = node.pair();
    OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a);
    ms.remove(p.b);
    if (ms.isEmpty())
      popularRightPartners.remove(p.a);
    print("Removed partners: " + p + " => " + popularRightPartners);
  }*/
  
  void append(S s) {
    fOr (char c : characters(s))
      append(c);
  }

  void append(char c) {
    pairIndex.verbose = verboseCompressionSteps;
    
    // add literal to chain
    
    Int idx = literalIndex.get(c);
    if (idx == null)
      literalIndex.put(c, idx = addAndReturnIndex(chunks, new CPrim(c)));
    
    pairIndex.firstChain.add(pairIndex, idx);

    while (compressionStep()) {}    
  }
  
  S exportEncoding() {
    new LS buf;
    buf.add("STREAMING BYTECOMP"); // magic signature
    for (Chunk c : chunks) {
      if (c cast CPair)
        buf.add(c.i1 + " " + c.i2);
      else
        buf.add("prim " + ((CPrim) c).c);
    }
    buf.add("main=" + joinWithSpace(chain.toList()));
    ret lines_rtrim(buf);
  }
  
  bool compressionStep() {
    IntPair toCompress;
    bool change;
    
    // Compress only most popular pair in one step
    while ping ((toCompress = pairIndex.mostPopularDuplicate()) != null) {
      int count = pairIndex.getCount(toCompress);
      Int idx = makeCPair(toCompress);
      int dups = pairIndex.numberOfDuplicates();
      if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining");
      pairIndex.replacePair(toCompress.a, toCompress.b, idx);
      set change;
    }
    ret change;
  }
  
  Int makeCPair(IntPair p) {
    long l = intPairToLong(p);
    S text = null;
    int idx = existingPairsIndex.getIfAbsent(l, -1);
    if (idx < 0) {
      idx = addAndReturnIndex(chunks, new CPair(p));
      existingPairsIndex.put(l, idx);
    }
    ret idx;
  }
  
  S itemToString(int idx) {
    if (idx < 0) null;
    new StringBuilder buf;
    itemToString_step(buf, idx);
    ret str(buf);
  }
  
  void itemToString_step(StringBuilder buf, int idx) {
    Chunk c = chunks.get(idx);
    if (c cast CPair) {
      itemToString_step(buf, c.i1);
      itemToString_step(buf, c.i2);
    } else
      buf.append(((CPrim) c).c);
  }
  
  int nItems() { ret l(chunks); }
  
  IntPair getPair(int idx) {
    ret chunks.get(idx).getPair();
  }
  
  S text() {
    new StringBuilder buf;
    for (int idx : chain.toList())
      itemToString_step(buf, idx);
    ret str(buf);
  }
  
  int tailValue() {
    ret chain.tail == null ? -1 : chain.tail.value;
  }
  
  /*public LCStreamingCompressor clone() {
    new LCStreamingCompressor c;
    c.chunks = cloneList(chunks);
    c.pairIndex = pairIndex.clone();
    c.literalIndex = cloneMap(literalIndex);
    c.existingPairsIndex = new LongIntHashMap(existingPairsIndex);
    ret c;
  }*/
}

Author comment

Began life as a copy of #1028186

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029107
Snippet name: LCStreamingCompressor [seems to work]
Eternal ID of this version: #1029107/28
Text MD5: a40b956b928baa83512aa1e63a5249d8
Transpilation MD5: 70b52f455b5323c3b4e828785a79b763
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-20 02:12:09
Source code size: 4776 bytes / 167 lines
Pitched / IR pitched: No / No
Views / Downloads: 278 / 620
Version history: 27 change(s)
Referenced in: #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674)