Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

167
LINES

< > BotCompany Repo | #1029107 // LCStreamingCompressor [seems to work]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (4367L/28K).

1  
!include once #1027304 // Eclipse Collections
2  
3  
import org.eclipse.collections.impl.map.mutable.primitive.*;
4  
5  
// compressor that takes input byte by byte and compresses
6  
// as soon as possible.
7  
// byte mode+case-sensitive only
8  
// We now may have literals and pairs in any order
9  
final sclass LCStreamingCompressor {
10  
  LineComp_PairIndex.Ch chain;
11  
  
12  
  abstract sclass Chunk {
13  
    abstract S text(L<Chunk> chunks);
14  
    IntPair getPair() { null; }
15  
  }
16  
  
17  
  srecord CPair(int i1, int i2) > Chunk {
18  
    CPair(IntPair p) { i1 = p.a; i2 = p.b; }
19  
    
20  
    S text(L<Chunk> chunks) {
21  
      ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks));
22  
    }
23  
    
24  
    IntPair getPair() {
25  
      ret IntPair(i1, i2);
26  
    }
27  
  }
28  
  
29  
  srecord CPrim(char c) > Chunk {
30  
    S text(L<Chunk> chunks) { ret str(c); }
31  
  }
32  
33  
  bool verbose = false, verboseCompressionSteps = false;
34  
35  
  new LineComp_PairIndex pairIndex;
36  
  new L<Chunk> chunks;
37  
  new Map<Char, Int> literalIndex;
38  
  new LongIntHashMap existingPairsIndex; // formerly Map<IntPair, Int>
39  
  //new Map<Int, OptimizedMultiSet<Int>> popularRightPartners;
40  
  
41  
  *() {
42  
    // start with a single empty chain
43  
    pairIndex.existingPairsIndex = existingPairsIndex;
44  
    chain = pairIndex.newChain("main", null);
45  
    pairIndex.haveChains();
46  
    //pairIndex.onAddingPair = ll(lambda1 registerPartners);
47  
    //pairIndex.onRemovingPair = ll(lambda1 unregisterPartners);
48  
  }
49  
  
50  
  /*void registerPartners(LineComp_PairIndex.Node node) {
51  
    IntPair p = node.pair();
52  
    OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a);
53  
    if (ms == null) popularRightPartners.put(p.a, ms = new OptimizedMultiSet);
54  
    ms.add(p.b);
55  
    print("Added partners: " + p + " => " + popularRightPartners);
56  
  }
57  
  
58  
  void unregisterPartners(LineComp_PairIndex.Node node) {
59  
    IntPair p = node.pair();
60  
    OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a);
61  
    ms.remove(p.b);
62  
    if (ms.isEmpty())
63  
      popularRightPartners.remove(p.a);
64  
    print("Removed partners: " + p + " => " + popularRightPartners);
65  
  }*/
66  
  
67  
  void append(S s) {
68  
    fOr (char c : characters(s))
69  
      append(c);
70  
  }
71  
72  
  void append(char c) {
73  
    pairIndex.verbose = verboseCompressionSteps;
74  
    
75  
    // add literal to chain
76  
    
77  
    Int idx = literalIndex.get(c);
78  
    if (idx == null)
79  
      literalIndex.put(c, idx = addAndReturnIndex(chunks, new CPrim(c)));
80  
    
81  
    pairIndex.firstChain.add(pairIndex, idx);
82  
83  
    while (compressionStep()) {}    
84  
  }
85  
  
86  
  S exportEncoding() {
87  
    new LS buf;
88  
    buf.add("STREAMING BYTECOMP"); // magic signature
89  
    for (Chunk c : chunks) {
90  
      if (c cast CPair)
91  
        buf.add(c.i1 + " " + c.i2);
92  
      else
93  
        buf.add("prim " + ((CPrim) c).c);
94  
    }
95  
    buf.add("main=" + joinWithSpace(chain.toList()));
96  
    ret lines_rtrim(buf);
97  
  }
98  
  
99  
  bool compressionStep() {
100  
    IntPair toCompress;
101  
    bool change;
102  
    
103  
    // Compress only most popular pair in one step
104  
    while ping ((toCompress = pairIndex.mostPopularDuplicate()) != null) {
105  
      int count = pairIndex.getCount(toCompress);
106  
      Int idx = makeCPair(toCompress);
107  
      int dups = pairIndex.numberOfDuplicates();
108  
      if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining");
109  
      pairIndex.replacePair(toCompress.a, toCompress.b, idx);
110  
      set change;
111  
    }
112  
    ret change;
113  
  }
114  
  
115  
  Int makeCPair(IntPair p) {
116  
    long l = intPairToLong(p);
117  
    S text = null;
118  
    int idx = existingPairsIndex.getIfAbsent(l, -1);
119  
    if (idx < 0) {
120  
      idx = addAndReturnIndex(chunks, new CPair(p));
121  
      existingPairsIndex.put(l, idx);
122  
    }
123  
    ret idx;
124  
  }
125  
  
126  
  S itemToString(int idx) {
127  
    if (idx < 0) null;
128  
    new StringBuilder buf;
129  
    itemToString_step(buf, idx);
130  
    ret str(buf);
131  
  }
132  
  
133  
  void itemToString_step(StringBuilder buf, int idx) {
134  
    Chunk c = chunks.get(idx);
135  
    if (c cast CPair) {
136  
      itemToString_step(buf, c.i1);
137  
      itemToString_step(buf, c.i2);
138  
    } else
139  
      buf.append(((CPrim) c).c);
140  
  }
141  
  
142  
  int nItems() { ret l(chunks); }
143  
  
144  
  IntPair getPair(int idx) {
145  
    ret chunks.get(idx).getPair();
146  
  }
147  
  
148  
  S text() {
149  
    new StringBuilder buf;
150  
    for (int idx : chain.toList())
151  
      itemToString_step(buf, idx);
152  
    ret str(buf);
153  
  }
154  
  
155  
  int tailValue() {
156  
    ret chain.tail == null ? -1 : chain.tail.value;
157  
  }
158  
  
159  
  /*public LCStreamingCompressor clone() {
160  
    new LCStreamingCompressor c;
161  
    c.chunks = cloneList(chunks);
162  
    c.pairIndex = pairIndex.clone();
163  
    c.literalIndex = cloneMap(literalIndex);
164  
    c.existingPairsIndex = new LongIntHashMap(existingPairsIndex);
165  
    ret c;
166  
  }*/
167  
}

Author comment

Began life as a copy of #1028186

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029107
Snippet name: LCStreamingCompressor [seems to work]
Eternal ID of this version: #1029107/28
Text MD5: a40b956b928baa83512aa1e63a5249d8
Transpilation MD5: 70b52f455b5323c3b4e828785a79b763
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-20 02:12:09
Source code size: 4776 bytes / 167 lines
Pitched / IR pitched: No / No
Views / Downloads: 277 / 618
Version history: 27 change(s)
Referenced in: [show references]