Uses 11335K of libraries. Click here for Pure Java version (4367L/28K).
1 | !include once #1027304 // Eclipse Collections |
2 | |
3 | import org.eclipse.collections.impl.map.mutable.primitive.*; |
4 | |
5 | // compressor that takes input byte by byte and compresses |
6 | // as soon as possible. |
7 | // byte mode+case-sensitive only |
8 | // We now may have literals and pairs in any order |
9 | final sclass LCStreamingCompressor { |
10 | LineComp_PairIndex.Ch chain; |
11 | |
12 | abstract sclass Chunk { |
13 | abstract S text(L<Chunk> chunks); |
14 | IntPair getPair() { null; } |
15 | } |
16 | |
17 | srecord CPair(int i1, int i2) > Chunk { |
18 | CPair(IntPair p) { i1 = p.a; i2 = p.b; } |
19 | |
20 | S text(L<Chunk> chunks) { |
21 | ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks)); |
22 | } |
23 | |
24 | IntPair getPair() { |
25 | ret IntPair(i1, i2); |
26 | } |
27 | } |
28 | |
29 | srecord CPrim(char c) > Chunk { |
30 | S text(L<Chunk> chunks) { ret str(c); } |
31 | } |
32 | |
33 | bool verbose = false, verboseCompressionSteps = false; |
34 | |
35 | new LineComp_PairIndex pairIndex; |
36 | new L<Chunk> chunks; |
37 | new Map<Char, Int> literalIndex; |
38 | new LongIntHashMap existingPairsIndex; // formerly Map<IntPair, Int> |
39 | //new Map<Int, OptimizedMultiSet<Int>> popularRightPartners; |
40 | |
41 | *() { |
42 | // start with a single empty chain |
43 | pairIndex.existingPairsIndex = existingPairsIndex; |
44 | chain = pairIndex.newChain("main", null); |
45 | pairIndex.haveChains(); |
46 | //pairIndex.onAddingPair = ll(lambda1 registerPartners); |
47 | //pairIndex.onRemovingPair = ll(lambda1 unregisterPartners); |
48 | } |
49 | |
50 | /*void registerPartners(LineComp_PairIndex.Node node) { |
51 | IntPair p = node.pair(); |
52 | OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a); |
53 | if (ms == null) popularRightPartners.put(p.a, ms = new OptimizedMultiSet); |
54 | ms.add(p.b); |
55 | print("Added partners: " + p + " => " + popularRightPartners); |
56 | } |
57 | |
58 | void unregisterPartners(LineComp_PairIndex.Node node) { |
59 | IntPair p = node.pair(); |
60 | OptimizedMultiSet<Int> ms = popularRightPartners.get(p.a); |
61 | ms.remove(p.b); |
62 | if (ms.isEmpty()) |
63 | popularRightPartners.remove(p.a); |
64 | print("Removed partners: " + p + " => " + popularRightPartners); |
65 | }*/ |
66 | |
67 | void append(S s) { |
68 | fOr (char c : characters(s)) |
69 | append(c); |
70 | } |
71 | |
72 | void append(char c) { |
73 | pairIndex.verbose = verboseCompressionSteps; |
74 | |
75 | // add literal to chain |
76 | |
77 | Int idx = literalIndex.get(c); |
78 | if (idx == null) |
79 | literalIndex.put(c, idx = addAndReturnIndex(chunks, new CPrim(c))); |
80 | |
81 | pairIndex.firstChain.add(pairIndex, idx); |
82 | |
83 | while (compressionStep()) {} |
84 | } |
85 | |
86 | S exportEncoding() { |
87 | new LS buf; |
88 | buf.add("STREAMING BYTECOMP"); // magic signature |
89 | for (Chunk c : chunks) { |
90 | if (c cast CPair) |
91 | buf.add(c.i1 + " " + c.i2); |
92 | else |
93 | buf.add("prim " + ((CPrim) c).c); |
94 | } |
95 | buf.add("main=" + joinWithSpace(chain.toList())); |
96 | ret lines_rtrim(buf); |
97 | } |
98 | |
99 | bool compressionStep() { |
100 | IntPair toCompress; |
101 | bool change; |
102 | |
103 | // Compress only most popular pair in one step |
104 | while ping ((toCompress = pairIndex.mostPopularDuplicate()) != null) { |
105 | int count = pairIndex.getCount(toCompress); |
106 | Int idx = makeCPair(toCompress); |
107 | int dups = pairIndex.numberOfDuplicates(); |
108 | if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining"); |
109 | pairIndex.replacePair(toCompress.a, toCompress.b, idx); |
110 | set change; |
111 | } |
112 | ret change; |
113 | } |
114 | |
115 | Int makeCPair(IntPair p) { |
116 | long l = intPairToLong(p); |
117 | S text = null; |
118 | int idx = existingPairsIndex.getIfAbsent(l, -1); |
119 | if (idx < 0) { |
120 | idx = addAndReturnIndex(chunks, new CPair(p)); |
121 | existingPairsIndex.put(l, idx); |
122 | } |
123 | ret idx; |
124 | } |
125 | |
126 | S itemToString(int idx) { |
127 | if (idx < 0) null; |
128 | new StringBuilder buf; |
129 | itemToString_step(buf, idx); |
130 | ret str(buf); |
131 | } |
132 | |
133 | void itemToString_step(StringBuilder buf, int idx) { |
134 | Chunk c = chunks.get(idx); |
135 | if (c cast CPair) { |
136 | itemToString_step(buf, c.i1); |
137 | itemToString_step(buf, c.i2); |
138 | } else |
139 | buf.append(((CPrim) c).c); |
140 | } |
141 | |
142 | int nItems() { ret l(chunks); } |
143 | |
144 | IntPair getPair(int idx) { |
145 | ret chunks.get(idx).getPair(); |
146 | } |
147 | |
148 | S text() { |
149 | new StringBuilder buf; |
150 | for (int idx : chain.toList()) |
151 | itemToString_step(buf, idx); |
152 | ret str(buf); |
153 | } |
154 | |
155 | int tailValue() { |
156 | ret chain.tail == null ? -1 : chain.tail.value; |
157 | } |
158 | |
159 | /*public LCStreamingCompressor clone() { |
160 | new LCStreamingCompressor c; |
161 | c.chunks = cloneList(chunks); |
162 | c.pairIndex = pairIndex.clone(); |
163 | c.literalIndex = cloneMap(literalIndex); |
164 | c.existingPairsIndex = new LongIntHashMap(existingPairsIndex); |
165 | ret c; |
166 | }*/ |
167 | } |
Began life as a copy of #1028186
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029107 |
Snippet name: | LCStreamingCompressor [seems to work] |
Eternal ID of this version: | #1029107/28 |
Text MD5: | a40b956b928baa83512aa1e63a5249d8 |
Transpilation MD5: | 70b52f455b5323c3b4e828785a79b763 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-20 02:12:09 |
Source code size: | 4776 bytes / 167 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 277 / 618 |
Version history: | 27 change(s) |
Referenced in: | [show references] |