!include once #1027304 // Eclipse Collections import org.eclipse.collections.impl.map.mutable.primitive.*; // compressor that takes input byte by byte and compresses // as soon as possible. // byte mode+case-sensitive only // We now may have literals and pairs in any order final sclass LCStreamingCompressor { LineComp_PairIndex.Ch chain; abstract sclass Chunk { abstract S text(L chunks); IntPair getPair() { null; } } srecord CPair(int i1, int i2) > Chunk { CPair(IntPair p) { i1 = p.a; i2 = p.b; } S text(L chunks) { ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks)); } IntPair getPair() { ret IntPair(i1, i2); } } srecord CPrim(char c) > Chunk { S text(L chunks) { ret str(c); } } bool verbose = false, verboseCompressionSteps = false; new LineComp_PairIndex pairIndex; new L chunks; new Map literalIndex; new LongIntHashMap existingPairsIndex; // formerly Map //new Map> popularRightPartners; *() { // start with a single empty chain pairIndex.existingPairsIndex = existingPairsIndex; chain = pairIndex.newChain("main", null); pairIndex.haveChains(); //pairIndex.onAddingPair = ll(lambda1 registerPartners); //pairIndex.onRemovingPair = ll(lambda1 unregisterPartners); } /*void registerPartners(LineComp_PairIndex.Node node) { IntPair p = node.pair(); OptimizedMultiSet ms = popularRightPartners.get(p.a); if (ms == null) popularRightPartners.put(p.a, ms = new OptimizedMultiSet); ms.add(p.b); print("Added partners: " + p + " => " + popularRightPartners); } void unregisterPartners(LineComp_PairIndex.Node node) { IntPair p = node.pair(); OptimizedMultiSet ms = popularRightPartners.get(p.a); ms.remove(p.b); if (ms.isEmpty()) popularRightPartners.remove(p.a); print("Removed partners: " + p + " => " + popularRightPartners); }*/ void append(S s) { fOr (char c : characters(s)) append(c); } void append(char c) { pairIndex.verbose = verboseCompressionSteps; // add literal to chain Int idx = literalIndex.get(c); if (idx == null) literalIndex.put(c, idx = addAndReturnIndex(chunks, new CPrim(c))); pairIndex.firstChain.add(pairIndex, idx); while (compressionStep()) {} } S exportEncoding() { new LS buf; buf.add("STREAMING BYTECOMP"); // magic signature for (Chunk c : chunks) { if (c cast CPair) buf.add(c.i1 + " " + c.i2); else buf.add("prim " + ((CPrim) c).c); } buf.add("main=" + joinWithSpace(chain.toList())); ret lines_rtrim(buf); } bool compressionStep() { IntPair toCompress; bool change; // Compress only most popular pair in one step while ping ((toCompress = pairIndex.mostPopularDuplicate()) != null) { int count = pairIndex.getCount(toCompress); Int idx = makeCPair(toCompress); int dups = pairIndex.numberOfDuplicates(); if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining"); pairIndex.replacePair(toCompress.a, toCompress.b, idx); set change; } ret change; } Int makeCPair(IntPair p) { long l = intPairToLong(p); S text = null; int idx = existingPairsIndex.getIfAbsent(l, -1); if (idx < 0) { idx = addAndReturnIndex(chunks, new CPair(p)); existingPairsIndex.put(l, idx); } ret idx; } S itemToString(int idx) { if (idx < 0) null; new StringBuilder buf; itemToString_step(buf, idx); ret str(buf); } void itemToString_step(StringBuilder buf, int idx) { Chunk c = chunks.get(idx); if (c cast CPair) { itemToString_step(buf, c.i1); itemToString_step(buf, c.i2); } else buf.append(((CPrim) c).c); } int nItems() { ret l(chunks); } IntPair getPair(int idx) { ret chunks.get(idx).getPair(); } S text() { new StringBuilder buf; for (int idx : chain.toList()) itemToString_step(buf, idx); ret str(buf); } int tailValue() { ret chain.tail == null ? -1 : chain.tail.value; } /*public LCStreamingCompressor clone() { new LCStreamingCompressor c; c.chunks = cloneList(chunks); c.pairIndex = pairIndex.clone(); c.literalIndex = cloneMap(literalIndex); c.existingPairsIndex = new LongIntHashMap(existingPairsIndex); ret c; }*/ }