!include once #1027304 // Eclipse Collections import org.eclipse.collections.impl.map.mutable.primitive.*; // compressor that takes input byte by byte and compresses // as soon as possible. // byte mode+case-sensitive only // We now may have literals and pairs in any order final sclass LCStreamingCompressor { int safety = 0; abstract sclass Chunk { abstract S text(L chunks); IntPair getPair() { null; } } srecord CPair(int i1, int i2) > Chunk { CPair(IntPair p) { i1 = p.a; i2 = p.b; } S text(L chunks) { ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks)); } IntPair getPair() { ret IntPair(i1, i2); } } srecord CPrim(char c) > Chunk { S text(L chunks) { ret str(c); } } bool verbose = false, verboseCompressionSteps = false; new LineComp_PairIndex pairIndex; new L chunks; Map literalIndex; new LongIntHashMap existingPairsIndex; // formerly Map *() { // start with a single empty chain pairIndex.newChain("main", null); } void addChar(char c) { pairIndex.verbose = verboseCompressionSteps; // add literal to chain Int idx = literalIndex.get(c); if (idx == null) literalIndex.put(c, idx = addAndReturnIndex(chunks, new CPrim(c))); pairIndex.firstChain.add(pairIndex, idx); while (compressionStep()) {} } S exportEncoding() { new LS buf; buf.add("STREAMING BYTECOMP"); // magic signature for (Chunk c : chunks) { if (c cast CPair) buf.add(c.i1 + " " + c.i2); else buf.add("prim " + ((CPrim) c).c); } buf.add("main=" + joinWithSpace(firstChains.toList())); ret lines_rtrim(buf); } bool compressionStep() { IntPair toCompress; bool change; // Compress only most popular pair in one step while ping ((toCompress = pairIndex.mostPopularDuplicate()) != null) { int count = pairIndex.getCount(toCompress); Int idx = makeCPair(toCompress); int dups = pairIndex.numberOfDuplicates(); if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining"); pairIndex.replacePair(toCompress.a, toCompress.b, idx); set change; } ret change; } Int makeCPair(IntPair p) { long l = intPairToLong(p); S text = null; int idx = existingPairsIndex.getIfAbsent(l, -1); if (idx < 0) { idx = addAndReturnIndex(chunks, new CPair(p)); existingPairsIndex.put(l, idx); } ret idx; } S itemToString(int idx) { new StringBuilder buf; itemToString_step(buf, idx); ret str(buf); } void itemToString_step(StringBuilder buf, int idx) { Chunk c = chunks.get(idx); if (c cast CPair) { itemToString_step(buf, c.i1); itemToString_step(buf, c.i2); } else { S s = ((CPrim) c).s; buf.append(s); } } int nLiterals() { ret primChunks; } int nItems() { ret l(chunks); } IntPair getPair(int idx) { ret chunks.get(idx).getPair(); } }