// Reads (and can re-save) a LINECOMP / BYTECOMP file.
//
// File layout, as parsed by load() and emitted by save():
//   1. header line: "LINECOMP <n>" or "BYTECOMP <n>" (n = number of literals)
//   2. n literal lines (in byte mode each line is the hex code of one char)
//   3. pair lines "<a> <b>" - two int pointers per line
//   4. version lines "<name>=<idx> <idx> ..."
// A pointer smaller than the number of literals refers to a literal;
// any other pointer refers to pair number (pointer - number of literals).
sclass LineCompReader {
  new LS literals;
  IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
  int[] literalOffsets; // where they start in file (counted as line length + 1 per line)
  new LongBuffer pairs;
  new LinkedHashMap<S, L<Int>> versions; // version name -> list of pointers
  bool byteMode; // true if the header said BYTECOMP

  // internal, optional (only set by load(File); used for progress output)
  CountingInputStream countingInputStream;
  long fileSize;

  *() {}

  // takes text or gzipped input file
  *(File f) { load(f); }
  *(InputStream in) { load(rawByteReader(in, 128*1024)); }
  *(BufferedReader reader) { load(reader); }

  // Loads a plain or gzipped file. Remembers the file size and wraps the
  // raw file stream in a counting stream so load(BufferedReader) can
  // print a percentage while reading pairs.
  void load(File f) {
    fileSize = fileSize(f);
    countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
    temp BufferedReader reader = isGZipFile(f)
      ? rawByteReader(gzipInputStream(countingInputStream))
      : rawByteReader(countingInputStream);
    //rawByteReader_possiblyGZipped(f);
    load(reader);
  }

  // Parses header, literals, pairs and versions from reader.
  // Fails with "Not a LINECOMP file" if the header is neither
  // LINECOMP nor BYTECOMP.
  void load(BufferedReader reader) ctex {
    new StringBuilder lineBuf;
    S s = readLineIgnoreCR(reader, lineBuf);
    int ofs = l(s)+1;
    new Matches m;
    if (startsWith(s, "BYTECOMP ", m))
      set byteMode;
    else if (!startsWith(s, "LINECOMP ", m))
      fail("Not a LINECOMP file");
    int nLiterals = parseInt(m.rest());

    // literals section; offsets gets one extra entry marking the end
    new IntBuffer offsets;
    for i to nLiterals: {
      S line = readLineIgnoreCR(reader, lineBuf);
      assertNotNull(line);
      literals.add(byteMode ? str(charFromHex(line)) : line);
      offsets.add(ofs);
      ofs += l(line)+1;
    }
    offsets.add(ofs);
    literalOffsets = offsets.toArray();

    // pairs section - ends at EOF or at the first line containing "="
    int n = 0; // number of pairs read so far
    while licensed {
      s = readLineIgnoreCR(reader, lineBuf);
      if (s == null || contains(s, "=")) break;

      try {
        int iSpace = s.indexOf(' ');
        long pair = twoIntsToLong(
          Int.parseInt(s, 0, iSpace, 10),
          Int.parseInt(s, iSpace+1, l(s), 10));

        if (onPair != null)
          onPair.get(pair);
        else
          pairs.add(pair);

        // progress message every million pairs
        if (((++n) % oneMillion()) == 0) {
          S percentage = "";
          if (fileSize != 0 && countingInputStream != null)
            percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
          print(nPairs(n) + " read" + percentage);
        }
      } on fail {
        // Use the running counter n rather than l(pairs): in streaming
        // mode (onPair != null) pairs stays empty. 1-based file line =
        // header line + literal lines + pairs already read + this line.
        print("On line " + (nLiterals + n + 2));
      }
    }

    pairs.trimToSize();

    // versions section - every remaining line containing "="
    while (contains(s, "=")) {
      int i = indexOf(s, '=');
      versions.put(takeFirst(s, i),
        compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
      s = readLineIgnoreCR(reader, lineBuf);
    }
  }

  // names of all versions, in file order
  Set<S> versions() { ret keys(versions); }

  S getText(S version) { ret textForVersion(version); }

  // Decoded text of a version, or null if there is no such version.
  // Needs the pairs to be stored, i.e. onPair must have been null
  // during load.
  S textForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) null;
    new LS buf;
    for (int idx : encoded)
      decode(idx, buf);
    ret myFromLines(buf);
  }

  // name of first (or only) file
  S firstFile() { ret first(versions()); }

  // text for first (or only) file
  S text() { ret getText(firstFile()); }

  // pointer list of first (or only) file
  L<Int> encoding() { ret versions.get(firstFile()); }

  // byte mode: plain concatenation; line mode: fromLines_rtrim
  S myFromLines(LS l) {
    ret byteMode ? join(l) : fromLines_rtrim(l);
  }

  // Appends the literals that pointer idx expands to onto buf.
  // NOTE(review): recursion depth follows pair nesting depth - confirm
  // that very deep pair chains cannot occur in practice.
  void decode(int idx, LS buf) {
    if (idx < l(literals))
      buf.add(literals.get(idx));
    else {
      long p = pairs.get(idx-l(literals));
      decode(firstIntFromLong(p), buf);
      decode(secondIntFromLong(p), buf);
    }
  }

  // That was it! The rest of this file is just for calculating some stats.

  new Map<Int, Int> lineCountsForPairs; // cache: pair pointer -> line count
  new Map<Int, Long> byteCountsForPairs; // cache: pair pointer -> byte count

  int lineCountForPointer(int idx) {
    ret idx < l(literals) ? 1 : lineCountForPair(idx);
  }

  // NOTE(review): counts literal length + 1 regardless of byteMode,
  // although myFromLines joins without separators in byte mode - confirm
  // whether the +1 is intended there.
  long byteCountForPointer(int idx) {
    ret idx < l(literals) ? l(literals.get(idx))+1 : byteCountForPair(idx);
  }

  int lineCountForPair(int idx) {
    Int c = lineCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = lineCountForPointer(firstIntFromLong(p))
        + lineCountForPointer(secondIntFromLong(p));
      lineCountsForPairs.put(idx, c);
    }
    ret c;
  }

  long byteCountForPair(int idx) {
    Long c = byteCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = byteCountForPointer(firstIntFromLong(p))
        + byteCountForPointer(secondIntFromLong(p));
      byteCountsForPairs.put(idx, c);
    }
    ret c;
  }

  // 0 for an unknown version
  int lineCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    int n = 0;
    for (int i : encoded) n += lineCountForPointer(i);
    ret n;
  }

  // 0 for an unknown version; otherwise the per-pointer sum minus 1,
  // clamped at 0
  long byteCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    long n = 0;
    for (int i : encoded) n += byteCountForPointer(i);
    ret max(0, n-1);
  }

  long totalByteCount() {
    ret longSum(lambdaMap byteCountForVersion(versions()));
  }

  // now we can also save again
  // (writes header, literals, stored pairs and versions in the layout
  // that load() reads)
  void save(PrintWriter out) {
    out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
    for (S s : literals)
      out.println(byteMode ? charToHex(first(s)) : s);
    for (long p : pairs)
      out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
    for (S id, L<Int> l : versions)
      out.println(id + "=" + joinWithSpace(l));
  }
}