!include once #1027304 // Eclipse Collections srecord noeq LCMerger(LineCompReader lc1, LineCompReader lc2) { LineCompReader lcOut; // actually the same as lc1 now new LongIntHashMap pairIndex; Map literalIndex; int newLiterals; // number of literals added compared to lc1 int newPairs; int nOriginalLiterals, nOriginalPairs; run { lcOut = lc1; nOriginalLiterals = l(lc1.literals); nOriginalPairs = l(lc1.pairs); printWithTime("Making pairIndex"); for (int i = 0; i < lc1.pairs.size(); i++) pairIndex.put(lc1.pairs.get(i), i); printWithTime("Making literalIndex"); literalIndex = indexList(lc1.literals); // add lc2.literals to lc1.literals printWithTime("Merging literals"); for (int i = 0; i < l(lc2.literals); i++) { S c = lc2.literals.get(i); Int iLit = literalIndex.get(c); if (iLit == null) { iLit = addAndReturnIndex(lc1.literals, c); ++newLiterals; literalIndex.put(c, iLit); } } // merge pairs printWithTime("Merging pairs"); for i to nOriginalPairs: { long p = lc1.pairs.get(i); lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)))); } for (int i = 0; i < l(lc2.pairs); i++) { long p = lc2.pairs.get(i); long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p))); int iPair = pairIndex.getIfAbsent(pAdjusted, -1); if (iPair < 0) { // new pair iPair = l(lc1.pairs); lc1.pairs.add(pAdjusted); ++newPairs; pairIndex.put(pAdjusted, iPair); } } // copy files printWithTime("Merging files"); lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc)); for (S name, L encoding : lc2.versions) { if (lc1.versions.containsKey(name)) continue with print("Warning: Duplicate file name " + name); lc1.versions.put(name, lmap adjust2(encoding)); } printVars_str(+newLiterals, +newPairs); print("Synergy factor: " + doubleRatio(l(lc1.pairs), nOriginalPairs+l(lc2.pairs))); } // convert symbols from lc1 int adjust1(int i) { if (i >= nOriginalLiterals) if (i >= nOriginalLiterals+nOriginalPairs) ret i+newLiterals+newPairs; else ret i+newLiterals; ret i; } int adjust2(int i) { if (i < l(lc2.literals)) ret literalIndex.get(lc2.literals.get(i)); else { long p = lc2.pairs.get(i-l(lc2.literals)); int idx = pairIndex.getIfAbsent(p, -1); if (idx < 0) fail("adjust2 failed: " + i + " / " + longToIntPair(p)); ret idx; } } }