!include once #1027304 // Eclipse Collections srecord noeq LCMerger(LineCompReader lc1, LineCompReader lc2) { new LineCompReader lcOut; new LongIntHashMap pairIndex; Map literalIndex; new IntIntHashMap lc2map; int newLiterals; // number of literals added compared to lc1 int newPairs; run { lcOut.byteMode = lc1.byteMode; printWithTime("Making pairIndex"); for (int i = 0; i < lc1.pairs.size(); i++) pairIndex.put(lc1.pairs.get(i), i); printWithTime("Making literalIndex"); literalIndex = indexList(lc1.literals); // make lcOut.literals & literalIndex printWithTime("Merging literals"); lcOut.literals = cloneList(lc1.literals); for (int i = 0; i < l(lc2.literals); i++) { S c = lc2.literals.get(i); Int iLit = literalIndex.get(c); if (iLit == null) { iLit = addAndReturnIndex(lcOut.literals, c); literalIndex.put(c, iLit); } lc2map.put(i, iLit); } newLiterals = l(lcOut.literals)-l(lc1.literals); // clone lc1.pairs into lcOut.pairs, add lc2.pairs printWithTime("Merging pairs"); new IntIntHashMap map2; lcOut.pairs = new LongBuffer(l(lc1.pairs)); for (long p : lc1.pairs.asVirtualList()) lcOut.pairs.add(twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)))); for (int i = 0; i < l(lc2.pairs); i++) { long p = lc2.pairs.get(i); long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p))); int iPair = pairIndex.getIfAbsent(pAdjusted, -1); if (iPair < 0) { // new pair int iPair = l(lcOut.pairs); lcOut.pairs.add(pAdjusted); pairIndex.put(pAdjusted, iPair); } lc2map.put(l(lc2.literals)+i, l(lcOut.literals)+iPair); } newPairs = l(lcOut.pairs)-l(lc1.pairs); // copy files printWithTime("Merging files"); lcOut.versions = new LinkedHashMap; for (S name, L encoding : lc1.versions) { lcOut.versions.put(name, lmap adjust1(encoding)); } for (S name, L encoding : lc2.versions) { if (lcOut.versions.containsKey(name)) continue with print("Warning: Duplicate file name " + name); lcOut.versions.put(name, lmap adjust2(encoding)); } print("Synergy factor: " + doubleRatio(l(lcOut.pairs), l(lc1.pairs)+l(lc2.pairs))); } // convert symbols from lc1 to lcOut int adjust1(int i) { if (i >= l(lc1.literals)) if (i >= l(lc1.literals)+l(lc1.pairs)) ret i+newLiterals+newPairs; else ret i+newLiterals; ret i; } int adjust2(int i) { ret lc2map.getIfAbsent(i, i); } }