!include once #1027304 // Eclipse Collections import org.eclipse.collections.impl.map.mutable.primitive.*; final sclass LineComp_PairIndex { bool verbose; bool inOrder = true; //MultiSetMap nodes; new LongObjectHashMap> nodes; MultiSetMap byCount = null; // init later //= multiSetMap_innerHashSet_outerRevTreeMap(); // makes a difference in selecting a pair if there are multiple with the same count // = multiSetMap_innerCompactHashSet_outerRevTreeMap(); // same but more compact //= multiSetMap_innerTreeSet_outerRevTreeMap(); // initial version that kind of pointlessly sorts the same-count nodes lexicographically new LinkedHashMap chains; int initialNodesSetCapacity = 1; void init { /*nodes = inOrder ? multiSetMap_innerLinkedHashSet_outerHashMap() : multiSetMap_innerHashSet_outerHashMap();*/ } // indicate end of chain-making phase void haveChains { if (byCount == null) { byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap(); for (LongObjectPair p : nodes.keyValuesView()) byCount.put(p.getOne(), p.getTwo()); } } // a "chain" of nodes (one input file) sclass Ch { Node tail; *(LineComp_PairIndex pi, L l) { fOr (int i : l) add(pi, i); } L toList() { new L l; Node node = tail; while (node != null) { l.add(node.value); node = node.prev; } ret reverseInPlace(l); } void add(LineComp_PairIndex pi, int i) { new Node n; n.ch = this; n.value = i; n.prev = tail; if (tail != null) tail.next = n; tail = n; pi.addToIndex(n.prev); } } sclass Node { Ch ch; int value; Node prev, next; int a() { ret value; } int b() { ret next == null ? -1 : next.value; } IntPair pair() { ret next == null ? null : IntPair(a(), b()); } } IntPair nodeToPair(Node n) { ret n?.pair(); } // add node to pair index void addToIndex(Node n) { IntPair p = nodeToPair(n); if (p == null) ret; int count = nodes_getSize(p); nodes_add(p, n); if (byCount != null) { // not in init phase if (count != 0) byCount.remove(count, p); byCount.put(count+1, p); } } // remove node from pair index void removeFromIndex(Node n) { IntPair p = nodeToPair(n); if (p == null) ret; int count = nodes_getSize(p); if (count == 0) fail("Can't remove pair " + p); nodes_remove(p, n); byCount.remove(count, p); if (--count > 0) byCount.put(count, p); } IntPair mostPopularDuplicate() { ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount); } int numberOfDuplicates() { ret byCount.size()-l(byCount.get(1)); } int getCount(IntPair p) { ret nodes_getSize(p); } void replacePair(int pa, int pb, int replaceWith) { IntPair p = IntPair(pa, pb); Set set = nodes.get(intPairToLong(p)); if (set != null) for (Node n : cloneList(set)) { continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed replacePair(n, replaceWith); } } void replacePair(Node node, int replaceWith) { removeFromIndex(node.prev); removeFromIndex(node); removeFromIndex(node.next); node.value = replaceWith; deleteNode(node.next); addToIndex(node.prev); addToIndex(node); } void deleteNode(Node node) { if (node.next != null) node.next.prev = node.prev; else node.ch.tail = node.prev; if (node.prev != null) node.prev.next = node.next; node.value = -1; // mark invalid } void newChain(S version, L encoding) { chains.put(version, new Ch(this, encoding)); } int nodes_getSize(IntPair p) { ret l(nodes.get(intPairToLong(p))); } void nodes_add(IntPair p, Node n) { long l = intPairToLong(p); LinkedHashSet set = nodes.get(l); if (set == null) nodes.put(l, set = new LinkedHashSet(initialNodesSetCapacity); set.add(n); } void nodes_remove(IntPair p, Node n) { long l = intPairToLong(p); LinkedHashSet set = nodes.get(l); if (set == null) ret; set.remove(n); if (set.isEmpty()) nodes.remove(l); } // also shrinks sets... void printStats { print("Different pairs: " + l(nodes)); print("Different counts: " + l(byCount)); long count = 0, waste = 0, newWaste = 0; long count1 = 0, waste1 = 0; for (HashSet set : nodes.values()) { count1 += set.size(); int capacity = hashSetCapacity(set); waste1 += capacity-set.size(); //if (capacity > set.size()*3) set.shrink(); // TODO } for (CompactHashSet set : (Cl) (Cl) values(byCount.data)) { count += set.size(); waste += set.capacity()-set.size(); set.shrinkToFactor(0.5); newWaste += set.capacity()-set.size(); } print("Nodes set count: " + count1 + ". Waste: " + waste1); print("Set count: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste)); } }