fsclass LineComp_PairIndex_v2 { bool verbose; new HashMap pairInfos; new LinkedHashMap chains; // buckets Bucket oneBucket = new Bucket(1), highest = oneBucket; // 1-bucket is lowest // duplicates counting didn't work for some reason... //int /*bucketCount = 1,*/duplicates; fclass PairInfo { IntPair pair; Bucket bucket; new LinkedHashSet nodes; *(IntPair *pair) {} public bool equals(O o) { if (o cast PairInfo) ret pair.equals(o.pair); false; } public int hashCode() { ret pair.hashCode(); } } // a bucket lists all pairs with the same number of duplicates // and it also knows its next-highest and next-lowest non-empty bucket fclass Bucket { Bucket higher, lower; int level; // number of duplicates of each IntPair in this bucket new LinkedHashSet pairs; *(int *level) {} toString { ret "Bucket " + level + appendBracketed(nPairs(pairs)); } Bucket oneHigher() { if (higher != null && higher.level == level+1) ret higher; Bucket b = new(level+1); //++bucketCount; b.lower = this; b.higher = higher; if (highest == this) highest = b; else higher.lower = b; ret higher = b; } Bucket oneLower() { if (level == 1) fail("Can't go lower than oneBucket"); if (lower.level == level-1) ret lower; //print("Making bucket " + (level-1) + " (lower: " + lower + ")"); if (lower.level < 0) fail("Invalid lower bucket"); Bucket b = new(level-1); //++bucketCount; b.higher = this; b.lower = lower; lower.higher = b; ret lower = b; } void add(PairInfo pair) { pairs.add(pair); pair.bucket = this; } void movePairOneHigher(PairInfo pair) { //if (level == 1) ++duplicates; // shortcut: just lift ourselves if we only contain one pair if (pairs.size() == 1 && higher != null && higher.level != level+1) { //print("shortcut higher"); ret with level++; } oneHigher().add(pair); remove(pair); } void remove(PairInfo pair) { pairs.remove(pair); if (empty(pairs) && this != oneBucket) dropMe(); } void dropMe { //print("Dropping " + this); if (level == 1) fail("Trying to drop oneBucket"); if (nempty(pairs)) fail("Bucket still has pairs"); if (higher != null) higher.lower = lower; else highest = lower; lower.higher = higher; level = -1; // mark invalid for safety //--bucketCount; } void movePairOneLower(PairInfo pair) { //if (verbose) print("Moving pair lower: " + pair); if (this == oneBucket) { remove(pair); forgetPair(pair); } else { //if (level == 2) --duplicates; // shortcut: just lower ourselves if we only contain one pair if (pairs.size() == 1 && lower.level != level-1) { //print("shortcut lower"); ret with level--; } oneLower().add(pair); remove(pair); } } } Bucket oneBucket() { ret oneBucket; } // a "chain" of nodes (one input file) fclass Ch { Node tail; *(L l) { fOr (int i : l) add(i); } L toList() { new L l; Node node = tail; while (node != null) { l.add(node.value); node = node.prev; } ret reverseInPlace(l); } void add(int i) { new Node n; n.ch = this; n.value = i; n.prev = tail; if (tail != null) tail.next = n; tail = n; addToIndex(n.prev); } } fclass Node { Ch ch; int value; Node prev, next; int a() { ret value; } int b() { ret next == null ? -1 : next.value; } IntPair pair() { ret next == null ? null : IntPair(a(), b()); } } IntPair nodeToPair(Node n) { ret n?.pair(); } // add node to pair index (add to lowest or move to higher bucket) void addToIndex(Node n) { IntPair p = nodeToPair(n), ret if null; PairInfo pi = getPairInfo(p); if (pi == null) { pi = new PairInfo(p); pairInfos.put(p, pi); oneBucket.add(pi); } else pi.bucket.movePairOneHigher(pi); pi.nodes.add(n); } // remove node from pair index (move to lower bucket or drop) void removeFromIndex(Node n) { IntPair p = nodeToPair(n), ret if null; PairInfo pi = getPairInfo(p); if (pi == null) fail("Can't remove pair " + p); pi.nodes.remove(n); pi.bucket.movePairOneLower(pi); } IntPair mostPopularDuplicate() { //if (verbose) print("Highest bucket: " + highest); if (highest == oneBucket) null; // no more duplicates PairInfo pi = first(highest.pairs); if (pi == null) fail("Bucket empty! " + highest); ret pi.pair; } int getCount(IntPair pair) { ret getPairInfo(pair).bucket.level; } int numberOfDuplicates() { //ret duplicates; ret l(pairInfos)-l(oneBucket.pairs); } void replacePair(int pa, int pb, int replaceWith) { IntPair p = IntPair(pa, pb); PairInfo pi = getPairInfo(p); //print("Have " + nNodes(pi.nodes)); for (Node n : cloneList(pi.nodes)) { //print("Processing node " + n.pair()); continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed replacePair(n, replaceWith); } } void replacePair(Node node, int replaceWith) { removeFromIndex(node.prev); removeFromIndex(node); removeFromIndex(node.next); node.value = replaceWith; deleteNode(node.next); addToIndex(node.prev); addToIndex(node); } void deleteNode(Node node) { if (node.next != null) node.next.prev = node.prev; else node.ch.tail = node.prev; if (node.prev != null) node.prev.next = node.next; node.value = -1; // mark invalid } void newChain(S version, L encoding) { chains.put(version, new Ch(encoding)); } PairInfo getPairInfo(IntPair pair) { ret pairInfos.get(pair); } void forgetPair(PairInfo pair) { pairInfos.remove(pair.pair); pair.bucket = null; } void fullCheck { assertTrue(oneBucket.lower == null); assertTrue(oneBucket.level == 1); Bucket b = highest; while (b != oneBucket) { assertNempty(b.pairs); assertSame(b.lower.higher, b); assertEquals(b.lower.level, b.level-1); b = b.lower; } } }