Uses 11335K of libraries. Click here for Pure Java version (3961L/25K).
1 | // Gave nodes + cousin sets a counter to make their hashing deterministic. |
2 | |
3 | !include once #1027304 // Eclipse Collections |
4 | import org.eclipse.collections.impl.map.mutable.primitive.*; |
5 | import org.eclipse.collections.api.tuple.primitive.*; |
6 | |
7 | set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this. |
8 | |
9 | final sclass LineComp_PairIndex {
|
10 | bool verbose; |
11 | bool inOrder = true; |
12 | //MultiSetMap<IntPair, Node> nodes; |
13 | new LongObjectHashMap<Cousins> nodes; |
14 | MultiSetMap<Int, Cousins> byCount; |
15 | new LinkedHashMap<S, Ch> chains; |
16 | Ch firstChain; |
17 | int objectCounter; // for making deterministic hash codes |
18 | int totalPairCount; |
19 | |
20 | static int initialNodesSetCapacity = 1; |
21 | |
22 | void init {} // not needed anymore
|
23 | |
24 | // Cousins is a replacement for LinkedHashSet<Node> |
25 | // - a set of all nodes forming a certain pair with their successor |
26 | // Nodes are linked through prevCousin/nextCousin |
27 | final sclass Cousins extends CompactHashSet<Node> {
|
28 | Node head, tail; |
29 | |
30 | int hashCode; |
31 | *(LineComp_PairIndex pi) {
|
32 | super(initialNodesSetCapacity); |
33 | hashCode = ++pi.objectCounter; |
34 | } |
35 | @Override public int hashCode() { ret hashCode; }
|
36 | |
37 | public bool add(Node n) {
|
38 | n.prevCousin = tail; |
39 | if (tail != null) // there already are entries |
40 | tail.nextCousin = n; |
41 | else |
42 | head = n; |
43 | tail = n; |
44 | ret super.add(n); |
45 | } |
46 | |
47 | public bool remove(Node node) {
|
48 | if (node.nextCousin != null) |
49 | node.nextCousin.prevCousin = node.prevCousin; |
50 | else |
51 | tail = node.prevCousin; |
52 | if (node.prevCousin != null) |
53 | node.prevCousin.nextCousin = node.nextCousin; |
54 | else |
55 | head = node.nextCousin; |
56 | node.prevCousin = node.nextCousin = null; |
57 | ret super.remove(node); |
58 | } |
59 | |
60 | L<Node> asList() {
|
61 | L<Node> l = emptyList(size()); |
62 | Node n = head; |
63 | while (n != null) {
|
64 | l.add(n); |
65 | n = n.nextCousin; |
66 | } |
67 | ret l; |
68 | } |
69 | } |
70 | |
71 | // indicate end of chain-making phase |
72 | void haveChains {
|
73 | if (byCount == null) {
|
74 | print("n=" + nodes.size());
|
75 | time "Making byCount" {
|
76 | byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap(); |
77 | for (Cousins set : nodes.values()) |
78 | byCount.put(set.size(), set); |
79 | } |
80 | } |
81 | } |
82 | |
83 | // a "chain" of nodes (one input file) |
84 | sclass Ch {
|
85 | Node tail; |
86 | |
87 | *(LineComp_PairIndex pi, L<Int> l) {
|
88 | int count = 0; |
89 | fOr (int i : l) {
|
90 | add(pi, i); |
91 | if (((++count) % oneMillion()) == 0) |
92 | print("Added " + count + " entries to chain");
|
93 | } |
94 | } |
95 | |
96 | L<Int> toList() {
|
97 | new L<Int> l; |
98 | Node node = tail; |
99 | while (node != null) {
|
100 | l.add(node.value); |
101 | node = node.prev; |
102 | } |
103 | ret reverseInPlace(l); |
104 | } |
105 | |
106 | void add(LineComp_PairIndex pi, int i) {
|
107 | Node n = new(pi); |
108 | ifndef LineComp_SingleChain |
109 | n.ch = this; |
110 | endifndef |
111 | n.value = i; |
112 | n.prev = tail; |
113 | if (tail != null) tail.next = n; |
114 | tail = n; |
115 | pi.addToIndex(n.prev); |
116 | } |
117 | } |
118 | |
119 | sclass Node {
|
120 | ifndef LineComp_SingleChain |
121 | Ch ch; |
122 | endifndef |
123 | int value; |
124 | Node prev, next; |
125 | Node prevCousin, nextCousin; // prev & next with same pair value |
126 | |
127 | int hashCode; |
128 | *(LineComp_PairIndex pi) { hashCode = ++pi.objectCounter; }
|
129 | @Override public int hashCode() { ret hashCode; }
|
130 | |
131 | int a() { ret value; }
|
132 | int b() { ret next == null ? -1 : next.value; }
|
133 | IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
|
134 | } |
135 | |
136 | IntPair nodeToPair(Node n) {
|
137 | ret n?.pair(); |
138 | } |
139 | |
140 | // add node to pair index |
141 | void addToIndex(Node n) {
|
142 | IntPair p = nodeToPair(n); |
143 | if (p == null) ret; |
144 | int count = nodes_getSize(p); |
145 | Cousins set = nodes_add(p, n); |
146 | if (byCount != null) { // not in init phase
|
147 | if (count != 0) byCount.remove(count, set); |
148 | byCount.put(count+1, set); |
149 | } |
150 | } |
151 | |
152 | // remove node from pair index |
153 | void removeFromIndex(Node n) {
|
154 | IntPair p = nodeToPair(n); |
155 | if (p == null) ret; |
156 | Cousins set = nodes.get(intPairToLong(p)); |
157 | int count = l(set); |
158 | if (count == 0) fail("Can't remove pair " + p);
|
159 | nodes_remove(p, n); |
160 | byCount.remove(count, set); |
161 | if (--count > 0) byCount.put(count, set); |
162 | } |
163 | |
164 | IntPair mostPopularDuplicate() {
|
165 | ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount).head.pair(); |
166 | } |
167 | |
168 | int numberOfDuplicates() {
|
169 | ret byCount.size()-l(byCount.get(1)); |
170 | } |
171 | |
172 | int getCount(IntPair p) {
|
173 | ret nodes_getSize(p); |
174 | } |
175 | |
176 | void replacePair(int pa, int pb, int replaceWith) {
|
177 | IntPair p = IntPair(pa, pb); |
178 | Cousins set = nodes.get(intPairToLong(p)); |
179 | if (set != null) {
|
180 | // TODO: might not even have to make this cloned list |
181 | L<Node> l = set.asList(); |
182 | //for (Node n = cousins.head; n != null; n = n.nextCousin) {
|
183 | for (Node n : l) {
|
184 | continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed |
185 | replacePair(n, replaceWith); |
186 | } |
187 | } |
188 | } |
189 | |
190 | void replacePair(Node node, int replaceWith) {
|
191 | removeFromIndex(node.prev); |
192 | removeFromIndex(node); |
193 | removeFromIndex(node.next); |
194 | node.value = replaceWith; |
195 | deleteNode(node.next); |
196 | addToIndex(node.prev); |
197 | addToIndex(node); |
198 | } |
199 | |
200 | void deleteNode(Node node) {
|
201 | if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev; |
202 | if (node.prev != null) node.prev.next = node.next; |
203 | node.value = -1; // mark invalid |
204 | } |
205 | |
206 | void newChain(S version, L<Int> encoding) {
|
207 | Ch ch = new(this, encoding); |
208 | chains.put(version, ch); |
209 | ifdef LineComp_SingleChain |
210 | assertNull(firstChain); |
211 | firstChain = ch; |
212 | endifdef |
213 | } |
214 | |
215 | int nodes_getSize(IntPair p) {
|
216 | ret l(nodes.get(intPairToLong(p))); |
217 | } |
218 | |
219 | Cousins nodes_add(IntPair p, Node n) {
|
220 | ++totalPairCount; |
221 | long l = intPairToLong(p); |
222 | Cousins set = nodes.get(l); |
223 | if (set == null) |
224 | nodes.put(l, set = new Cousins(this)); |
225 | set.add(n); |
226 | ret set; |
227 | } |
228 | |
229 | void nodes_remove(IntPair p, Node n) {
|
230 | --totalPairCount; |
231 | long l = intPairToLong(p); |
232 | Cousins set = nodes.get(l); |
233 | if (set == null || !set.remove(n)) fail("Consistency error");
|
234 | if (set.isEmpty()) |
235 | nodes.remove(l); |
236 | } |
237 | |
238 | // also shrinks sets... |
239 | void printStats {
|
240 | //print("Different pairs: " + l(nodes));
|
241 | print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
|
242 | long count = 0, waste = 0, newWaste = 0; |
243 | long count1 = 0, waste1 = 0, newWaste1 = 0; |
244 | |
245 | for (Cousins set : nodes.values()) {
|
246 | count1 += set.size(); |
247 | int capacity = set.capacity(); |
248 | waste1 += capacity-set.size(); |
249 | set.shrinkToFactor(0.5); |
250 | newWaste1 += capacity-set.size(); |
251 | } |
252 | |
253 | for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
|
254 | count += set.size(); |
255 | waste += set.capacity()-set.size(); |
256 | set.shrinkToFactor(0.5); |
257 | newWaste += set.capacity()-set.size(); |
258 | } |
259 | |
260 | print("Chain length : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1));
|
261 | print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
|
262 | } |
263 | |
264 | Ch getChain(Node node) {
|
265 | ifdef LineComp_SingleChain |
266 | ret firstChain; |
267 | endifdef |
268 | ifndef LineComp_SingleChain |
269 | ret node.ch; |
270 | endifndef |
271 | } |
272 | } |
Began life as a copy of #1028247
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1028521 |
| Snippet name: | LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path] |
| Eternal ID of this version: | #1028521/33 |
| Text MD5: | de8173bbb49cadb5d2b784747b12cbd5 |
| Transpilation MD5: | 96571637b156f958f5eda0b52cb9ad03 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-07-04 06:44:37 |
| Source code size: | 7724 bytes / 272 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 495 / 877 |
| Version history: | 32 change(s) |
| Referenced in: | [show references] |