Uses 11335K of libraries. Click here for Pure Java version (3961L/25K).
1 | // Gave nodes + cousin sets a counter to make their hashing deterministic. |
2 | |
3 | !include once #1027304 // Eclipse Collections |
4 | import org.eclipse.collections.impl.map.mutable.primitive.*; |
5 | import org.eclipse.collections.api.tuple.primitive.*; |
6 | |
7 | set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this. |
8 | |
9 | final sclass LineComp_PairIndex { |
10 | bool verbose; |
11 | bool inOrder = true; |
12 | //MultiSetMap<IntPair, Node> nodes; |
13 | new LongObjectHashMap<Cousins> nodes; |
14 | MultiSetMap<Int, Cousins> byCount; |
15 | new LinkedHashMap<S, Ch> chains; |
16 | Ch firstChain; |
17 | int objectCounter; // for making deterministic hash codes |
18 | int totalPairCount; |
19 | |
20 | static int initialNodesSetCapacity = 1; |
21 | |
22 | void init {} // not needed anymore |
23 | |
24 | // Cousins is a replacement for LinkedHashSet<Node> |
25 | // - a set of all nodes forming a certain pair with their successor |
26 | // Nodes are linked through prevCousin/nextCousin |
27 | final sclass Cousins extends CompactHashSet<Node> { |
28 | Node head, tail; |
29 | |
30 | int hashCode; |
31 | *(LineComp_PairIndex pi) { |
32 | super(initialNodesSetCapacity); |
33 | hashCode = ++pi.objectCounter; |
34 | } |
35 | @Override public int hashCode() { ret hashCode; } |
36 | |
37 | public bool add(Node n) { |
38 | n.prevCousin = tail; |
39 | if (tail != null) // there already are entries |
40 | tail.nextCousin = n; |
41 | else |
42 | head = n; |
43 | tail = n; |
44 | ret super.add(n); |
45 | } |
46 | |
47 | public bool remove(Node node) { |
48 | if (node.nextCousin != null) |
49 | node.nextCousin.prevCousin = node.prevCousin; |
50 | else |
51 | tail = node.prevCousin; |
52 | if (node.prevCousin != null) |
53 | node.prevCousin.nextCousin = node.nextCousin; |
54 | else |
55 | head = node.nextCousin; |
56 | node.prevCousin = node.nextCousin = null; |
57 | ret super.remove(node); |
58 | } |
59 | |
60 | L<Node> asList() { |
61 | L<Node> l = emptyList(size()); |
62 | Node n = head; |
63 | while (n != null) { |
64 | l.add(n); |
65 | n = n.nextCousin; |
66 | } |
67 | ret l; |
68 | } |
69 | } |
70 | |
71 | // indicate end of chain-making phase |
72 | void haveChains { |
73 | if (byCount == null) { |
74 | print("n=" + nodes.size()); |
75 | time "Making byCount" { |
76 | byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap(); |
77 | for (Cousins set : nodes.values()) |
78 | byCount.put(set.size(), set); |
79 | } |
80 | } |
81 | } |
82 | |
83 | // a "chain" of nodes (one input file) |
84 | sclass Ch { |
85 | Node tail; |
86 | |
87 | *(LineComp_PairIndex pi, L<Int> l) { |
88 | int count = 0; |
89 | fOr (int i : l) { |
90 | add(pi, i); |
91 | if (((++count) % oneMillion()) == 0) |
92 | print("Added " + count + " entries to chain"); |
93 | } |
94 | } |
95 | |
96 | L<Int> toList() { |
97 | new L<Int> l; |
98 | Node node = tail; |
99 | while (node != null) { |
100 | l.add(node.value); |
101 | node = node.prev; |
102 | } |
103 | ret reverseInPlace(l); |
104 | } |
105 | |
106 | void add(LineComp_PairIndex pi, int i) { |
107 | Node n = new(pi); |
108 | ifndef LineComp_SingleChain |
109 | n.ch = this; |
110 | endifndef |
111 | n.value = i; |
112 | n.prev = tail; |
113 | if (tail != null) tail.next = n; |
114 | tail = n; |
115 | pi.addToIndex(n.prev); |
116 | } |
117 | } |
118 | |
119 | sclass Node { |
120 | ifndef LineComp_SingleChain |
121 | Ch ch; |
122 | endifndef |
123 | int value; |
124 | Node prev, next; |
125 | Node prevCousin, nextCousin; // prev & next with same pair value |
126 | |
127 | int hashCode; |
128 | *(LineComp_PairIndex pi) { hashCode = ++pi.objectCounter; } |
129 | @Override public int hashCode() { ret hashCode; } |
130 | |
131 | int a() { ret value; } |
132 | int b() { ret next == null ? -1 : next.value; } |
133 | IntPair pair() { ret next == null ? null : IntPair(a(), b()); } |
134 | } |
135 | |
136 | IntPair nodeToPair(Node n) { |
137 | ret n?.pair(); |
138 | } |
139 | |
140 | // add node to pair index |
141 | void addToIndex(Node n) { |
142 | IntPair p = nodeToPair(n); |
143 | if (p == null) ret; |
144 | int count = nodes_getSize(p); |
145 | Cousins set = nodes_add(p, n); |
146 | if (byCount != null) { // not in init phase |
147 | if (count != 0) byCount.remove(count, set); |
148 | byCount.put(count+1, set); |
149 | } |
150 | } |
151 | |
152 | // remove node from pair index |
153 | void removeFromIndex(Node n) { |
154 | IntPair p = nodeToPair(n); |
155 | if (p == null) ret; |
156 | Cousins set = nodes.get(intPairToLong(p)); |
157 | int count = l(set); |
158 | if (count == 0) fail("Can't remove pair " + p); |
159 | nodes_remove(p, n); |
160 | byCount.remove(count, set); |
161 | if (--count > 0) byCount.put(count, set); |
162 | } |
163 | |
164 | IntPair mostPopularDuplicate() { |
165 | ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount).head.pair(); |
166 | } |
167 | |
168 | int numberOfDuplicates() { |
169 | ret byCount.size()-l(byCount.get(1)); |
170 | } |
171 | |
172 | int getCount(IntPair p) { |
173 | ret nodes_getSize(p); |
174 | } |
175 | |
176 | void replacePair(int pa, int pb, int replaceWith) { |
177 | IntPair p = IntPair(pa, pb); |
178 | Cousins set = nodes.get(intPairToLong(p)); |
179 | if (set != null) { |
180 | // TODO: might not even have to make this cloned list |
181 | L<Node> l = set.asList(); |
182 | //for (Node n = cousins.head; n != null; n = n.nextCousin) { |
183 | for (Node n : l) { |
184 | continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed |
185 | replacePair(n, replaceWith); |
186 | } |
187 | } |
188 | } |
189 | |
190 | void replacePair(Node node, int replaceWith) { |
191 | removeFromIndex(node.prev); |
192 | removeFromIndex(node); |
193 | removeFromIndex(node.next); |
194 | node.value = replaceWith; |
195 | deleteNode(node.next); |
196 | addToIndex(node.prev); |
197 | addToIndex(node); |
198 | } |
199 | |
200 | void deleteNode(Node node) { |
201 | if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev; |
202 | if (node.prev != null) node.prev.next = node.next; |
203 | node.value = -1; // mark invalid |
204 | } |
205 | |
206 | void newChain(S version, L<Int> encoding) { |
207 | Ch ch = new(this, encoding); |
208 | chains.put(version, ch); |
209 | ifdef LineComp_SingleChain |
210 | assertNull(firstChain); |
211 | firstChain = ch; |
212 | endifdef |
213 | } |
214 | |
215 | int nodes_getSize(IntPair p) { |
216 | ret l(nodes.get(intPairToLong(p))); |
217 | } |
218 | |
219 | Cousins nodes_add(IntPair p, Node n) { |
220 | ++totalPairCount; |
221 | long l = intPairToLong(p); |
222 | Cousins set = nodes.get(l); |
223 | if (set == null) |
224 | nodes.put(l, set = new Cousins(this)); |
225 | set.add(n); |
226 | ret set; |
227 | } |
228 | |
229 | void nodes_remove(IntPair p, Node n) { |
230 | --totalPairCount; |
231 | long l = intPairToLong(p); |
232 | Cousins set = nodes.get(l); |
233 | if (set == null || !set.remove(n)) fail("Consistency error"); |
234 | if (set.isEmpty()) |
235 | nodes.remove(l); |
236 | } |
237 | |
238 | // also shrinks sets... |
239 | void printStats { |
240 | //print("Different pairs: " + l(nodes)); |
241 | print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")"); |
242 | long count = 0, waste = 0, newWaste = 0; |
243 | long count1 = 0, waste1 = 0, newWaste1 = 0; |
244 | |
245 | for (Cousins set : nodes.values()) { |
246 | count1 += set.size(); |
247 | int capacity = set.capacity(); |
248 | waste1 += capacity-set.size(); |
249 | set.shrinkToFactor(0.5); |
250 | newWaste1 += capacity-set.size(); |
251 | } |
252 | |
253 | for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) { |
254 | count += set.size(); |
255 | waste += set.capacity()-set.size(); |
256 | set.shrinkToFactor(0.5); |
257 | newWaste += set.capacity()-set.size(); |
258 | } |
259 | |
260 | print("Chain length : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1)); |
261 | print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste)); |
262 | } |
263 | |
264 | Ch getChain(Node node) { |
265 | ifdef LineComp_SingleChain |
266 | ret firstChain; |
267 | endifdef |
268 | ifndef LineComp_SingleChain |
269 | ret node.ch; |
270 | endifndef |
271 | } |
272 | } |
Began life as a copy of #1028247
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028521 |
Snippet name: | LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path] |
Eternal ID of this version: | #1028521/33 |
Text MD5: | de8173bbb49cadb5d2b784747b12cbd5 |
Transpilation MD5: | 96571637b156f958f5eda0b52cb9ad03 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-04 06:44:37 |
Source code size: | 7724 bytes / 272 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 283 / 609 |
Version history: | 32 change(s) |
Referenced in: | [show references] |