Uses 11335K of libraries. Click here for Pure Java version (4091L/26K).
1 | // assumes LineComp_SingleChain! |
2 | // For multi-chain, use v2 of this class (#1028521) |
3 | // |
4 | // nodeHashCode is not used yet |
5 | // |
6 | // Some bug with the freeing of nodes... |
7 | |
8 | !include once #1027304 // Eclipse Collections |
9 | import org.eclipse.collections.impl.map.mutable.primitive.*; |
10 | import org.eclipse.collections.api.tuple.primitive.*; |
11 | |
12 | set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this. |
13 | |
14 | final sclass LineComp_PairIndex { |
15 | replace Node with int. |
16 | replace _Node with Int. |
17 | |
18 | bool verbose; |
19 | bool garbageCollect = false; // still buggy |
20 | new LongObjectHashMap<Cousins> nodes; |
21 | MultiSetMap<Int, Cousins> byCount; |
22 | new LinkedHashMap<S, Ch> chains; |
23 | Ch firstChain; |
24 | int objectCounter; // for making deterministic hash codes |
25 | int nodeCounter; |
26 | |
27 | static int initialNodesSetCapacity = 1; |
28 | |
29 | static final int nodeSize = 6; // 5 ints (hash code + value + 4 pointers) |
30 | static new ChunkedIntList nodeData; |
31 | |
32 | int nodeHashCode(int node) { ret nodeData.get(node); } |
33 | int value(int node) { ret nodeData.get(node+1); } |
34 | int prev(int node) { ret nodeData.get(node+2); } |
35 | int next(int node) { ret nodeData.get(node+3); } |
36 | int prevCousin(int node) { ret nodeData.get(node+4); } |
37 | int nextCousin(int node) { ret nodeData.get(node+5); } |
38 | |
39 | void setNodeHashCode(int node, int x) { nodeDataSet(node, x); } |
40 | void setValue(int node, int x) { nodeDataSet(node+1, x); } |
41 | void setPrev(int node, int x) { nodeDataSet(node+2, x); } |
42 | void setNext(int node, int x) { nodeDataSet(node+3, x); } |
43 | void setPrevCousin(int node, int x) { nodeDataSet(node+4, x); } |
44 | void setNextCousin(int node, int x) { nodeDataSet(node+5, x); } |
45 | |
46 | void nodeDataSet(int idx, int value) { |
47 | nodeData.set(idx, value); |
48 | } |
49 | |
50 | void init { |
51 | for i to nodeSize: nodeData.add(0); |
52 | } |
53 | |
54 | // Cousins is a replacement for LinkedHashSet<Node> |
55 | // - a set of all nodes forming a certain pair with their successor |
56 | // Nodes are linked through prevCousin/nextCousin |
57 | final class Cousins extends CompactHashSet<_Node> { |
58 | Node head, tail; |
59 | |
60 | int hashCode; |
61 | *(LineComp_PairIndex pi) { |
62 | super(initialNodesSetCapacity); |
63 | hashCode = ++pi.objectCounter; |
64 | } |
65 | @Override public int hashCode() { ret hashCode; } |
66 | |
67 | public bool add(Node n) { |
68 | setPrevCousin(n, tail); |
69 | if (tail != 0) // there already are entries |
70 | setNextCousin(tail, n); |
71 | else |
72 | head = n; |
73 | tail = n; |
74 | ret super.add(n); |
75 | } |
76 | |
77 | public bool remove(Node node) { |
78 | if (nextCousin(node) != 0) |
79 | setPrevCousin(nextCousin(node), prevCousin(node)); |
80 | else |
81 | tail = prevCousin(node); |
82 | if (prevCousin(node) != 0) |
83 | setNextCousin(prevCousin(node), nextCousin(node)); |
84 | else |
85 | head = nextCousin(node); |
86 | setPrevCousin(node, 0); |
87 | setNextCousin(node, 0); |
88 | ret super.remove(node); |
89 | } |
90 | |
91 | // node has changed internal location. don't change the linked list stuff! |
92 | void moveNode(Node from, Node to) { |
93 | super.remove(from); |
94 | super.add(to); |
95 | } |
96 | |
97 | L<_Node> asList() { |
98 | L<_Node> l = emptyList(size()); |
99 | Node n = head; |
100 | while (n != 0) { |
101 | l.add(n); |
102 | n = nextCousin(n); |
103 | } |
104 | ret l; |
105 | } |
106 | } |
107 | |
108 | // indicate end of chain-making phase |
109 | void haveChains { |
110 | if (byCount == null) { |
111 | print("n=" + nodes.size()); |
112 | time "Making byCount" { |
113 | byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap(); |
114 | for (Cousins set : nodes.values()) |
115 | byCount.put(set.size(), set); |
116 | } |
117 | } |
118 | } |
119 | |
120 | // a "chain" of nodes (one input file) |
121 | class Ch { |
122 | Node tail; |
123 | |
124 | *(LineComp_PairIndex pi, L<Int> l) { |
125 | int count = 0; |
126 | fOr (int i : l) { |
127 | add(pi, i); |
128 | if (((++count) % oneMillion()) == 0) |
129 | print("Added " + count + " entries to chain"); |
130 | } |
131 | } |
132 | |
133 | L<Int> toList() { |
134 | new L<Int> l; |
135 | Node node = tail; |
136 | while (node != 0) { |
137 | l.add(value(node)); |
138 | node = prev(node); |
139 | } |
140 | ret reverseInPlace(l); |
141 | } |
142 | |
143 | void add(LineComp_PairIndex pi, int i) { |
144 | Node n = pi.newNode(); |
145 | setValue(n, i); |
146 | setPrev(n, tail); |
147 | if (tail != 0) setNext(tail, n); |
148 | tail = n; |
149 | pi.addToIndex(prev(n)); |
150 | } |
151 | } |
152 | |
153 | Node newNode() { |
154 | for i to nodeSize: nodeData.add(0); |
155 | ret nodeCounter += nodeSize; |
156 | } |
157 | |
158 | int node_a(int node) { ret value(node); } |
159 | int node_b(int node) { int next = next(node); ret next == 0 ? -1 : value(next); } |
160 | |
161 | IntPair nodeToPair(int node) { |
162 | if (node == 0) null; |
163 | int next = next(node); |
164 | ret next == 0 ? null : IntPair(node_a(node), node_b(node)); |
165 | } |
166 | |
167 | // add node to pair index |
168 | void addToIndex(Node n) { |
169 | IntPair p = nodeToPair(n); |
170 | if (p == null) ret; |
171 | int count = nodes_getSize(p); |
172 | Cousins set = nodes_add(p, n); |
173 | if (byCount != null) { // not in init phase |
174 | if (count != 0) byCount.remove(count, set); |
175 | byCount.put(count+1, set); |
176 | } |
177 | } |
178 | |
179 | // remove node from pair index |
180 | void removeFromIndex(Node n) { |
181 | IntPair p = nodeToPair(n); |
182 | if (p == null) ret; |
183 | Cousins set = nodes.get(intPairToLong(p)); |
184 | int count = l(set); |
185 | if (count == 0) fail("Can't remove pair " + p); |
186 | nodes_remove(p, n); |
187 | byCount.remove(count, set); |
188 | if (--count > 0) byCount.put(count, set); |
189 | } |
190 | |
191 | IntPair mostPopularDuplicate() { |
192 | ret toInt(firstKey(byCount)) < 2 ? null : nodeToPair(firstValue(byCount).head); |
193 | } |
194 | |
195 | int numberOfDuplicates() { |
196 | ret byCount.size()-l(byCount.get(1)); |
197 | } |
198 | |
199 | int getCount(IntPair p) { |
200 | ret nodes_getSize(p); |
201 | } |
202 | |
203 | L<_Node> replacing; |
204 | Map<_Node, Int> replacing_index; |
205 | |
206 | void replacePair(int pa, int pb, int replaceWith) { |
207 | IntPair p = IntPair(pa, pb); |
208 | Cousins set = nodes.get(intPairToLong(p)); |
209 | if (set != null) { |
210 | replacing = set.asList(); |
211 | replacing_index = listIndex(replacing); |
212 | |
213 | for i over replacing: { |
214 | Node n = replacing.get(i); |
215 | continue if node_a(n) != pa || node_b(n) != pb; // nodes might have been deleted or changed |
216 | replacePair(n, replaceWith); |
217 | } |
218 | |
219 | replacing = null; |
220 | replacing_index = null; |
221 | } |
222 | } |
223 | |
224 | void replacePair(Node node, int replaceWith) { |
225 | removeFromIndex(prev(node)); |
226 | removeFromIndex(node); |
227 | removeFromIndex(next(node)); |
228 | setValue(node, replaceWith); |
229 | deleteNode(next(node)); |
230 | addToIndex(prev(node)); |
231 | addToIndex(node); |
232 | } |
233 | |
234 | void deleteNode(Node node) { |
235 | if (next(node) != 0) |
236 | setPrev(next(node), prev(node)); |
237 | else |
238 | firstChain.tail = prev(node); |
239 | if (prev(node) != 0) |
240 | setNext(prev(node), next(node)); |
241 | //setValue(node, -1); // mark invalid |
242 | physicallyDisposeOfNode(node); |
243 | } |
244 | |
245 | void physicallyDisposeOfNode(Node node) { |
246 | if (!garbageCollect) ret; |
247 | // move last node in list to place that is now free |
248 | if (node != nodeCounter) { |
249 | //print("Moving node " + nodeCounter + " to " + node); |
250 | moveNode(nodeCounter, node); |
251 | } |
252 | //print("Disposing of node " + nodeCounter); |
253 | nodeData.truncateAt(nodeCounter); |
254 | nodeCounter -= nodeSize; |
255 | } |
256 | |
257 | void moveNode(int from, int to) { |
258 | // Copy hash code & value. |
259 | setNodeHashCode(to, nodeHashCode(from)); |
260 | setValue(to, value(from)); |
261 | |
262 | // Be sure to update all the pointers there may be to this node! |
263 | // First, find Cousins object. |
264 | IntPair pair = nodeToPair(from); |
265 | if (pair != null) { |
266 | Cousins cousins = nodes.get(intPairToLong(pair)); |
267 | if (cousins.head == from) cousins.head = to; |
268 | if (cousins.tail == from) cousins.tail = to; |
269 | cousins.moveNode(from, to); |
270 | } |
271 | |
272 | // Next, check the chain. |
273 | if (firstChain.tail == from) firstChain.tail = to; |
274 | |
275 | // Now the 4 prev and next pointers |
276 | int i; |
277 | if ((i = prev(from)) != 0) setNext(i, to); |
278 | if ((i = next(from)) != 0) setPrev(i, to); |
279 | if ((i = prevCousin(from)) != 0) setNextCousin(i, to); |
280 | if ((i = nextCousin(from)) != 0) setPrevCousin(i, to); |
281 | |
282 | // Finally the list we are currently replacing |
283 | if (replacing_index != null) { |
284 | _Node ref = replacing_index.get(from); |
285 | if (ref != null) |
286 | replacing.set(ref, to); |
287 | } |
288 | |
289 | // That should be it...!? |
290 | } |
291 | |
292 | void newChain(S version, L<Int> encoding) { |
293 | Ch ch = new(this, encoding); |
294 | chains.put(version, ch); |
295 | assertNull(firstChain); |
296 | firstChain = ch; |
297 | } |
298 | |
299 | int nodes_getSize(IntPair p) { |
300 | ret l(nodes.get(intPairToLong(p))); |
301 | } |
302 | |
303 | Cousins nodes_add(IntPair p, Node n) { |
304 | long l = intPairToLong(p); |
305 | Cousins set = nodes.get(l); |
306 | if (set == null) |
307 | nodes.put(l, set = new Cousins(this)); |
308 | set.add(n); |
309 | ret set; |
310 | } |
311 | |
312 | void nodes_remove(IntPair p, Node n) { |
313 | long l = intPairToLong(p); |
314 | Cousins set = nodes.get(l); |
315 | if (set == null) ret; |
316 | set.remove(n); |
317 | if (set.isEmpty()) |
318 | nodes.remove(l); |
319 | } |
320 | |
321 | // also shrinks sets... |
322 | void printStats { |
323 | //print("Different pairs: " + l(nodes)); |
324 | print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")"); |
325 | long count = 0, waste = 0, newWaste = 0; |
326 | long count1 = 0, waste1 = 0, newWaste1 = 0; |
327 | |
328 | for (Cousins set : nodes.values()) { |
329 | count1 += set.size(); |
330 | int capacity = set.capacity(); |
331 | waste1 += capacity-set.size(); |
332 | set.shrinkToFactor(0.5); |
333 | newWaste1 += capacity-set.size(); |
334 | } |
335 | |
336 | for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) { |
337 | count += set.size(); |
338 | waste += set.capacity()-set.size(); |
339 | set.shrinkToFactor(0.5); |
340 | newWaste += set.capacity()-set.size(); |
341 | } |
342 | |
343 | print("Chain length : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1)); |
344 | print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste)); |
345 | } |
346 | |
347 | Ch getChain(Node node) { |
348 | ret firstChain; |
349 | } |
350 | } |
Began life as a copy of #1028521
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028523 |
Snippet name: | LineComp_PairIndex [v3, getting rid of Node object headers, dev.] |
Eternal ID of this version: | #1028523/34 |
Text MD5: | 0b2a20e98f6d14083ed8e91672e537af |
Transpilation MD5: | 73cdced494d34c6eaae0f741661a9341 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-24 03:01:36 |
Source code size: | 10276 bytes / 350 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 269 / 546 |
Version history: | 33 change(s) |
Referenced in: | [show references] |