Uses 11335K of libraries. Click here for Pure Java version (3953L/25K).
1 | // This doesn't work yet because byCount uses a LinkedHashSet to |
2 | // store LinkedHashSets which have equality by contents, so calling |
3 | // byCount.remove(x, y) takes like forever. |
4 | // We'd need a LinkedIdentityHashSet but I haven't found a good |
5 | // implementation of that yet. |
6 | |
7 | !include once #1027304 // Eclipse Collections |
8 | import org.eclipse.collections.impl.map.mutable.primitive.*; |
9 | import org.eclipse.collections.api.tuple.primitive.*; |
10 | |
11 | set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this. |
12 | |
13 | final sclass LineComp_PairIndex { |
14 | bool verbose; |
15 | bool inOrder = true; |
16 | //MultiSetMap<IntPair, Node> nodes; |
17 | new LongObjectHashMap<LinkedHashSet<Node>> nodes; |
18 | MultiSetMap<Int, LinkedHashSet<Node>> byCount; |
19 | new LinkedHashMap<S, Ch> chains; |
20 | Ch firstChain; |
21 | int initialNodesSetCapacity = 1; |
22 | |
23 | void init { |
24 | /*nodes = inOrder |
25 | ? multiSetMap_innerLinkedHashSet_outerHashMap() |
26 | : multiSetMap_innerHashSet_outerHashMap();*/ |
27 | } |
28 | |
29 | // indicate end of chain-making phase |
30 | void haveChains { |
31 | if (byCount == null) { |
32 | print("n=" + nodes.size()); |
33 | time "Making byCount" { |
34 | byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap(); |
35 | for (LinkedHashSet<Node> set : nodes.values()) |
36 | byCount.put(set.size(), set); |
37 | } |
38 | } |
39 | } |
40 | |
41 | // a "chain" of nodes (one input file) |
42 | sclass Ch { |
43 | Node tail; |
44 | |
45 | *(LineComp_PairIndex pi, L<Int> l) { |
46 | int count = 0; |
47 | fOr (int i : l) { |
48 | add(pi, i); |
49 | if (((++count) % oneMillion()) == 0) |
50 | print("Added " + count + " entries to chain"); |
51 | } |
52 | } |
53 | |
54 | // TODO: optimize |
55 | L<Int> toList() { |
56 | new L<Int> l; |
57 | Node node = tail; |
58 | while (node != null) { |
59 | l.add(node.value); |
60 | node = node.prev; |
61 | } |
62 | ret reverseInPlace(l); |
63 | } |
64 | |
65 | void add(LineComp_PairIndex pi, int i) { |
66 | new Node n; |
67 | ifndef LineComp_SingleChain |
68 | n.ch = this; |
69 | endifndef |
70 | n.value = i; |
71 | n.prev = tail; |
72 | if (tail != null) tail.next = n; |
73 | tail = n; |
74 | pi.addToIndex(n.prev); |
75 | } |
76 | } |
77 | |
78 | sclass Node { |
79 | ifndef LineComp_SingleChain |
80 | Ch ch; |
81 | endifndef |
82 | int value; |
83 | Node prev, next; |
84 | |
85 | int a() { ret value; } |
86 | int b() { ret next == null ? -1 : next.value; } |
87 | IntPair pair() { ret next == null ? null : IntPair(a(), b()); } |
88 | } |
89 | |
90 | IntPair nodeToPair(Node n) { |
91 | ret n?.pair(); |
92 | } |
93 | |
94 | // add node to pair index |
95 | void addToIndex(Node n) { |
96 | IntPair p = nodeToPair(n); |
97 | if (p == null) ret; |
98 | int count = nodes_getSize(p); |
99 | LinkedHashSet<Node> set = nodes_add(p, n); |
100 | if (byCount != null) { // not in init phase |
101 | if (count != 0) byCount.remove(count, set); |
102 | byCount.put(count+1, set); |
103 | } |
104 | } |
105 | |
106 | // remove node from pair index |
107 | void removeFromIndex(Node n) { |
108 | IntPair p = nodeToPair(n); |
109 | if (p == null) ret; |
110 | LinkedHashSet<Node> set = nodes.get(intPairToLong(p)); |
111 | int count = l(set); |
112 | if (count == 0) fail("Can't remove pair " + p); |
113 | nodes_remove(p, n); |
114 | byCount.remove(count, set); |
115 | if (--count > 0) byCount.put(count, set); |
116 | } |
117 | |
118 | IntPair mostPopularDuplicate() { |
119 | ret toInt(firstKey(byCount)) < 2 ? null : first(firstValue(byCount)).pair(); |
120 | } |
121 | |
122 | int numberOfDuplicates() { |
123 | ret byCount.size()-l(byCount.get(1)); |
124 | } |
125 | |
126 | int getCount(IntPair p) { |
127 | ret nodes_getSize(p); |
128 | } |
129 | |
130 | void replacePair(int pa, int pb, int replaceWith) { |
131 | IntPair p = IntPair(pa, pb); |
132 | Set<Node> set = nodes.get(intPairToLong(p)); |
133 | if (set != null) |
134 | for (Node n : cloneList(set)) { |
135 | continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed |
136 | replacePair(n, replaceWith); |
137 | } |
138 | } |
139 | |
140 | void replacePair(Node node, int replaceWith) { |
141 | removeFromIndex(node.prev); |
142 | removeFromIndex(node); |
143 | removeFromIndex(node.next); |
144 | node.value = replaceWith; |
145 | deleteNode(node.next); |
146 | addToIndex(node.prev); |
147 | addToIndex(node); |
148 | } |
149 | |
150 | void deleteNode(Node node) { |
151 | if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev; |
152 | if (node.prev != null) node.prev.next = node.next; |
153 | node.value = -1; // mark invalid |
154 | } |
155 | |
156 | void newChain(S version, L<Int> encoding) { |
157 | Ch ch = new(this, encoding); |
158 | chains.put(version, ch); |
159 | ifdef LineComp_SingleChain |
160 | assertNull(firstChain); |
161 | firstChain = ch; |
162 | endifdef |
163 | } |
164 | |
165 | int nodes_getSize(IntPair p) { |
166 | ret l(nodes.get(intPairToLong(p))); |
167 | } |
168 | |
169 | LinkedHashSet<Node> nodes_add(IntPair p, Node n) { |
170 | long l = intPairToLong(p); |
171 | LinkedHashSet<Node> set = nodes.get(l); |
172 | if (set == null) |
173 | nodes.put(l, set = new LinkedHashSet(initialNodesSetCapacity); |
174 | set.add(n); |
175 | ret set; |
176 | } |
177 | |
178 | void nodes_remove(IntPair p, Node n) { |
179 | long l = intPairToLong(p); |
180 | LinkedHashSet<Node> set = nodes.get(l); |
181 | if (set == null) ret; |
182 | set.remove(n); |
183 | if (set.isEmpty()) |
184 | nodes.remove(l); |
185 | } |
186 | |
187 | // also shrinks sets... |
188 | void printStats { |
189 | print("Different pairs: " + l(nodes)); |
190 | print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")"); |
191 | long count = 0, waste = 0, newWaste = 0; |
192 | long count1 = 0, waste1 = 0; |
193 | |
194 | for (HashSet set : nodes.values()) { |
195 | count1 += set.size(); |
196 | int capacity = hashSetCapacity(set); |
197 | waste1 += capacity-set.size(); |
198 | //if (capacity > set.size()*3) set.shrink(); // TODO |
199 | } |
200 | |
201 | for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) { |
202 | count += set.size(); |
203 | waste += set.capacity()-set.size(); |
204 | set.shrinkToFactor(0.5); |
205 | newWaste += set.capacity()-set.size(); |
206 | } |
207 | |
208 | print("Nodes set count: " + count1 + ". Waste: " + waste1); |
209 | print("Set count: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste)); |
210 | } |
211 | |
212 | Ch getChain(Node node) { |
213 | ifdef LineComp_SingleChain |
214 | ret firstChain; |
215 | endifdef |
216 | ifndef LineComp_SingleChain |
217 | ret node.ch; |
218 | endifndef |
219 | } |
220 | } |
Began life as a copy of #1028247
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028519 |
Snippet name: | LineComp_PairIndex [byCount pointing to sets, dev.] |
Eternal ID of this version: | #1028519/7 |
Text MD5: | 1071b4d8e829bbec277f6a59a7438566 |
Transpilation MD5: | 897501b8740caf1967b7058836a5a848 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-24 01:24:35 |
Source code size: | 6289 bytes / 220 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 198 / 310 |
Version history: | 6 change(s) |
Referenced in: | [show references] |