Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

350
LINES

< > BotCompany Repo | #1028523 // LineComp_PairIndex [v3, getting rid of Node object headers, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (4091L/26K).

1  
// assumes LineComp_SingleChain!
2  
// For multi-chain, use v2 of this class (#1028521)
3  
//
4  
// nodeHashCode is not used yet
5  
//
6  
// Some bug with the freeing of nodes...
7  
8  
!include once #1027304 // Eclipse Collections
9  
import org.eclipse.collections.impl.map.mutable.primitive.*;
10  
import org.eclipse.collections.api.tuple.primitive.*;
11  
12  
set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.
13  
14  
final sclass LineComp_PairIndex {
15  
  replace Node with int.
16  
  replace _Node with Int.
17  
  
18  
  bool verbose;
19  
  bool garbageCollect = false; // still buggy
20  
  new LongObjectHashMap<Cousins> nodes;
21  
  MultiSetMap<Int, Cousins> byCount;
22  
  new LinkedHashMap<S, Ch> chains;
23  
  Ch firstChain;
24  
  int objectCounter; // for making deterministic hash codes
25  
  int nodeCounter;
26  
27  
  static int initialNodesSetCapacity = 1;
28  
  
29  
  static final int nodeSize = 6; // 5 ints (hash code + value + 4 pointers)
30  
  static new ChunkedIntList nodeData;
31  
  
32  
  int nodeHashCode(int node) { ret nodeData.get(node); }
33  
  int value(int node) { ret nodeData.get(node+1); }
34  
  int prev(int node) { ret nodeData.get(node+2); }
35  
  int next(int node) { ret nodeData.get(node+3); }
36  
  int prevCousin(int node) { ret nodeData.get(node+4); }
37  
  int nextCousin(int node) { ret nodeData.get(node+5); }
38  
  
39  
  void setNodeHashCode(int node, int x) { nodeDataSet(node, x); }
40  
  void setValue(int node, int x) { nodeDataSet(node+1, x); }
41  
  void setPrev(int node, int x) { nodeDataSet(node+2, x); }
42  
  void setNext(int node, int x) { nodeDataSet(node+3, x); }
43  
  void setPrevCousin(int node, int x) { nodeDataSet(node+4, x); }
44  
  void setNextCousin(int node, int x) { nodeDataSet(node+5, x); }
45  
  
46  
  void nodeDataSet(int idx, int value) {
47  
    nodeData.set(idx, value);
48  
  }
49  
  
50  
  void init {
51  
    for i to nodeSize: nodeData.add(0);
52  
  }
53  
  
54  
  // Cousins is a replacement for LinkedHashSet<Node>
55  
  // - a set of all nodes forming a certain pair with their successor
56  
  // Nodes are linked through prevCousin/nextCousin
57  
  final class Cousins extends CompactHashSet<_Node> {
58  
    Node head, tail;
59  
60  
    int hashCode;
61  
    *(LineComp_PairIndex pi) {
62  
      super(initialNodesSetCapacity);
63  
      hashCode = ++pi.objectCounter;
64  
    }
65  
    @Override public int hashCode() { ret hashCode; }
66  
67  
    public bool add(Node n) {
68  
      setPrevCousin(n, tail);
69  
      if (tail != 0) // there already are entries
70  
        setNextCousin(tail, n); 
71  
      else
72  
        head = n;
73  
      tail = n;
74  
      ret super.add(n);
75  
    }
76  
    
77  
    public bool remove(Node node) {
78  
      if (nextCousin(node) != 0)
79  
        setPrevCousin(nextCousin(node), prevCousin(node));
80  
      else
81  
        tail = prevCousin(node);
82  
      if (prevCousin(node) != 0)
83  
        setNextCousin(prevCousin(node), nextCousin(node));
84  
      else
85  
        head = nextCousin(node);
86  
      setPrevCousin(node, 0);
87  
      setNextCousin(node, 0);
88  
      ret super.remove(node);
89  
    }
90  
    
91  
    // node has changed internal location. don't change the linked list stuff!
92  
    void moveNode(Node from, Node to) {
93  
      super.remove(from);
94  
      super.add(to);
95  
    }
96  
    
97  
    L<_Node> asList() {
98  
      L<_Node> l = emptyList(size());
99  
      Node n = head;
100  
      while (n != 0) {
101  
        l.add(n);
102  
        n = nextCousin(n);
103  
      }
104  
      ret l;
105  
    }
106  
  }
107  
  
108  
  // indicate end of chain-making phase
109  
  void haveChains {
110  
    if (byCount == null) {
111  
      print("n=" + nodes.size());
112  
      time "Making byCount" {
113  
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
114  
        for (Cousins set : nodes.values())
115  
          byCount.put(set.size(), set);
116  
      }
117  
    }
118  
  }
119  
  
120  
  // a "chain" of nodes (one input file)
121  
  class Ch {
122  
    Node tail;
123  
    
124  
    *(LineComp_PairIndex pi, L<Int> l) {
125  
      int count = 0;
126  
      fOr (int i : l) {
127  
        add(pi, i);
128  
        if (((++count) % oneMillion()) == 0)
129  
          print("Added " + count + " entries to chain");
130  
      }
131  
    }
132  
    
133  
    L<Int> toList() {
134  
      new L<Int> l;
135  
      Node node = tail;
136  
      while (node != 0) {
137  
        l.add(value(node));
138  
        node = prev(node);
139  
      }
140  
      ret reverseInPlace(l);
141  
    }
142  
    
143  
    void add(LineComp_PairIndex pi, int i) {
144  
      Node n = pi.newNode();
145  
      setValue(n, i);
146  
      setPrev(n, tail);
147  
      if (tail != 0) setNext(tail, n);
148  
      tail = n;
149  
      pi.addToIndex(prev(n));
150  
    }
151  
  }
152  
  
153  
  Node newNode() {
154  
    for i to nodeSize: nodeData.add(0);
155  
    ret nodeCounter += nodeSize;
156  
  }
157  
  
158  
  int node_a(int node) { ret value(node); }
159  
  int node_b(int node) { int next = next(node); ret next == 0 ? -1 : value(next); }
160  
  
161  
  IntPair nodeToPair(int node) {
162  
    if (node == 0) null;
163  
    int next = next(node);
164  
    ret next == 0 ? null : IntPair(node_a(node), node_b(node));
165  
  }
166  
167  
  // add node to pair index
168  
  void addToIndex(Node n) {
169  
    IntPair p = nodeToPair(n);
170  
    if (p == null) ret;
171  
    int count = nodes_getSize(p);
172  
    Cousins set = nodes_add(p, n);
173  
    if (byCount != null) { // not in init phase
174  
      if (count != 0) byCount.remove(count, set);
175  
      byCount.put(count+1, set);
176  
    }
177  
  }
178  
  
179  
  // remove node from pair index
180  
  void removeFromIndex(Node n) {
181  
    IntPair p = nodeToPair(n);
182  
    if (p == null) ret;
183  
    Cousins set = nodes.get(intPairToLong(p));
184  
    int count = l(set);
185  
    if (count == 0) fail("Can't remove pair " + p);
186  
    nodes_remove(p, n);
187  
    byCount.remove(count, set);
188  
    if (--count > 0) byCount.put(count, set);
189  
  }
190  
  
191  
  IntPair mostPopularDuplicate() {
192  
    ret toInt(firstKey(byCount)) < 2 ? null : nodeToPair(firstValue(byCount).head);
193  
  }
194  
  
195  
  int numberOfDuplicates() {
196  
    ret byCount.size()-l(byCount.get(1));
197  
  }
198  
  
199  
  int getCount(IntPair p) {
200  
    ret nodes_getSize(p);
201  
  }
202  
  
203  
  L<_Node> replacing;
204  
  Map<_Node, Int> replacing_index;
205  
  
206  
  void replacePair(int pa, int pb, int replaceWith) {
207  
    IntPair p = IntPair(pa, pb);
208  
    Cousins set = nodes.get(intPairToLong(p));
209  
    if (set != null) {
210  
      replacing = set.asList();
211  
      replacing_index = listIndex(replacing);
212  
      
213  
      for i over replacing: {
214  
        Node n = replacing.get(i);
215  
        continue if node_a(n) != pa || node_b(n) != pb; // nodes might have been deleted or changed
216  
        replacePair(n, replaceWith);
217  
      }
218  
      
219  
      replacing = null;
220  
      replacing_index = null;
221  
    }
222  
  }
223  
  
224  
  void replacePair(Node node, int replaceWith) {
225  
    removeFromIndex(prev(node));
226  
    removeFromIndex(node);
227  
    removeFromIndex(next(node));
228  
    setValue(node, replaceWith);
229  
    deleteNode(next(node));
230  
    addToIndex(prev(node));
231  
    addToIndex(node);
232  
  }
233  
  
234  
  void deleteNode(Node node) {
235  
    if (next(node) != 0)
236  
      setPrev(next(node), prev(node));
237  
    else
238  
      firstChain.tail = prev(node);
239  
    if (prev(node) != 0)
240  
      setNext(prev(node), next(node));
241  
    //setValue(node, -1); // mark invalid
242  
    physicallyDisposeOfNode(node);
243  
  }
244  
  
245  
  void physicallyDisposeOfNode(Node node) {
246  
    if (!garbageCollect) ret;
247  
    // move last node in list to place that is now free
248  
    if (node != nodeCounter) {
249  
      //print("Moving node " + nodeCounter + " to " + node);
250  
      moveNode(nodeCounter, node);
251  
    }
252  
    //print("Disposing of node " + nodeCounter);
253  
    nodeData.truncateAt(nodeCounter);
254  
    nodeCounter -= nodeSize;
255  
  }
256  
  
257  
  void moveNode(int from, int to) {
258  
    // Copy hash code & value.
259  
    setNodeHashCode(to, nodeHashCode(from));
260  
    setValue(to, value(from));
261  
    
262  
    // Be sure to update all the pointers there may be to this node!
263  
    // First, find Cousins object.
264  
    IntPair pair = nodeToPair(from);
265  
    if (pair != null) {
266  
      Cousins cousins = nodes.get(intPairToLong(pair));
267  
      if (cousins.head == from) cousins.head = to;
268  
      if (cousins.tail == from) cousins.tail = to;
269  
      cousins.moveNode(from, to);
270  
    }
271  
    
272  
    // Next, check the chain.
273  
    if (firstChain.tail == from) firstChain.tail = to;
274  
    
275  
    // Now the 4 prev and next pointers
276  
    int i;
277  
    if ((i = prev(from)) != 0) setNext(i, to);
278  
    if ((i = next(from)) != 0) setPrev(i, to);
279  
    if ((i = prevCousin(from)) != 0) setNextCousin(i, to);
280  
    if ((i = nextCousin(from)) != 0) setPrevCousin(i, to);
281  
    
282  
    // Finally the list we are currently replacing
283  
    if (replacing_index != null) {
284  
      _Node ref = replacing_index.get(from);
285  
      if (ref != null)
286  
        replacing.set(ref, to);
287  
    }
288  
289  
    // That should be it...!?
290  
  }
291  
292  
  void newChain(S version, L<Int> encoding) {
293  
    Ch ch = new(this, encoding);
294  
    chains.put(version, ch);
295  
    assertNull(firstChain);
296  
    firstChain = ch;
297  
  }
298  
  
299  
  int nodes_getSize(IntPair p) {
300  
    ret l(nodes.get(intPairToLong(p)));
301  
  }
302  
  
303  
  Cousins nodes_add(IntPair p, Node n) {
304  
    long l = intPairToLong(p);
305  
    Cousins set = nodes.get(l);
306  
    if (set == null)
307  
      nodes.put(l, set = new Cousins(this));
308  
    set.add(n);
309  
    ret set;
310  
  }
311  
  
312  
  void nodes_remove(IntPair p, Node n) {
313  
    long l = intPairToLong(p);
314  
    Cousins set = nodes.get(l);
315  
    if (set == null) ret;
316  
    set.remove(n);
317  
    if (set.isEmpty())
318  
      nodes.remove(l);
319  
  }
320  
  
321  
  // also shrinks sets...
322  
  void printStats {
323  
    //print("Different pairs: " + l(nodes));
324  
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
325  
    long count = 0, waste = 0, newWaste = 0;
326  
    long count1 = 0, waste1 = 0, newWaste1 = 0;
327  
    
328  
    for (Cousins set : nodes.values()) {
329  
      count1 += set.size();
330  
      int capacity = set.capacity();
331  
      waste1 += capacity-set.size();
332  
      set.shrinkToFactor(0.5);
333  
      newWaste1 += capacity-set.size();
334  
    }
335  
     
336  
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
337  
      count += set.size();
338  
      waste += set.capacity()-set.size();
339  
      set.shrinkToFactor(0.5);
340  
      newWaste += set.capacity()-set.size();
341  
    }
342  
    
343  
    print("Chain length   : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1));
344  
    print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
345  
  }
346  
  
347  
  Ch getChain(Node node) {
348  
    ret firstChain;
349  
  }
350  
}

Author comment

Began life as a copy of #1028521

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028523
Snippet name: LineComp_PairIndex [v3, getting rid of Node object headers, dev.]
Eternal ID of this version: #1028523/34
Text MD5: 0b2a20e98f6d14083ed8e91672e537af
Transpilation MD5: 73cdced494d34c6eaae0f741661a9341
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-24 03:01:36
Source code size: 10276 bytes / 350 lines
Pitched / IR pitched: No / No
Views / Downloads: 204 / 455
Version history: 33 change(s)
Referenced in: [show references]