Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

272
LINES

< > BotCompany Repo | #1028521 // LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (3961L/25K).

1  
// Gave nodes + cousin sets a counter to make their hashing deterministic.
2  
3  
!include once #1027304 // Eclipse Collections
4  
import org.eclipse.collections.impl.map.mutable.primitive.*;
5  
import org.eclipse.collections.api.tuple.primitive.*;
6  
7  
set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.
8  
9  
final sclass LineComp_PairIndex {
10  
  bool verbose;
11  
  bool inOrder = true;
12  
  //MultiSetMap<IntPair, Node> nodes;
13  
  new LongObjectHashMap<Cousins> nodes;
14  
  MultiSetMap<Int, Cousins> byCount;
15  
  new LinkedHashMap<S, Ch> chains;
16  
  Ch firstChain;
17  
  int objectCounter; // for making deterministic hash codes
18  
  int totalPairCount;
19  
  
20  
  static int initialNodesSetCapacity = 1;
21  
  
22  
  void init {} // not needed anymore
23  
  
24  
  // Cousins is a replacement for LinkedHashSet<Node>
25  
  // - a set of all nodes forming a certain pair with their successor
26  
  // Nodes are linked through prevCousin/nextCousin
27  
  final sclass Cousins extends CompactHashSet<Node> {
28  
    Node head, tail;
29  
30  
    int hashCode;
31  
    *(LineComp_PairIndex pi) {
32  
      super(initialNodesSetCapacity);
33  
      hashCode = ++pi.objectCounter;
34  
    }
35  
    @Override public int hashCode() { ret hashCode; }
36  
37  
    public bool add(Node n) {
38  
      n.prevCousin = tail;
39  
      if (tail != null) // there already are entries
40  
        tail.nextCousin = n; 
41  
      else
42  
        head = n;
43  
      tail = n;
44  
      ret super.add(n);
45  
    }
46  
    
47  
    public bool remove(Node node) {
48  
      if (node.nextCousin != null)
49  
        node.nextCousin.prevCousin = node.prevCousin;
50  
      else
51  
        tail = node.prevCousin;
52  
      if (node.prevCousin != null)
53  
        node.prevCousin.nextCousin = node.nextCousin;
54  
      else
55  
        head = node.nextCousin;
56  
      node.prevCousin = node.nextCousin = null;
57  
      ret super.remove(node);
58  
    }
59  
    
60  
    L<Node> asList() {
61  
      L<Node> l = emptyList(size());
62  
      Node n = head;
63  
      while (n != null) {
64  
        l.add(n);
65  
        n = n.nextCousin;
66  
      }
67  
      ret l;
68  
    }
69  
  }
70  
  
71  
  // indicate end of chain-making phase
72  
  void haveChains {
73  
    if (byCount == null) {
74  
      print("n=" + nodes.size());
75  
      time "Making byCount" {
76  
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
77  
        for (Cousins set : nodes.values())
78  
          byCount.put(set.size(), set);
79  
      }
80  
    }
81  
  }
82  
  
83  
  // a "chain" of nodes (one input file)
84  
  sclass Ch {
85  
    Node tail;
86  
    
87  
    *(LineComp_PairIndex pi, L<Int> l) {
88  
      int count = 0;
89  
      fOr (int i : l) {
90  
        add(pi, i);
91  
        if (((++count) % oneMillion()) == 0)
92  
          print("Added " + count + " entries to chain");
93  
      }
94  
    }
95  
    
96  
    L<Int> toList() {
97  
      new L<Int> l;
98  
      Node node = tail;
99  
      while (node != null) {
100  
        l.add(node.value);
101  
        node = node.prev;
102  
      }
103  
      ret reverseInPlace(l);
104  
    }
105  
    
106  
    void add(LineComp_PairIndex pi, int i) {
107  
      Node n = new(pi);
108  
      ifndef LineComp_SingleChain
109  
        n.ch = this;
110  
      endifndef
111  
      n.value = i;
112  
      n.prev = tail;
113  
      if (tail != null) tail.next = n;
114  
      tail = n;
115  
      pi.addToIndex(n.prev);
116  
    }
117  
  }
118  
  
119  
  sclass Node {
120  
    ifndef LineComp_SingleChain
121  
      Ch ch;
122  
    endifndef
123  
    int value;
124  
    Node prev, next;
125  
    Node prevCousin, nextCousin; // prev & next with same pair value
126  
    
127  
    int hashCode;
128  
    *(LineComp_PairIndex pi) { hashCode = ++pi.objectCounter; }
129  
    @Override public int hashCode() { ret hashCode; }
130  
131  
    int a() { ret value; }
132  
    int b() { ret next == null ? -1 : next.value; }
133  
    IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
134  
  }
135  
  
136  
   IntPair nodeToPair(Node n) {
137  
    ret n?.pair();
138  
  }
139  
  
140  
  // add node to pair index
141  
  void addToIndex(Node n) {
142  
    IntPair p = nodeToPair(n);
143  
    if (p == null) ret;
144  
    int count = nodes_getSize(p);
145  
    Cousins set = nodes_add(p, n);
146  
    if (byCount != null) { // not in init phase
147  
      if (count != 0) byCount.remove(count, set);
148  
      byCount.put(count+1, set);
149  
    }
150  
  }
151  
  
152  
  // remove node from pair index
153  
  void removeFromIndex(Node n) {
154  
    IntPair p = nodeToPair(n);
155  
    if (p == null) ret;
156  
    Cousins set = nodes.get(intPairToLong(p));
157  
    int count = l(set);
158  
    if (count == 0) fail("Can't remove pair " + p);
159  
    nodes_remove(p, n);
160  
    byCount.remove(count, set);
161  
    if (--count > 0) byCount.put(count, set);
162  
  }
163  
  
164  
  IntPair mostPopularDuplicate() {
165  
    ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount).head.pair();
166  
  }
167  
  
168  
  int numberOfDuplicates() {
169  
    ret byCount.size()-l(byCount.get(1));
170  
  }
171  
  
172  
  int getCount(IntPair p) {
173  
    ret nodes_getSize(p);
174  
  }
175  
  
176  
  void replacePair(int pa, int pb, int replaceWith) {
177  
    IntPair p = IntPair(pa, pb);
178  
    Cousins set = nodes.get(intPairToLong(p));
179  
    if (set != null) {
180  
      // TODO: might not even have to make this cloned list
181  
      L<Node> l = set.asList();
182  
      //for (Node n = cousins.head; n != null; n = n.nextCousin) {
183  
      for (Node n : l) {
184  
        continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed
185  
        replacePair(n, replaceWith);
186  
      }
187  
    }
188  
  }
189  
  
190  
  void replacePair(Node node, int replaceWith) {
191  
    removeFromIndex(node.prev);
192  
    removeFromIndex(node);
193  
    removeFromIndex(node.next);
194  
    node.value = replaceWith;
195  
    deleteNode(node.next);
196  
    addToIndex(node.prev);
197  
    addToIndex(node);
198  
  }
199  
  
200  
  void deleteNode(Node node) {
201  
    if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev;
202  
    if (node.prev != null) node.prev.next = node.next;
203  
    node.value = -1; // mark invalid
204  
  }
205  
  
206  
  void newChain(S version, L<Int> encoding) {
207  
    Ch ch = new(this, encoding);
208  
    chains.put(version, ch);
209  
    ifdef LineComp_SingleChain
210  
      assertNull(firstChain);
211  
      firstChain = ch;
212  
    endifdef
213  
  }
214  
  
215  
  int nodes_getSize(IntPair p) {
216  
    ret l(nodes.get(intPairToLong(p)));
217  
  }
218  
  
219  
  Cousins nodes_add(IntPair p, Node n) {
220  
    ++totalPairCount;
221  
    long l = intPairToLong(p);
222  
    Cousins set = nodes.get(l);
223  
    if (set == null)
224  
      nodes.put(l, set = new Cousins(this));
225  
    set.add(n);
226  
    ret set;
227  
  }
228  
  
229  
  void nodes_remove(IntPair p, Node n) {
230  
    --totalPairCount;
231  
    long l = intPairToLong(p);
232  
    Cousins set = nodes.get(l);
233  
    if (set == null || !set.remove(n)) fail("Consistency error");
234  
    if (set.isEmpty())
235  
      nodes.remove(l);
236  
  }
237  
  
238  
  // also shrinks sets...
239  
  void printStats {
240  
    //print("Different pairs: " + l(nodes));
241  
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
242  
    long count = 0, waste = 0, newWaste = 0;
243  
    long count1 = 0, waste1 = 0, newWaste1 = 0;
244  
    
245  
    for (Cousins set : nodes.values()) {
246  
      count1 += set.size();
247  
      int capacity = set.capacity();
248  
      waste1 += capacity-set.size();
249  
      set.shrinkToFactor(0.5);
250  
      newWaste1 += capacity-set.size();
251  
    }
252  
     
253  
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
254  
      count += set.size();
255  
      waste += set.capacity()-set.size();
256  
      set.shrinkToFactor(0.5);
257  
      newWaste += set.capacity()-set.size();
258  
    }
259  
    
260  
    print("Chain length   : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1));
261  
    print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
262  
  }
263  
  
264  
  Ch getChain(Node node) {
265  
    ifdef LineComp_SingleChain
266  
      ret firstChain;
267  
    endifdef
268  
    ifndef LineComp_SingleChain
269  
      ret node.ch;
270  
    endifndef
271  
  }
272  
}

Author comment

Began life as a copy of #1028247

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028521
Snippet name: LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path]
Eternal ID of this version: #1028521/33
Text MD5: de8173bbb49cadb5d2b784747b12cbd5
Transpilation MD5: 96571637b156f958f5eda0b52cb9ad03
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-04 06:44:37
Source code size: 7724 bytes / 272 lines
Pitched / IR pitched: No / No
Views / Downloads: 283 / 609
Version history: 32 change(s)
Referenced in: [show references]