Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

289
LINES

< > BotCompany Repo | #1030401 // LineComp_PairIndex - helper for LineComp compressor [backup before IntPairTreeSet]

JavaX fragment (include)

1  
!include once #1027304 // Eclipse Collections
2  
import org.eclipse.collections.impl.map.mutable.primitive.*;
3  
import org.eclipse.collections.api.tuple.primitive.*;
4  
5  
set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.
6  
7  
final sclass LineComp_PairIndex {
8  
  bool verbose;
9  
  bool inOrder = true;
10  
  //MultiSetMap<IntPair, Node> nodes;
11  
  new LongObjectHashMap<LinkedHashSet<Node>> nodes; // key: pair, value: set of nodes in order
12  
  MultiSetMap<Int, IntPair> byCount // values are pairs
13  
    = null; // init later
14  
    //= multiSetMap_innerHashSet_outerRevTreeMap(); // makes a difference in selecting a pair if there are multiple with the same count
15  
    // = multiSetMap_innerCompactHashSet_outerRevTreeMap(); // same but more compact
16  
    //= multiSetMap_innerTreeSet_outerRevTreeMap(); // initial version that kind of pointlessly sorts the same-count nodes lexicographically
17  
  new LinkedHashMap<S, Ch> chains;
18  
  Ch firstChain;
19  
  int totalPairCount;
20  
  int initialNodesSetCapacity = 1;
21  
  L<IVF1<Node>> onAddingPair, onRemovingPair;
22  
  LongIntHashMap existingPairsIndex;
23  
  
24  
  // This queue checks all newly made pairs to see if they already
25  
  // have a symbol.
26  
  new LinkedList<Node> compressionQueue;
27  
  
28  
  Comparator<IntPair> pairComparatorForBalancing;
29  
30  
  void init {
31  
    /*nodes = inOrder
32  
      ? multiSetMap_innerLinkedHashSet_outerHashMap()
33  
      : multiSetMap_innerHashSet_outerHashMap();*/
34  
  }
35  
  
36  
  // indicate end of chain-making phase.
37  
  // we still accept more chains after this though, I think (for late input)
38  
  void haveChains {
39  
    if (byCount == null) {
40  
      //time "Making byCount" {
41  
        if (pairComparatorForBalancing != null)
42  
          byCount = multiSetMap_innerCustomTreeSet_outerRevTreeMap(pairComparatorForBalancing);
43  
        else
44  
          byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
45  
        for (Set<Node> set : nodes.values()) {
46  
          IntPair p = first(set).pair();
47  
          int n = set.size();
48  
          if (verbose) print("Adding node " + p + " (" + n + ")");
49  
          byCount.put(n, p);
50  
        }
51  
      //}
52  
      ifdef LineComp_PairIndex_printSize
53  
        print("byCount size: " + n2(guessObjectSizeWithout(ll(this, pairComparatorForBalancing), byCount)) + " for " + nNodes(nodes.size()));
54  
      endifdef
55  
    }
56  
  }
57  
  
58  
  // a "chain" of nodes (one input file)
59  
  sclass Ch {
60  
    Node tail;
61  
    
62  
    *(LineComp_PairIndex pi, L<Int> l) {
63  
      int count = 0;
64  
      fOr (int i : l) {
65  
        add(pi, i);
66  
        if (((++count) % oneMillion()) == 0)
67  
          print("Added " + n2(count) + " entries to chain");
68  
      }
69  
    }
70  
    
71  
    L<Int> toList() {
72  
      new L<Int> l;
73  
      Node node = tail;
74  
      while (node != null) {
75  
        l.add(node.value);
76  
        node = node.prev;
77  
      }
78  
      ret reverseInPlace(l);
79  
    }
80  
    
81  
    void add(LineComp_PairIndex pi, int i) {
82  
      new Node n;
83  
      ifndef LineComp_SingleChain
84  
        n.ch = this;
85  
      endifndef
86  
      n.value = i;
87  
      n.prev = tail;
88  
      if (tail != null) tail.next = n;
89  
      tail = n;
90  
      pi.addToIndex(n.prev);
91  
      pi.processQueue();
92  
    }
93  
  }
94  
  
95  
  sclass Node {
96  
    ifndef LineComp_SingleChain
97  
      Ch ch;
98  
    endifndef
99  
    int value;
100  
    Node prev, next;
101  
    
102  
    int a() { ret value; }
103  
    int b() { ret next == null ? -1 : next.value; }
104  
    IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
105  
    
106  
    toString {
107  
      ret stringIf(prev != null, "<") + value +
108  
        (next == null ? "" : next.value
109  
         + stringIf(next.next != null, ">")); 
110  
    }
111  
  }
112  
  
113  
  IntPair nodeToPair(Node n) {
114  
    ret n?.pair();
115  
  }
116  
  
117  
  // add node to pair index
118  
  void addToIndex(Node n) {
119  
    IntPair p = nodeToPair(n);
120  
    if (p == null) ret;
121  
    callFAll(onAddingPair, n);
122  
    int count = nodes_getSize(p);
123  
    nodes_add(p, n);
124  
    if (byCount != null) { // not in init phase
125  
      if (count != 0) byCount.remove(count, p);
126  
      byCount.put(count+1, p);
127  
    }
128  
    if (existingPairsIndex != null)
129  
      compressionQueue.add(n);
130  
  }
131  
  
132  
  // remove node from pair index
133  
  void removeFromIndex(Node n) {
134  
    IntPair p = nodeToPair(n);
135  
    if (p == null) ret;
136  
    callFAll(onRemovingPair, n);
137  
    int count = nodes_getSize(p);
138  
    if (count == 0) fail("Can't remove pair " + p);
139  
    nodes_remove(p, n);
140  
    byCount.remove(count, p);
141  
    if (--count > 0) byCount.put(count, p);
142  
  }
143  
  
144  
  // return all pairs in "most popular" bucket
145  
  Cl<IntPair> mostPopularDuplicates() {
146  
    Int key = firstKey(byCount);
147  
    ret toInt(key) < 2 ? null : byCount.get(key);
148  
  }
149  
  
150  
  IntPair mostPopularDuplicate() {
151  
    ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount);
152  
  }
153  
  
154  
  IntPair mostPopularPair() {
155  
    ret firstValue(byCount);
156  
  }
157  
  
158  
  int highestBucketNr() {
159  
    ret or0(firstKey(byCount));
160  
  }
161  
  
162  
  // counts buckets actually (lower value than totalPairCount)
163  
  int numberOfDuplicates() {
164  
    ret byCount.size()-l(byCount.get(1));
165  
  }
166  
  
167  
  int getCount(IntPair p) {
168  
    ret nodes_getSize(p);
169  
  }
170  
  
171  
  void replacePair(int pa, int pb, int replaceWith) {
172  
    IntPair p = IntPair(pa, pb);
173  
    Set<Node> set = nodes.get(intPairToLong(p));
174  
    if (set != null)
175  
      for (Node n : cloneList(set)) {
176  
        continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed
177  
        replacePair(n, replaceWith);
178  
      }
179  
  }
180  
  
181  
  void replacePair(Node node, int replaceWith) {
182  
    removeFromIndex(node.prev);
183  
    removeFromIndex(node);
184  
    removeFromIndex(node.next);
185  
    node.value = replaceWith;
186  
    deleteNode(node.next);
187  
    addToIndex(node.prev);
188  
    addToIndex(node);
189  
    processQueue();
190  
  }
191  
  
192  
  void processQueue {
193  
    if (existingPairsIndex != null) {
194  
      Node n;
195  
      while not null (n = popFirst(compressionQueue))
196  
        checkIfPairExists(n);
197  
    }
198  
  }
199  
  
200  
  void checkIfPairExists(Node node) {
201  
    if (existingPairsIndex == null) ret;
202  
    if (node.value < 0) ret;
203  
    IntPair pair = nodeToPair(node);
204  
    if (pair == null) ret;
205  
    //print("Checking pair " + pair);
206  
    int i = existingPairsIndex.getIfAbsent(intPairToLong(pair), -1);
207  
    if (i >= 0) {
208  
      //print("  Replacing with: " + i);
209  
      replacePair(node, i);
210  
    }
211  
  }
212  
  
213  
  void deleteNode(Node node) {
214  
    if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev;
215  
    if (node.prev != null) node.prev.next = node.next;
216  
    node.value = -1; // mark invalid
217  
  }
218  
  
219  
  Ch newChain(S version, L<Int> encoding) {
220  
    Ch ch = new(this, encoding);
221  
    chains.put(version, ch);
222  
    ifdef LineComp_SingleChain
223  
      assertNull(firstChain);
224  
    endifdef
225  
    if (firstChain == null) firstChain = ch;
226  
    ret ch;
227  
  }
228  
  
229  
  int nodes_getSize(IntPair p) {
230  
    ret l(nodes.get(intPairToLong(p)));
231  
  }
232  
  
233  
  void nodes_add(IntPair p, Node n) {
234  
    ++totalPairCount;
235  
    long l = intPairToLong(p);
236  
    LinkedHashSet<Node> set = nodes.get(l);
237  
    if (set == null)
238  
      nodes.put(l, set = new LinkedHashSet(initialNodesSetCapacity);
239  
    set.add(n);
240  
  }
241  
  
242  
  void nodes_remove(IntPair p, Node n) {
243  
    --totalPairCount;
244  
    long l = intPairToLong(p);
245  
    LinkedHashSet<Node> set = nodes.get(l);
246  
    if (set == null || !set.remove(n)) fail("Consistency error");
247  
    if (set.isEmpty())
248  
      nodes.remove(l);
249  
  }
250  
  
251  
  // also shrinks sets...
252  
  void printStats {
253  
    print("Different pairs: " + l(nodes));
254  
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
255  
    long count = 0, waste = 0, newWaste = 0;
256  
    long count1 = 0, waste1 = 0;
257  
    
258  
    for (HashSet set : nodes.values()) {
259  
      count1 += set.size();
260  
      int capacity = hashSetCapacity(set);
261  
      waste1 += capacity-set.size();
262  
      //if (capacity > set.size()*3) set.shrink(); // TODO
263  
    }
264  
     
265  
    if (pairComparatorForBalancing == null) for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
266  
      count += set.size();
267  
      waste += set.capacity()-set.size();
268  
      set.shrinkToFactor(0.5);
269  
      newWaste += set.capacity()-set.size();
270  
    }
271  
    
272  
    print("Nodes set count: " + count1 + ". Waste: " + waste1);
273  
    //print("Set count: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
274  
    print("Duplicates: " + n2(totalPairCount-byCount.count(1)));
275  
  }
276  
  
277  
  Ch getChain(Node node) {
278  
    ifdef LineComp_SingleChain
279  
      ret firstChain;
280  
    endifdef
281  
    ifndef LineComp_SingleChain
282  
      ret node.ch;
283  
    endifndef
284  
  }
285  
  
286  
  /*public LineComp_PairIndex clone() {
287  
    
288  
  }*/
289  
}

Author comment

Began life as a copy of #1028247

download  show line numbers  debug dex  old transpilations   

Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt

No comments. add comment

Snippet ID: #1030401
Snippet name: LineComp_PairIndex - helper for LineComp compressor [backup before IntPairTreeSet]
Eternal ID of this version: #1030401/6
Text MD5: f8d383902744b65e9e47ef0b05d2c67e
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-12-13 18:30:43
Source code size: 8717 bytes / 289 lines
Pitched / IR pitched: No / No
Views / Downloads: 128 / 203
Version history: 5 change(s)
Referenced in: [show references]