Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

220
LINES

< > BotCompany Repo | #1028519 // LineComp_PairIndex [byCount pointing to sets, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (3953L/25K).

1  
// This doesn't work yet because byCount uses a LinkedHashSet to
2  
// store LinkedHashSets which have equality by contents, so calling
3  
// byCount.remove(x, y) takes like forever.
4  
// We'd need a LinkedIdentityHashSet but I haven't found a good
5  
// implementation of that yet.
6  
7  
!include once #1027304 // Eclipse Collections
8  
import org.eclipse.collections.impl.map.mutable.primitive.*;
9  
import org.eclipse.collections.api.tuple.primitive.*;
10  
11  
set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.
12  
13  
final sclass LineComp_PairIndex {
14  
  bool verbose;
15  
  bool inOrder = true;
16  
  //MultiSetMap<IntPair, Node> nodes;
17  
  new LongObjectHashMap<LinkedHashSet<Node>> nodes;
18  
  MultiSetMap<Int, LinkedHashSet<Node>> byCount;
19  
  new LinkedHashMap<S, Ch> chains;
20  
  Ch firstChain;
21  
  int initialNodesSetCapacity = 1;
22  
  
23  
  void init {
24  
    /*nodes = inOrder
25  
      ? multiSetMap_innerLinkedHashSet_outerHashMap()
26  
      : multiSetMap_innerHashSet_outerHashMap();*/
27  
  }
28  
  
29  
  // indicate end of chain-making phase
30  
  void haveChains {
31  
    if (byCount == null) {
32  
      print("n=" + nodes.size());
33  
      time "Making byCount" {
34  
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
35  
        for (LinkedHashSet<Node> set : nodes.values())
36  
          byCount.put(set.size(), set);
37  
      }
38  
    }
39  
  }
40  
  
41  
  // a "chain" of nodes (one input file)
42  
  sclass Ch {
43  
    Node tail;
44  
    
45  
    *(LineComp_PairIndex pi, L<Int> l) {
46  
      int count = 0;
47  
      fOr (int i : l) {
48  
        add(pi, i);
49  
        if (((++count) % oneMillion()) == 0)
50  
          print("Added " + count + " entries to chain");
51  
      }
52  
    }
53  
    
54  
    // TODO: optimize
55  
    L<Int> toList() {
56  
      new L<Int> l;
57  
      Node node = tail;
58  
      while (node != null) {
59  
        l.add(node.value);
60  
        node = node.prev;
61  
      }
62  
      ret reverseInPlace(l);
63  
    }
64  
    
65  
    void add(LineComp_PairIndex pi, int i) {
66  
      new Node n;
67  
      ifndef LineComp_SingleChain
68  
        n.ch = this;
69  
      endifndef
70  
      n.value = i;
71  
      n.prev = tail;
72  
      if (tail != null) tail.next = n;
73  
      tail = n;
74  
      pi.addToIndex(n.prev);
75  
    }
76  
  }
77  
  
78  
  sclass Node {
79  
    ifndef LineComp_SingleChain
80  
      Ch ch;
81  
    endifndef
82  
    int value;
83  
    Node prev, next;
84  
    
85  
    int a() { ret value; }
86  
    int b() { ret next == null ? -1 : next.value; }
87  
    IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
88  
  }
89  
  
90  
  IntPair nodeToPair(Node n) {
91  
    ret n?.pair();
92  
  }
93  
  
94  
  // add node to pair index
95  
  void addToIndex(Node n) {
96  
    IntPair p = nodeToPair(n);
97  
    if (p == null) ret;
98  
    int count = nodes_getSize(p);
99  
    LinkedHashSet<Node> set = nodes_add(p, n);
100  
    if (byCount != null) { // not in init phase
101  
      if (count != 0) byCount.remove(count, set);
102  
      byCount.put(count+1, set);
103  
    }
104  
  }
105  
  
106  
  // remove node from pair index
107  
  void removeFromIndex(Node n) {
108  
    IntPair p = nodeToPair(n);
109  
    if (p == null) ret;
110  
    LinkedHashSet<Node> set = nodes.get(intPairToLong(p));
111  
    int count = l(set);
112  
    if (count == 0) fail("Can't remove pair " + p);
113  
    nodes_remove(p, n);
114  
    byCount.remove(count, set);
115  
    if (--count > 0) byCount.put(count, set);
116  
  }
117  
  
118  
  IntPair mostPopularDuplicate() {
119  
    ret toInt(firstKey(byCount)) < 2 ? null : first(firstValue(byCount)).pair();
120  
  }
121  
  
122  
  int numberOfDuplicates() {
123  
    ret byCount.size()-l(byCount.get(1));
124  
  }
125  
  
126  
  int getCount(IntPair p) {
127  
    ret nodes_getSize(p);
128  
  }
129  
  
130  
  void replacePair(int pa, int pb, int replaceWith) {
131  
    IntPair p = IntPair(pa, pb);
132  
    Set<Node> set = nodes.get(intPairToLong(p));
133  
    if (set != null)
134  
      for (Node n : cloneList(set)) {
135  
        continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed
136  
        replacePair(n, replaceWith);
137  
      }
138  
  }
139  
  
140  
  void replacePair(Node node, int replaceWith) {
141  
    removeFromIndex(node.prev);
142  
    removeFromIndex(node);
143  
    removeFromIndex(node.next);
144  
    node.value = replaceWith;
145  
    deleteNode(node.next);
146  
    addToIndex(node.prev);
147  
    addToIndex(node);
148  
  }
149  
  
150  
  void deleteNode(Node node) {
151  
    if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev;
152  
    if (node.prev != null) node.prev.next = node.next;
153  
    node.value = -1; // mark invalid
154  
  }
155  
  
156  
  void newChain(S version, L<Int> encoding) {
157  
    Ch ch = new(this, encoding);
158  
    chains.put(version, ch);
159  
    ifdef LineComp_SingleChain
160  
      assertNull(firstChain);
161  
      firstChain = ch;
162  
    endifdef
163  
  }
164  
  
165  
  int nodes_getSize(IntPair p) {
166  
    ret l(nodes.get(intPairToLong(p)));
167  
  }
168  
  
169  
  LinkedHashSet<Node> nodes_add(IntPair p, Node n) {
170  
    long l = intPairToLong(p);
171  
    LinkedHashSet<Node> set = nodes.get(l);
172  
    if (set == null)
173  
      nodes.put(l, set = new LinkedHashSet(initialNodesSetCapacity);
174  
    set.add(n);
175  
    ret set;
176  
  }
177  
  
178  
  void nodes_remove(IntPair p, Node n) {
179  
    long l = intPairToLong(p);
180  
    LinkedHashSet<Node> set = nodes.get(l);
181  
    if (set == null) ret;
182  
    set.remove(n);
183  
    if (set.isEmpty())
184  
      nodes.remove(l);
185  
  }
186  
  
187  
  // also shrinks sets...
188  
  void printStats {
189  
    print("Different pairs: " + l(nodes));
190  
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
191  
    long count = 0, waste = 0, newWaste = 0;
192  
    long count1 = 0, waste1 = 0;
193  
    
194  
    for (HashSet set : nodes.values()) {
195  
      count1 += set.size();
196  
      int capacity = hashSetCapacity(set);
197  
      waste1 += capacity-set.size();
198  
      //if (capacity > set.size()*3) set.shrink(); // TODO
199  
    }
200  
     
201  
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
202  
      count += set.size();
203  
      waste += set.capacity()-set.size();
204  
      set.shrinkToFactor(0.5);
205  
      newWaste += set.capacity()-set.size();
206  
    }
207  
    
208  
    print("Nodes set count: " + count1 + ". Waste: " + waste1);
209  
    print("Set count: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
210  
  }
211  
  
212  
  Ch getChain(Node node) {
213  
    ifdef LineComp_SingleChain
214  
      ret firstChain;
215  
    endifdef
216  
    ifndef LineComp_SingleChain
217  
      ret node.ch;
218  
    endifndef
219  
  }
220  
}

Author comment

Began life as a copy of #1028247

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028519
Snippet name: LineComp_PairIndex [byCount pointing to sets, dev.]
Eternal ID of this version: #1028519/7
Text MD5: 1071b4d8e829bbec277f6a59a7438566
Transpilation MD5: 897501b8740caf1967b7058836a5a848
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-24 01:24:35
Source code size: 6289 bytes / 220 lines
Pitched / IR pitched: No / No
Views / Downloads: 198 / 310
Version history: 6 change(s)
Referenced in: [show references]