Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

220
LINES

< > BotCompany Repo | #1028519 // LineComp_PairIndex [byCount pointing to sets, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (3953L/25K).

// This doesn't work yet because byCount uses a LinkedHashSet to
// store LinkedHashSets which have equality by contents, so calling
// byCount.remove(x, y) takes like forever.
// We'd need a LinkedIdentityHashSet but I haven't found a good
// implementation of that yet.

!include once #1027304 // Eclipse Collections
import org.eclipse.collections.impl.map.mutable.primitive.*;
import org.eclipse.collections.api.tuple.primitive.*;

set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.

final sclass LineComp_PairIndex {
  bool verbose;
  bool inOrder = true;
  //MultiSetMap<IntPair, Node> nodes;
  new LongObjectHashMap<LinkedHashSet<Node>> nodes;
  MultiSetMap<Int, LinkedHashSet<Node>> byCount;
  new LinkedHashMap<S, Ch> chains;
  Ch firstChain;
  int initialNodesSetCapacity = 1;
  
  void init {
    /*nodes = inOrder
      ? multiSetMap_innerLinkedHashSet_outerHashMap()
      : multiSetMap_innerHashSet_outerHashMap();*/
  }
  
  // indicate end of chain-making phase
  void haveChains {
    if (byCount == null) {
      print("n=" + nodes.size());
      time "Making byCount" {
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
        for (LinkedHashSet<Node> set : nodes.values())
          byCount.put(set.size(), set);
      }
    }
  }
  
  // a "chain" of nodes (one input file)
  sclass Ch {
    Node tail;
    
    *(LineComp_PairIndex pi, L<Int> l) {
      int count = 0;
      fOr (int i : l) {
        add(pi, i);
        if (((++count) % oneMillion()) == 0)
          print("Added " + count + " entries to chain");
      }
    }
    
    // TODO: optimize
    L<Int> toList() {
      new L<Int> l;
      Node node = tail;
      while (node != null) {
        l.add(node.value);
        node = node.prev;
      }
      ret reverseInPlace(l);
    }
    
    void add(LineComp_PairIndex pi, int i) {
      new Node n;
      ifndef LineComp_SingleChain
        n.ch = this;
      endifndef
      n.value = i;
      n.prev = tail;
      if (tail != null) tail.next = n;
      tail = n;
      pi.addToIndex(n.prev);
    }
  }
  
  sclass Node {
    ifndef LineComp_SingleChain
      Ch ch;
    endifndef
    int value;
    Node prev, next;
    
    int a() { ret value; }
    int b() { ret next == null ? -1 : next.value; }
    IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
  }
  
  IntPair nodeToPair(Node n) {
    ret n?.pair();
  }
  
  // add node to pair index
  void addToIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    int count = nodes_getSize(p);
    LinkedHashSet<Node> set = nodes_add(p, n);
    if (byCount != null) { // not in init phase
      if (count != 0) byCount.remove(count, set);
      byCount.put(count+1, set);
    }
  }
  
  // remove node from pair index
  void removeFromIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    LinkedHashSet<Node> set = nodes.get(intPairToLong(p));
    int count = l(set);
    if (count == 0) fail("Can't remove pair " + p);
    nodes_remove(p, n);
    byCount.remove(count, set);
    if (--count > 0) byCount.put(count, set);
  }
  
  IntPair mostPopularDuplicate() {
    ret toInt(firstKey(byCount)) < 2 ? null : first(firstValue(byCount)).pair();
  }
  
  int numberOfDuplicates() {
    ret byCount.size()-l(byCount.get(1));
  }
  
  int getCount(IntPair p) {
    ret nodes_getSize(p);
  }
  
  void replacePair(int pa, int pb, int replaceWith) {
    IntPair p = IntPair(pa, pb);
    Set<Node> set = nodes.get(intPairToLong(p));
    if (set != null)
      for (Node n : cloneList(set)) {
        continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed
        replacePair(n, replaceWith);
      }
  }
  
  void replacePair(Node node, int replaceWith) {
    removeFromIndex(node.prev);
    removeFromIndex(node);
    removeFromIndex(node.next);
    node.value = replaceWith;
    deleteNode(node.next);
    addToIndex(node.prev);
    addToIndex(node);
  }
  
  void deleteNode(Node node) {
    if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev;
    if (node.prev != null) node.prev.next = node.next;
    node.value = -1; // mark invalid
  }
  
  void newChain(S version, L<Int> encoding) {
    Ch ch = new(this, encoding);
    chains.put(version, ch);
    ifdef LineComp_SingleChain
      assertNull(firstChain);
      firstChain = ch;
    endifdef
  }
  
  int nodes_getSize(IntPair p) {
    ret l(nodes.get(intPairToLong(p)));
  }
  
  LinkedHashSet<Node> nodes_add(IntPair p, Node n) {
    long l = intPairToLong(p);
    LinkedHashSet<Node> set = nodes.get(l);
    if (set == null)
      nodes.put(l, set = new LinkedHashSet(initialNodesSetCapacity);
    set.add(n);
    ret set;
  }
  
  void nodes_remove(IntPair p, Node n) {
    long l = intPairToLong(p);
    LinkedHashSet<Node> set = nodes.get(l);
    if (set == null) ret;
    set.remove(n);
    if (set.isEmpty())
      nodes.remove(l);
  }
  
  // also shrinks sets...
  void printStats {
    print("Different pairs: " + l(nodes));
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
    long count = 0, waste = 0, newWaste = 0;
    long count1 = 0, waste1 = 0;
    
    for (HashSet set : nodes.values()) {
      count1 += set.size();
      int capacity = hashSetCapacity(set);
      waste1 += capacity-set.size();
      //if (capacity > set.size()*3) set.shrink(); // TODO
    }
     
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
      count += set.size();
      waste += set.capacity()-set.size();
      set.shrinkToFactor(0.5);
      newWaste += set.capacity()-set.size();
    }
    
    print("Nodes set count: " + count1 + ". Waste: " + waste1);
    print("Set count: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
  }
  
  Ch getChain(Node node) {
    ifdef LineComp_SingleChain
      ret firstChain;
    endifdef
    ifndef LineComp_SingleChain
      ret node.ch;
    endifndef
  }
}

Author comment

Began life as a copy of #1028247

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028519
Snippet name: LineComp_PairIndex [byCount pointing to sets, dev.]
Eternal ID of this version: #1028519/7
Text MD5: 1071b4d8e829bbec277f6a59a7438566
Transpilation MD5: 897501b8740caf1967b7058836a5a848
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-24 01:24:35
Source code size: 6289 bytes / 220 lines
Pitched / IR pitched: No / No
Views / Downloads: 120 / 217
Version history: 6 change(s)
Referenced in: [show references]