Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

272
LINES

< > BotCompany Repo | #1028521 // LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (3961L/25K).

// Gave nodes + cousin sets a counter to make their hashing deterministic.

!include once #1027304 // Eclipse Collections
import org.eclipse.collections.impl.map.mutable.primitive.*;
import org.eclipse.collections.api.tuple.primitive.*;

set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.

final sclass LineComp_PairIndex {
  bool verbose;
  bool inOrder = true;
  //MultiSetMap<IntPair, Node> nodes;
  new LongObjectHashMap<Cousins> nodes;
  MultiSetMap<Int, Cousins> byCount;
  new LinkedHashMap<S, Ch> chains;
  Ch firstChain;
  int objectCounter; // for making deterministic hash codes
  int totalPairCount;
  
  static int initialNodesSetCapacity = 1;
  
  void init {} // not needed anymore
  
  // Cousins is a replacement for LinkedHashSet<Node>
  // - a set of all nodes forming a certain pair with their successor
  // Nodes are linked through prevCousin/nextCousin
  final sclass Cousins extends CompactHashSet<Node> {
    Node head, tail;

    int hashCode;
    *(LineComp_PairIndex pi) {
      super(initialNodesSetCapacity);
      hashCode = ++pi.objectCounter;
    }
    @Override public int hashCode() { ret hashCode; }

    public bool add(Node n) {
      n.prevCousin = tail;
      if (tail != null) // there already are entries
        tail.nextCousin = n; 
      else
        head = n;
      tail = n;
      ret super.add(n);
    }
    
    public bool remove(Node node) {
      if (node.nextCousin != null)
        node.nextCousin.prevCousin = node.prevCousin;
      else
        tail = node.prevCousin;
      if (node.prevCousin != null)
        node.prevCousin.nextCousin = node.nextCousin;
      else
        head = node.nextCousin;
      node.prevCousin = node.nextCousin = null;
      ret super.remove(node);
    }
    
    L<Node> asList() {
      L<Node> l = emptyList(size());
      Node n = head;
      while (n != null) {
        l.add(n);
        n = n.nextCousin;
      }
      ret l;
    }
  }
  
  // indicate end of chain-making phase
  void haveChains {
    if (byCount == null) {
      print("n=" + nodes.size());
      time "Making byCount" {
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
        for (Cousins set : nodes.values())
          byCount.put(set.size(), set);
      }
    }
  }
  
  // a "chain" of nodes (one input file)
  sclass Ch {
    Node tail;
    
    *(LineComp_PairIndex pi, L<Int> l) {
      int count = 0;
      fOr (int i : l) {
        add(pi, i);
        if (((++count) % oneMillion()) == 0)
          print("Added " + count + " entries to chain");
      }
    }
    
    L<Int> toList() {
      new L<Int> l;
      Node node = tail;
      while (node != null) {
        l.add(node.value);
        node = node.prev;
      }
      ret reverseInPlace(l);
    }
    
    void add(LineComp_PairIndex pi, int i) {
      Node n = new(pi);
      ifndef LineComp_SingleChain
        n.ch = this;
      endifndef
      n.value = i;
      n.prev = tail;
      if (tail != null) tail.next = n;
      tail = n;
      pi.addToIndex(n.prev);
    }
  }
  
  sclass Node {
    ifndef LineComp_SingleChain
      Ch ch;
    endifndef
    int value;
    Node prev, next;
    Node prevCousin, nextCousin; // prev & next with same pair value
    
    int hashCode;
    *(LineComp_PairIndex pi) { hashCode = ++pi.objectCounter; }
    @Override public int hashCode() { ret hashCode; }

    int a() { ret value; }
    int b() { ret next == null ? -1 : next.value; }
    IntPair pair() { ret next == null ? null : IntPair(a(), b()); }
  }
  
   IntPair nodeToPair(Node n) {
    ret n?.pair();
  }
  
  // add node to pair index
  void addToIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    int count = nodes_getSize(p);
    Cousins set = nodes_add(p, n);
    if (byCount != null) { // not in init phase
      if (count != 0) byCount.remove(count, set);
      byCount.put(count+1, set);
    }
  }
  
  // remove node from pair index
  void removeFromIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    Cousins set = nodes.get(intPairToLong(p));
    int count = l(set);
    if (count == 0) fail("Can't remove pair " + p);
    nodes_remove(p, n);
    byCount.remove(count, set);
    if (--count > 0) byCount.put(count, set);
  }
  
  IntPair mostPopularDuplicate() {
    ret toInt(firstKey(byCount)) < 2 ? null : firstValue(byCount).head.pair();
  }
  
  int numberOfDuplicates() {
    ret byCount.size()-l(byCount.get(1));
  }
  
  int getCount(IntPair p) {
    ret nodes_getSize(p);
  }
  
  void replacePair(int pa, int pb, int replaceWith) {
    IntPair p = IntPair(pa, pb);
    Cousins set = nodes.get(intPairToLong(p));
    if (set != null) {
      // TODO: might not even have to make this cloned list
      L<Node> l = set.asList();
      //for (Node n = cousins.head; n != null; n = n.nextCousin) {
      for (Node n : l) {
        continue if n.a() != pa || n.b() != pb; // nodes might have been deleted or changed
        replacePair(n, replaceWith);
      }
    }
  }
  
  void replacePair(Node node, int replaceWith) {
    removeFromIndex(node.prev);
    removeFromIndex(node);
    removeFromIndex(node.next);
    node.value = replaceWith;
    deleteNode(node.next);
    addToIndex(node.prev);
    addToIndex(node);
  }
  
  void deleteNode(Node node) {
    if (node.next != null) node.next.prev = node.prev; else getChain(node).tail = node.prev;
    if (node.prev != null) node.prev.next = node.next;
    node.value = -1; // mark invalid
  }
  
  void newChain(S version, L<Int> encoding) {
    Ch ch = new(this, encoding);
    chains.put(version, ch);
    ifdef LineComp_SingleChain
      assertNull(firstChain);
      firstChain = ch;
    endifdef
  }
  
  int nodes_getSize(IntPair p) {
    ret l(nodes.get(intPairToLong(p)));
  }
  
  Cousins nodes_add(IntPair p, Node n) {
    ++totalPairCount;
    long l = intPairToLong(p);
    Cousins set = nodes.get(l);
    if (set == null)
      nodes.put(l, set = new Cousins(this));
    set.add(n);
    ret set;
  }
  
  void nodes_remove(IntPair p, Node n) {
    --totalPairCount;
    long l = intPairToLong(p);
    Cousins set = nodes.get(l);
    if (set == null || !set.remove(n)) fail("Consistency error");
    if (set.isEmpty())
      nodes.remove(l);
  }
  
  // also shrinks sets...
  void printStats {
    //print("Different pairs: " + l(nodes));
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
    long count = 0, waste = 0, newWaste = 0;
    long count1 = 0, waste1 = 0, newWaste1 = 0;
    
    for (Cousins set : nodes.values()) {
      count1 += set.size();
      int capacity = set.capacity();
      waste1 += capacity-set.size();
      set.shrinkToFactor(0.5);
      newWaste1 += capacity-set.size();
    }
     
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
      count += set.size();
      waste += set.capacity()-set.size();
      set.shrinkToFactor(0.5);
      newWaste += set.capacity()-set.size();
    }
    
    print("Chain length   : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1));
    print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
  }
  
  Ch getChain(Node node) {
    ifdef LineComp_SingleChain
      ret firstChain;
    endifdef
    ifndef LineComp_SingleChain
      ret node.ch;
    endifndef
  }
}

Author comment

Began life as a copy of #1028247

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028521
Snippet name: LineComp_PairIndex [v2, finally replacing all LinkedHashSets in the critical path]
Eternal ID of this version: #1028521/33
Text MD5: de8173bbb49cadb5d2b784747b12cbd5
Transpilation MD5: 96571637b156f958f5eda0b52cb9ad03
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-04 06:44:37
Source code size: 7724 bytes / 272 lines
Pitched / IR pitched: No / No
Views / Downloads: 282 / 607
Version history: 32 change(s)
Referenced in: #1028520 - Benchmark LineComp v2 [OK, compact and fast!]
#1028523 - LineComp_PairIndex [v3, getting rid of Node object headers, dev.]