Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

350
LINES

< > BotCompany Repo | #1028523 // LineComp_PairIndex [v3, getting rid of Node object headers, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (4091L/26K).

// assumes LineComp_SingleChain!
// For multi-chain, use v2 of this class (#1028521)
//
// nodeHashCode is not used yet
//
// Some bug with the freeing of nodes...

!include once #1027304 // Eclipse Collections
import org.eclipse.collections.impl.map.mutable.primitive.*;
import org.eclipse.collections.api.tuple.primitive.*;

set flag NoModCount. // save some space. Make sure that this flag doesn't screw other collections/map you compile with this.

final sclass LineComp_PairIndex {
  replace Node with int.
  replace _Node with Int.
  
  bool verbose;
  bool garbageCollect = false; // still buggy
  new LongObjectHashMap<Cousins> nodes;
  MultiSetMap<Int, Cousins> byCount;
  new LinkedHashMap<S, Ch> chains;
  Ch firstChain;
  int objectCounter; // for making deterministic hash codes
  int nodeCounter;

  static int initialNodesSetCapacity = 1;
  
  static final int nodeSize = 6; // 5 ints (hash code + value + 4 pointers)
  static new ChunkedIntList nodeData;
  
  int nodeHashCode(int node) { ret nodeData.get(node); }
  int value(int node) { ret nodeData.get(node+1); }
  int prev(int node) { ret nodeData.get(node+2); }
  int next(int node) { ret nodeData.get(node+3); }
  int prevCousin(int node) { ret nodeData.get(node+4); }
  int nextCousin(int node) { ret nodeData.get(node+5); }
  
  void setNodeHashCode(int node, int x) { nodeDataSet(node, x); }
  void setValue(int node, int x) { nodeDataSet(node+1, x); }
  void setPrev(int node, int x) { nodeDataSet(node+2, x); }
  void setNext(int node, int x) { nodeDataSet(node+3, x); }
  void setPrevCousin(int node, int x) { nodeDataSet(node+4, x); }
  void setNextCousin(int node, int x) { nodeDataSet(node+5, x); }
  
  void nodeDataSet(int idx, int value) {
    nodeData.set(idx, value);
  }
  
  void init {
    for i to nodeSize: nodeData.add(0);
  }
  
  // Cousins is a replacement for LinkedHashSet<Node>
  // - a set of all nodes forming a certain pair with their successor
  // Nodes are linked through prevCousin/nextCousin
  final class Cousins extends CompactHashSet<_Node> {
    Node head, tail;

    int hashCode;
    *(LineComp_PairIndex pi) {
      super(initialNodesSetCapacity);
      hashCode = ++pi.objectCounter;
    }
    @Override public int hashCode() { ret hashCode; }

    public bool add(Node n) {
      setPrevCousin(n, tail);
      if (tail != 0) // there already are entries
        setNextCousin(tail, n); 
      else
        head = n;
      tail = n;
      ret super.add(n);
    }
    
    public bool remove(Node node) {
      if (nextCousin(node) != 0)
        setPrevCousin(nextCousin(node), prevCousin(node));
      else
        tail = prevCousin(node);
      if (prevCousin(node) != 0)
        setNextCousin(prevCousin(node), nextCousin(node));
      else
        head = nextCousin(node);
      setPrevCousin(node, 0);
      setNextCousin(node, 0);
      ret super.remove(node);
    }
    
    // node has changed internal location. don't change the linked list stuff!
    void moveNode(Node from, Node to) {
      super.remove(from);
      super.add(to);
    }
    
    L<_Node> asList() {
      L<_Node> l = emptyList(size());
      Node n = head;
      while (n != 0) {
        l.add(n);
        n = nextCousin(n);
      }
      ret l;
    }
  }
  
  // indicate end of chain-making phase
  void haveChains {
    if (byCount == null) {
      print("n=" + nodes.size());
      time "Making byCount" {
        byCount = multiSetMap_innerCompactHashSet_outerRevTreeMap();
        for (Cousins set : nodes.values())
          byCount.put(set.size(), set);
      }
    }
  }
  
  // a "chain" of nodes (one input file)
  class Ch {
    Node tail;
    
    *(LineComp_PairIndex pi, L<Int> l) {
      int count = 0;
      fOr (int i : l) {
        add(pi, i);
        if (((++count) % oneMillion()) == 0)
          print("Added " + count + " entries to chain");
      }
    }
    
    L<Int> toList() {
      new L<Int> l;
      Node node = tail;
      while (node != 0) {
        l.add(value(node));
        node = prev(node);
      }
      ret reverseInPlace(l);
    }
    
    void add(LineComp_PairIndex pi, int i) {
      Node n = pi.newNode();
      setValue(n, i);
      setPrev(n, tail);
      if (tail != 0) setNext(tail, n);
      tail = n;
      pi.addToIndex(prev(n));
    }
  }
  
  Node newNode() {
    for i to nodeSize: nodeData.add(0);
    ret nodeCounter += nodeSize;
  }
  
  int node_a(int node) { ret value(node); }
  int node_b(int node) { int next = next(node); ret next == 0 ? -1 : value(next); }
  
  IntPair nodeToPair(int node) {
    if (node == 0) null;
    int next = next(node);
    ret next == 0 ? null : IntPair(node_a(node), node_b(node));
  }

  // add node to pair index
  void addToIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    int count = nodes_getSize(p);
    Cousins set = nodes_add(p, n);
    if (byCount != null) { // not in init phase
      if (count != 0) byCount.remove(count, set);
      byCount.put(count+1, set);
    }
  }
  
  // remove node from pair index
  void removeFromIndex(Node n) {
    IntPair p = nodeToPair(n);
    if (p == null) ret;
    Cousins set = nodes.get(intPairToLong(p));
    int count = l(set);
    if (count == 0) fail("Can't remove pair " + p);
    nodes_remove(p, n);
    byCount.remove(count, set);
    if (--count > 0) byCount.put(count, set);
  }
  
  IntPair mostPopularDuplicate() {
    ret toInt(firstKey(byCount)) < 2 ? null : nodeToPair(firstValue(byCount).head);
  }
  
  int numberOfDuplicates() {
    ret byCount.size()-l(byCount.get(1));
  }
  
  int getCount(IntPair p) {
    ret nodes_getSize(p);
  }
  
  L<_Node> replacing;
  Map<_Node, Int> replacing_index;
  
  void replacePair(int pa, int pb, int replaceWith) {
    IntPair p = IntPair(pa, pb);
    Cousins set = nodes.get(intPairToLong(p));
    if (set != null) {
      replacing = set.asList();
      replacing_index = listIndex(replacing);
      
      for i over replacing: {
        Node n = replacing.get(i);
        continue if node_a(n) != pa || node_b(n) != pb; // nodes might have been deleted or changed
        replacePair(n, replaceWith);
      }
      
      replacing = null;
      replacing_index = null;
    }
  }
  
  void replacePair(Node node, int replaceWith) {
    removeFromIndex(prev(node));
    removeFromIndex(node);
    removeFromIndex(next(node));
    setValue(node, replaceWith);
    deleteNode(next(node));
    addToIndex(prev(node));
    addToIndex(node);
  }
  
  void deleteNode(Node node) {
    if (next(node) != 0)
      setPrev(next(node), prev(node));
    else
      firstChain.tail = prev(node);
    if (prev(node) != 0)
      setNext(prev(node), next(node));
    //setValue(node, -1); // mark invalid
    physicallyDisposeOfNode(node);
  }
  
  void physicallyDisposeOfNode(Node node) {
    if (!garbageCollect) ret;
    // move last node in list to place that is now free
    if (node != nodeCounter) {
      //print("Moving node " + nodeCounter + " to " + node);
      moveNode(nodeCounter, node);
    }
    //print("Disposing of node " + nodeCounter);
    nodeData.truncateAt(nodeCounter);
    nodeCounter -= nodeSize;
  }
  
  void moveNode(int from, int to) {
    // Copy hash code & value.
    setNodeHashCode(to, nodeHashCode(from));
    setValue(to, value(from));
    
    // Be sure to update all the pointers there may be to this node!
    // First, find Cousins object.
    IntPair pair = nodeToPair(from);
    if (pair != null) {
      Cousins cousins = nodes.get(intPairToLong(pair));
      if (cousins.head == from) cousins.head = to;
      if (cousins.tail == from) cousins.tail = to;
      cousins.moveNode(from, to);
    }
    
    // Next, check the chain.
    if (firstChain.tail == from) firstChain.tail = to;
    
    // Now the 4 prev and next pointers
    int i;
    if ((i = prev(from)) != 0) setNext(i, to);
    if ((i = next(from)) != 0) setPrev(i, to);
    if ((i = prevCousin(from)) != 0) setNextCousin(i, to);
    if ((i = nextCousin(from)) != 0) setPrevCousin(i, to);
    
    // Finally the list we are currently replacing
    if (replacing_index != null) {
      _Node ref = replacing_index.get(from);
      if (ref != null)
        replacing.set(ref, to);
    }

    // That should be it...!?
  }

  void newChain(S version, L<Int> encoding) {
    Ch ch = new(this, encoding);
    chains.put(version, ch);
    assertNull(firstChain);
    firstChain = ch;
  }
  
  int nodes_getSize(IntPair p) {
    ret l(nodes.get(intPairToLong(p)));
  }
  
  Cousins nodes_add(IntPair p, Node n) {
    long l = intPairToLong(p);
    Cousins set = nodes.get(l);
    if (set == null)
      nodes.put(l, set = new Cousins(this));
    set.add(n);
    ret set;
  }
  
  void nodes_remove(IntPair p, Node n) {
    long l = intPairToLong(p);
    Cousins set = nodes.get(l);
    if (set == null) ret;
    set.remove(n);
    if (set.isEmpty())
      nodes.remove(l);
  }
  
  // also shrinks sets...
  void printStats {
    //print("Different pairs: " + l(nodes));
    print("Different counts: " + byCount.keysSize() + " (highest count: " + firstKey(byCount) + ")");
    long count = 0, waste = 0, newWaste = 0;
    long count1 = 0, waste1 = 0, newWaste1 = 0;
    
    for (Cousins set : nodes.values()) {
      count1 += set.size();
      int capacity = set.capacity();
      waste1 += capacity-set.size();
      set.shrinkToFactor(0.5);
      newWaste1 += capacity-set.size();
    }
     
    for (CompactHashSet set : (Cl<CompactHashSet>) (Cl) values(byCount.data)) {
      count += set.size();
      waste += set.capacity()-set.size();
      set.shrinkToFactor(0.5);
      newWaste += set.capacity()-set.size();
    }
    
    print("Chain length   : " + count1 + ". Waste: " + waste1 + (waste1 == newWaste1 ? "" : " => " + newWaste1));
    print("Different pairs: " + count + ". Waste: " + waste + (waste == newWaste ? "" : " => " + newWaste));
  }
  
  Ch getChain(Node node) {
    ret firstChain;
  }
}

Author comment

Began life as a copy of #1028521

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028523
Snippet name: LineComp_PairIndex [v3, getting rid of Node object headers, dev.]
Eternal ID of this version: #1028523/34
Text MD5: 0b2a20e98f6d14083ed8e91672e537af
Transpilation MD5: 73cdced494d34c6eaae0f741661a9341
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-24 03:01:36
Source code size: 10276 bytes / 350 lines
Pitched / IR pitched: No / No
Views / Downloads: 270 / 546
Version history: 33 change(s)
Referenced in: #1028524 - Benchmark LineComp v3 [dev.]