Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

106
LINES

< > BotCompany Repo | #1029424 - LCMerger_v3 [streaming lc2, OK, first one that actually works]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (6730L/43K).

// Merges a second line-compressed archive (lc2) into an already loaded one
// (lc1), modifying lc1 in place. lc2 is streamed: its pairs are delivered
// one by one through lc2.onPair while lc2.load() is running.
//
// Symbol numbering used throughout this class (see the onPair callback and
// adjust1): literals occupy [0, #literals), pairs follow directly after,
// i.e. pair number k is symbol #literals + k. Because merging appends new
// literals to lc1.literals, every pair symbol of lc1 has to be shifted up
// by the number of new literals.
//
// Usage as far as this file shows: construct with the first file, then
// call add(file) for the file to merge in; the result is in lc1 (= lcOut).
srecord noeq LCMerger_v3(LineCompReader lc1) {
  LineCompReader lc2;
  LineCompReader lcOut; // actually the same as lc1 now
  LCSortedPairIndex pairIndex; // lookup: adjusted pair -> index in lc1.pairs (negative if absent)
  Map<S, Int> literalIndex;    // lookup: literal text -> index in lc1.literals
  
  // Maps every lc2 symbol (by its index in lc2) to the corresponding symbol
  // in the merged archive. Filled with one entry per lc2 literal in
  // initPhase2, then with one entry per streamed lc2 pair.
  new IntBuffer lc2map;
  
  int newLiterals; // number of literals added compared to lc1
  int newPairs;    // number of pairs added compared to lc1
  int nOriginalLiterals, nOriginalPairs; // sizes of lc1 before the current add()
  int lc2PairCount; // number of pairs received from lc2 so far
  
  *(LineCompReader *lc1, LineCompReader *lc2) {}
  
  // Loads the base archive from ubcFile; it becomes both lc1 and lcOut.
  *(File ubcFile) {
    lcOut = lc1 = LineCompReader(ubcFile);
  }
  
  // Merges the archive stored in ubcFile into lc1.
  // Files from lc2 whose name already exists in lc1 are skipped with a warning.
  void add(File ubcFile) {
    //if (lc2 != null) fail("Can only run once for now");
    lc2 = new LineCompReader;
    lc2PairCount = 0;
    
    lcOut = lc1;
    nOriginalLiterals = l(lc1.literals);
    nOriginalPairs = l(lc1.pairs);
    newLiterals = newPairs = 0;

    printWithTime("Making literalIndex");
    literalIndex = indexList(lc1.literals);
    
    lc2.onPair = p -> { 
      // The first pair marks the end of lc2's literals, so the literal merge
      // and the pair index can be set up now.
      if (lc2PairCount == 0) { initPhase2(); }
      ++lc2PairCount;
      
      // Translate both halves of the pair into merged symbol numbers.
      // This relies on a pair only referring to symbols that came before it,
      // so lc2map already has entries for both halves.
      long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p)));
      int iPair = pairIndex.get(pAdjusted);
      if (iPair < 0) { // new pair
        // NOTE(review): pairs appended here are not added to pairIndex in
        // this file - fine as long as lc2 never repeats a pair; confirm.
        iPair = l(lc1.pairs);
        lc1.pairs.add(pAdjusted);
        ++newPairs;
      }
      lc2map.add(l(lcOut.literals)+iPair);
    };
    
    lc2.load(ubcFile);
    
    // An lc2 without any pairs never triggers onPair. Without this call its
    // literals would not be merged and lc2map would stay empty (or keep the
    // contents of a previous add()), breaking adjust2 for lc2's versions below.
    if (lc2PairCount == 0) initPhase2();
    
    // copy files
    
    printWithTime("Merging files");
    // Re-number lc1's own file encodings for the grown literal/pair tables.
    lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc));

    for (S name, L<Int> encoding : lc2.versions) {
      if (lc1.versions.containsKey(name))
        continue with print("Warning: Duplicate file name " + name);
      lc1.versions.put(name, lmap adjust2(encoding));
    }
    
    printVars_str(+newLiterals, +newPairs, +lc2PairCount);
    // Share of lc2's pairs that were already present in lc1.
    print("Synergy factor: " + doubleRatio(lc2PairCount-newPairs, lc2PairCount));
  }
  
  // convert symbols from lc1
  // - original literals keep their number
  // - original pairs move up by the number of literals added
  // - anything at or past the original pair range moves up by the number
  //   of literals and pairs added
  int adjust1(int i) {
    if (i < nOriginalLiterals) ret i;
    if (i < nOriginalLiterals+nOriginalPairs) ret i+newLiterals;
    ret i+newLiterals+newPairs;
  }
  
  // convert symbols from lc2 (lookup in the table built while streaming)
  int adjust2(int i) { ret lc2map.get(i); }
  
  // Runs once per add(): merges lc2's literals into lc1, re-numbers lc1's
  // existing pairs accordingly and builds the pair index used for lc2's pairs.
  // Expects lc2.literals to be completely loaded.
  void initPhase2 {
    // add lc2.literals to lc1.literals
  
    // Fresh map for this add(); the argument is presumably an initial
    // capacity (l(lc2.pairs) may still be 0 while streaming) - TODO confirm.
    lc2map = new IntBuffer(l(lc2.literals) + l(lc2.pairs));
    
    printWithTime("Merging literals");
    for (int i = 0; i < l(lc2.literals); i++) {
      S c = lc2.literals.get(i);
      Int iLit = literalIndex.get(c);
      if (iLit == null) {
        // literal unknown to lc1: append it and remember its index
        iLit = addAndReturnIndex(lc1.literals, c);
        ++newLiterals;
        literalIndex.put(c, iLit);
      }
      lc2map.add(iLit);
    }
    
    // merge pairs
    
    // newLiterals is final at this point, so lc1's pairs can be shifted now.
    printWithTime("Adjusting lc1 pairs");
    for i to nOriginalPairs: {
      long p = lc1.pairs.get(i);
      lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p))));
    }
    
    // Make index after adjustment
    
    printWithTime("Making pairIndex for " + nPairs(l(lc1.pairs)));
    // The previous index (null on the first add()) is handed to the constructor.
    // NOTE(review): confirm what LCSortedPairIndex does with it.
    pairIndex = new LCSortedPairIndex(lc1.pairs.toArray(), pairIndex);
    
    printWithTime("Merging pairs");
  }
}

Author comment

Began life as a copy of #1029423

download  show line numbers  debug dex  old transpilations   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029424
Snippet name: LCMerger_v3 [streaming lc2, OK, first one that actually works]
Eternal ID of this version: #1029424/10
Text MD5: 1ae4f73a67ae2ae47f9440ed3d94ab05
Transpilation MD5: 5a0d2424d75fb8de4298dc75d164571c
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-06 15:17:51
Source code size: 3112 bytes / 106 lines
Pitched / IR pitched: No / No
Views / Downloads: 62 / 170
Version history: 9 change(s)
Referenced in: [show references]

Formerly at http://tinybrain.de/1029424 & http://1029424.tinybrain.de