Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

86
LINES

< > BotCompany Repo | #1029353 - LCMerger [working backup]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (6300L/40K).

!include once #1027304 // Eclipse Collections

// Merges two LineCompReader instances (lc1, lc2) into a new one (lcOut).
//
// Symbol numbering as used by this code (see adjust1 and the lc2map
// entries written in run): indices below l(literals) denote literals,
// the next l(pairs) indices denote pairs. A pair is stored as one long
// holding two symbol indices (twoIntsToLong / firstIntFromLong /
// secondIntFromLong).
//
// lcOut keeps all of lc1's literals and pairs in their original order
// and appends whatever lc2 contributes that is not already present.
srecord noeq LCMerger(LineCompReader lc1, LineCompReader lc2) {
  new LineCompReader lcOut; // TODO: merge with lc1

  // adjusted pair (in lcOut symbol numbering) -> index within lcOut.pairs
  new LongIntHashMap pairIndex;

  // literal text -> index within lcOut.literals
  Map<S, Int> literalIndex;

  // lc2 symbol -> lcOut symbol (covers lc2's literals and pairs)
  new IntIntHashMap lc2map; // TODO: Drop. This is actually contained in the pairIndex

  int newLiterals; // number of literals added compared to lc1
  int newPairs;    // number of pairs added compared to lc1

  run {
    lcOut.byteMode = lc1.byteMode;

    printWithTime("Making literalIndex");
    literalIndex = indexList(lc1.literals);

    // make lcOut.literals & literalIndex

    printWithTime("Merging literals");
    lcOut.literals = cloneList(lc1.literals);
    for (int i = 0; i < l(lc2.literals); i++) {
      S c = lc2.literals.get(i);
      Int iLit = literalIndex.get(c);
      if (iLit == null) {
        // literal unknown so far: append it to the merged list
        iLit = addAndReturnIndex(lcOut.literals, c);
        literalIndex.put(c, iLit);
      }
      lc2map.put(i, iLit);
    }

    newLiterals = l(lcOut.literals)-l(lc1.literals);

    // clone lc1.pairs into lcOut.pairs, add lc2.pairs

    printWithTime("Merging pairs");
    lcOut.pairs = new LongBuffer(l(lc1.pairs));

    // The index has to be keyed by the ADJUSTED lc1 pairs: it is queried
    // below with lc2 pairs that were already translated to lcOut
    // numbering. Keying it by the raw lc1 values would miss existing
    // pairs (and could match unrelated ones) whenever newLiterals > 0,
    // since every lc1 pair symbol moves up by newLiterals.
    // newPairs is still 0 at this point.
    printWithTime("Making pairIndex");
    for (int i = 0; i < l(lc1.pairs); i++) {
      long p = lc1.pairs.get(i);
      long pAdjusted = twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)));
      lcOut.pairs.add(pAdjusted);
      pairIndex.put(pAdjusted, i);
    }

    for (int i = 0; i < l(lc2.pairs); i++) {
      long p = lc2.pairs.get(i);
      // Components are translated through lc2map. This relies on a pair
      // only referring to symbols that were already mapped (literals or
      // earlier pairs) - NOTE(review): presumed invariant, confirm.
      long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p)));
      int iPair = pairIndex.getIfAbsent(pAdjusted, -1);
      if (iPair < 0) { // new pair
        iPair = l(lcOut.pairs);
        lcOut.pairs.add(pAdjusted);
        pairIndex.put(pAdjusted, iPair);
      }
      lc2map.put(l(lc2.literals)+i, l(lcOut.literals)+iPair);
    }

    newPairs = l(lcOut.pairs)-l(lc1.pairs);

    // copy files

    printWithTime("Merging files");
    lcOut.versions = new LinkedHashMap;
    for (S name, L<Int> encoding : lc1.versions) {
      lcOut.versions.put(name, lmap adjust1(encoding));
    }

    // On a name clash the lc1 version wins and a warning is printed
    for (S name, L<Int> encoding : lc2.versions) {
      if (lcOut.versions.containsKey(name))
        continue with print("Warning: Duplicate file name " + name);
      lcOut.versions.put(name, lmap adjust2(encoding));
    }

    print("Synergy factor: " + doubleRatio(l(lcOut.pairs), l(lc1.pairs)+l(lc2.pairs)));
  }

  // convert symbols from lc1 to lcOut
  //   literal                    -> unchanged
  //   pair                       -> shifted by newLiterals
  //   anything past lc1's pairs  -> shifted by newLiterals+newPairs
  int adjust1(int i) {
    int nLiterals = l(lc1.literals);
    if (i < nLiterals) ret i;
    if (i < nLiterals+l(lc1.pairs)) ret i+newLiterals;
    ret i+newLiterals+newPairs;
  }

  // convert symbols from lc2 to lcOut via lc2map.
  // Symbols without an lc2map entry are returned unchanged.
  // NOTE(review): adjust1 shifts symbols beyond the pair range while
  // this method leaves them as they are - confirm that is intended.
  int adjust2(int i) { ret lc2map.getIfAbsent(i, i); }
}

Author comment

Began life as a copy of #1029327

download  show line numbers  debug dex   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029353
Snippet name: LCMerger [working backup]
Eternal ID of this version: #1029353/5
Text MD5: 12cb017ecec87ce8b3948edf6ddc558a
Transpilation MD5: 106f6dbdd359ca413c4c5a4c9c77a27d
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-02 11:25:33
Source code size: 2863 bytes / 86 lines
Pitched / IR pitched: No / No
Views / Downloads: 43 / 72
Version history: 4 change(s)
Referenced in: [show references]

Formerly at http://tinybrain.de/1029353 & http://1029353.tinybrain.de