Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

106
LINES

< > BotCompany Repo | #1029424 // LCMerger_v3 [streaming lc2, OK, first one that actually works]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (6730L/43K).

1  
2  
// Merges the data read by a second LineCompReader (lc2) into lc1, in place.
//
// Symbol numbering as used by this class: literals occupy the indices
// [0, l(literals)), and pair number k is the symbol l(literals)+k.
// Appending literals to lc1 therefore shifts every lc1 pair symbol upwards
// (see adjust1), and every lc2 symbol has to be translated into lc1's
// numbering through lc2map (see adjust2).
//
// lc2 is loaded in a streaming fashion: its pairs arrive one at a time through
// the onPair callback and are merged as they come in.
srecord noeq LCMerger_v3(LineCompReader lc1) {
  LineCompReader lc2;
  LineCompReader lcOut; // actually the same as lc1 now
  LCSortedPairIndex pairIndex; // looks up a (lc1-numbered) pair -> pair index, negative if absent
  Map<S, Int> literalIndex;    // literal text -> index in lc1.literals
  new IntBuffer lc2map;        // lc2 symbol index -> symbol index in the merged lc1
  int newLiterals; // number of literals added compared to lc1
  int newPairs;    // number of pairs added compared to lc1
  int nOriginalLiterals, nOriginalPairs; // sizes of lc1 before the current add()
  int lc2PairCount; // number of pairs received from lc2 so far
  
  *(LineCompReader *lc1, LineCompReader *lc2) {}
  *(File ubcFile) {
    lcOut = lc1 = LineCompReader(ubcFile);
  }
  
  // Loads ubcFile as lc2 and merges its literals, pairs and versions into lc1.
  // Files whose name already exists in lc1.versions are skipped with a warning.
  void add(File ubcFile) {
    //if (lc2 != null) fail("Can only run once for now");
    lc2 = new LineCompReader;
    lc2PairCount = 0;
    
    lcOut = lc1;
    nOriginalLiterals = l(lc1.literals);
    nOriginalPairs = l(lc1.pairs);
    newLiterals = newPairs = 0;

    printWithTime("Making literalIndex");
    literalIndex = indexList(lc1.literals);
    
    lc2.onPair = p -> { 
      // The first pair triggers the literal merge (phase 2) before any pair is translated
      if (lc2PairCount == 0) { initPhase2(); }
      ++lc2PairCount;
      
      // Translate both halves of the lc2 pair into lc1 numbering
      long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p)));
      int iPair = pairIndex.get(pAdjusted);
      if (iPair < 0) { // new pair
        iPair = l(lc1.pairs);
        lc1.pairs.add(pAdjusted);
        ++newPairs;
      }
      // Record which merged symbol this lc2 pair corresponds to
      lc2map.add(l(lcOut.literals)+iPair);
    };
    
    lc2.load(ubcFile);
    
    // An lc2 without any pairs never fires onPair, so phase 2 has not run yet.
    // Run it now: otherwise lc2's literals would never be merged and lc2map
    // would be empty (or left over from a previous add()), breaking adjust2 below.
    if (lc2PairCount == 0) initPhase2();
    
    // copy files
    
    printWithTime("Merging files");
    // lc1's own encodings must be renumbered because literals were appended
    lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc));

    for (S name, L<Int> encoding : lc2.versions) {
      if (lc1.versions.containsKey(name))
        continue with print("Warning: Duplicate file name " + name);
      lc1.versions.put(name, lmap adjust2(encoding));
    }
    
    printVars_str(+newLiterals, +newPairs, +lc2PairCount);
    print("Synergy factor: " + doubleRatio(lc2PairCount-newPairs, lc2PairCount));
  }
  
  // convert symbols from lc1
  // Literal symbols keep their index; pair symbols move up by the number of
  // newly added literals; anything beyond the original pairs additionally
  // moves up by the number of newly added pairs.
  int adjust1(int i) {
    if (i < nOriginalLiterals) ret i;
    if (i < nOriginalLiterals+nOriginalPairs) ret i+newLiterals;
    ret i+newLiterals+newPairs;
  }
  
  // convert symbols from lc2 (only valid for symbols already entered in lc2map)
  int adjust2(int i) { ret lc2map.get(i); }
  
  // Phase 2 setup: merges lc2's literals into lc1, renumbers lc1's existing
  // pairs accordingly and builds the pair index used while lc2's pairs stream in.
  void initPhase2 {
    // add lc2.literals to lc1.literals
  
    lc2map = new IntBuffer(l(lc2.literals) + l(lc2.pairs));
    
    printWithTime("Merging literals");
    for (int i = 0; i < l(lc2.literals); i++) {
      S c = lc2.literals.get(i);
      Int iLit = literalIndex.get(c);
      if (iLit == null) {
        // literal unknown to lc1: append it and remember its new index
        iLit = addAndReturnIndex(lc1.literals, c);
        ++newLiterals;
        literalIndex.put(c, iLit);
      }
      lc2map.add(iLit);
    }
    
    // merge pairs
    
    // newLiterals is final at this point, so adjust1 yields the definitive numbering
    printWithTime("Adjusting lc1 pairs");
    for i to nOriginalPairs: {
      long p = lc1.pairs.get(i);
      lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p))));
    }
    
    // Make index after adjustment
    
    printWithTime("Making pairIndex for " + nPairs(l(lc1.pairs)));
    // NOTE(review): the previous pairIndex is handed to the constructor, presumably for reuse - confirm against LCSortedPairIndex
    pairIndex = new LCSortedPairIndex(lc1.pairs.toArray(), pairIndex);
    
    printWithTime("Merging pairs");
  }
}

Author comment

Began life as a copy of #1029423

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029424
Snippet name: LCMerger_v3 [streaming lc2, OK, first one that actually works]
Eternal ID of this version: #1029424/10
Text MD5: 1ae4f73a67ae2ae47f9440ed3d94ab05
Transpilation MD5: 5a0d2424d75fb8de4298dc75d164571c
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-06 15:17:51
Source code size: 3112 bytes / 106 lines
Pitched / IR pitched: No / No
Views / Downloads: 253 / 556
Version history: 9 change(s)
Referenced in: [show references]