Libraryless. Click here for Pure Java version (6730L/43K).
| 1 | |
| 2 | srecord noeq LCMerger_v3(LineCompReader lc1) {
 | 
| 3 | LineCompReader lc2; | 
| 4 | LineCompReader lcOut; // actually the same as lc1 now | 
| 5 | LCSortedPairIndex pairIndex; | 
| 6 | Map<S, Int> literalIndex; | 
| 7 | new IntBuffer lc2map; | 
| 8 | int newLiterals; // number of literals added compared to lc1 | 
| 9 | int newPairs; | 
| 10 | int nOriginalLiterals, nOriginalPairs; | 
| 11 | int lc2PairCount; | 
| 12 | |
| 13 |   *(LineCompReader *lc1, LineCompReader *lc2) {}
 | 
| 14 |   *(File ubcFile) {
 | 
| 15 | lcOut = lc1 = LineCompReader(ubcFile); | 
| 16 | } | 
| 17 | |
| 18 |   void add(File ubcFile) {
 | 
| 19 |     //if (lc2 != null) fail("Can only run once for now");
 | 
| 20 | lc2 = new LineCompReader; | 
| 21 | lc2PairCount = 0; | 
| 22 | |
| 23 | lcOut = lc1; | 
| 24 | nOriginalLiterals = l(lc1.literals); | 
| 25 | nOriginalPairs = l(lc1.pairs); | 
| 26 | newLiterals = newPairs = 0; | 
| 27 | |
| 28 |     printWithTime("Making literalIndex");
 | 
| 29 | literalIndex = indexList(lc1.literals); | 
| 30 | |
| 31 |     lc2.onPair = p -> { 
 | 
| 32 |       if (lc2PairCount == 0) { initPhase2(); }
 | 
| 33 | ++lc2PairCount; | 
| 34 | |
| 35 | long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p))); | 
| 36 | int iPair = pairIndex.get(pAdjusted); | 
| 37 |       if (iPair < 0) { // new pair
 | 
| 38 | iPair = l(lc1.pairs); | 
| 39 | lc1.pairs.add(pAdjusted); | 
| 40 | ++newPairs; | 
| 41 | } | 
| 42 | lc2map.add(l(lcOut.literals)+iPair); | 
| 43 | }; | 
| 44 | |
| 45 | lc2.load(ubcFile); | 
| 46 | |
| 47 | // copy files | 
| 48 | |
| 49 |     printWithTime("Merging files");
 | 
| 50 | lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc)); | 
| 51 | |
| 52 |     for (S name, L<Int> encoding : lc2.versions) {
 | 
| 53 | if (lc1.versions.containsKey(name)) | 
| 54 |         continue with print("Warning: Duplicate file name " + name);
 | 
| 55 | lc1.versions.put(name, lmap adjust2(encoding)); | 
| 56 | } | 
| 57 | |
| 58 | printVars_str(+newLiterals, +newPairs, +lc2PairCount); | 
| 59 |     print("Synergy factor: " + doubleRatio(lc2PairCount-newPairs, lc2PairCount));
 | 
| 60 | } | 
| 61 | |
| 62 | // convert symbols from lc1 | 
| 63 |   int adjust1(int i) {
 | 
| 64 | if (i >= nOriginalLiterals) | 
| 65 | if (i >= nOriginalLiterals+nOriginalPairs) | 
| 66 | ret i+newLiterals+newPairs; | 
| 67 | else | 
| 68 | ret i+newLiterals; | 
| 69 | ret i; | 
| 70 | } | 
| 71 | |
| 72 |   int adjust2(int i) { ret lc2map.get(i); }
 | 
| 73 | |
| 74 |   void initPhase2 {
 | 
| 75 | // add lc2.literals to lc1.literals | 
| 76 | |
| 77 | lc2map = new IntBuffer(l(lc2.literals) + l(lc2.pairs)); | 
| 78 | |
| 79 |     printWithTime("Merging literals");
 | 
| 80 |     for (int i = 0; i < l(lc2.literals); i++) {
 | 
| 81 | S c = lc2.literals.get(i); | 
| 82 | Int iLit = literalIndex.get(c); | 
| 83 |       if (iLit == null) {
 | 
| 84 | iLit = addAndReturnIndex(lc1.literals, c); | 
| 85 | ++newLiterals; | 
| 86 | literalIndex.put(c, iLit); | 
| 87 | } | 
| 88 | lc2map.add(iLit); | 
| 89 | } | 
| 90 | |
| 91 | // merge pairs | 
| 92 | |
| 93 |     printWithTime("Adjusting lc1 pairs");
 | 
| 94 |     for i to nOriginalPairs: {
 | 
| 95 | long p = lc1.pairs.get(i); | 
| 96 | lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)))); | 
| 97 | } | 
| 98 | |
| 99 | // Make index after adjustment | 
| 100 | |
| 101 |     printWithTime("Making pairIndex for " + nPairs(l(lc1.pairs)));
 | 
| 102 | pairIndex = new LCSortedPairIndex(lc1.pairs.toArray(), pairIndex); | 
| 103 | |
| 104 |     printWithTime("Merging pairs");
 | 
| 105 | } | 
| 106 | } | 
Began life as a copy of #1029423
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029424 | 
| Snippet name: | LCMerger_v3 [streaming lc2, OK, first one that actually works] | 
| Eternal ID of this version: | #1029424/10 | 
| Text MD5: | 1ae4f73a67ae2ae47f9440ed3d94ab05 | 
| Transpilation MD5: | 5a0d2424d75fb8de4298dc75d164571c | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX fragment (include) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2020-08-06 15:17:51 | 
| Source code size: | 3112 bytes / 106 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 483 / 834 | 
| Version history: | 9 change(s) | 
| Referenced in: | [show references] |