Libraryless. Click here for Pure Java version (6730L/43K).
1 | |
2 | srecord noeq LCMerger_v3(LineCompReader lc1) { |
3 | LineCompReader lc2; |
4 | LineCompReader lcOut; // actually the same as lc1 now |
5 | LCSortedPairIndex pairIndex; |
6 | Map<S, Int> literalIndex; |
7 | new IntBuffer lc2map; |
8 | int newLiterals; // number of literals added compared to lc1 |
9 | int newPairs; |
10 | int nOriginalLiterals, nOriginalPairs; |
11 | int lc2PairCount; |
12 | |
13 | *(LineCompReader *lc1, LineCompReader *lc2) {} |
14 | *(File ubcFile) { |
15 | lcOut = lc1 = LineCompReader(ubcFile); |
16 | } |
17 | |
18 | void add(File ubcFile) { |
19 | //if (lc2 != null) fail("Can only run once for now"); |
20 | lc2 = new LineCompReader; |
21 | lc2PairCount = 0; |
22 | |
23 | lcOut = lc1; |
24 | nOriginalLiterals = l(lc1.literals); |
25 | nOriginalPairs = l(lc1.pairs); |
26 | newLiterals = newPairs = 0; |
27 | |
28 | printWithTime("Making literalIndex"); |
29 | literalIndex = indexList(lc1.literals); |
30 | |
31 | lc2.onPair = p -> { |
32 | if (lc2PairCount == 0) { initPhase2(); } |
33 | ++lc2PairCount; |
34 | |
35 | long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p))); |
36 | int iPair = pairIndex.get(pAdjusted); |
37 | if (iPair < 0) { // new pair |
38 | iPair = l(lc1.pairs); |
39 | lc1.pairs.add(pAdjusted); |
40 | ++newPairs; |
41 | } |
42 | lc2map.add(l(lcOut.literals)+iPair); |
43 | }; |
44 | |
45 | lc2.load(ubcFile); |
46 | |
47 | // copy files |
48 | |
49 | printWithTime("Merging files"); |
50 | lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc)); |
51 | |
52 | for (S name, L<Int> encoding : lc2.versions) { |
53 | if (lc1.versions.containsKey(name)) |
54 | continue with print("Warning: Duplicate file name " + name); |
55 | lc1.versions.put(name, lmap adjust2(encoding)); |
56 | } |
57 | |
58 | printVars_str(+newLiterals, +newPairs, +lc2PairCount); |
59 | print("Synergy factor: " + doubleRatio(lc2PairCount-newPairs, lc2PairCount)); |
60 | } |
61 | |
62 | // convert symbols from lc1 |
63 | int adjust1(int i) { |
64 | if (i >= nOriginalLiterals) |
65 | if (i >= nOriginalLiterals+nOriginalPairs) |
66 | ret i+newLiterals+newPairs; |
67 | else |
68 | ret i+newLiterals; |
69 | ret i; |
70 | } |
71 | |
72 | int adjust2(int i) { ret lc2map.get(i); } |
73 | |
74 | void initPhase2 { |
75 | // add lc2.literals to lc1.literals |
76 | |
77 | lc2map = new IntBuffer(l(lc2.literals) + l(lc2.pairs)); |
78 | |
79 | printWithTime("Merging literals"); |
80 | for (int i = 0; i < l(lc2.literals); i++) { |
81 | S c = lc2.literals.get(i); |
82 | Int iLit = literalIndex.get(c); |
83 | if (iLit == null) { |
84 | iLit = addAndReturnIndex(lc1.literals, c); |
85 | ++newLiterals; |
86 | literalIndex.put(c, iLit); |
87 | } |
88 | lc2map.add(iLit); |
89 | } |
90 | |
91 | // merge pairs |
92 | |
93 | printWithTime("Adjusting lc1 pairs"); |
94 | for i to nOriginalPairs: { |
95 | long p = lc1.pairs.get(i); |
96 | lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)))); |
97 | } |
98 | |
99 | // Make index after adjustment |
100 | |
101 | printWithTime("Making pairIndex for " + nPairs(l(lc1.pairs))); |
102 | pairIndex = new LCSortedPairIndex(lc1.pairs.toArray(), pairIndex); |
103 | |
104 | printWithTime("Merging pairs"); |
105 | } |
106 | } |
Began life as a copy of #1029423
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029424 |
Snippet name: | LCMerger_v3 [streaming lc2, OK, first one that actually works] |
Eternal ID of this version: | #1029424/10 |
Text MD5: | 1ae4f73a67ae2ae47f9440ed3d94ab05 |
Transpilation MD5: | 5a0d2424d75fb8de4298dc75d164571c |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-08-06 15:17:51 |
Source code size: | 3112 bytes / 106 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 253 / 556 |
Version history: | 9 change(s) |
Referenced in: | [show references] |