Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

101
LINES

< > BotCompany Repo | #1029423 // LCMerger_v2 [with sorted pair index, uses less memory, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (6621L/42K).

1  
!include once #1027304 // Eclipse Collections
2  
3  
srecord noeq LCMerger_v2(LineCompReader lc1) {
4  
  LineCompReader lc2;
5  
  LineCompReader lcOut; // actually the same as lc1 now
6  
  LCSortedPairIndex pairIndex;
7  
  Map<S, Int> literalIndex;
8  
  new IntBuffer lc2map;
9  
  int newLiterals; // number of literals added compared to lc1
10  
  int newPairs;
11  
  int nOriginalLiterals, nOriginalPairs;
12  
  
13  
  *(LineCompReader *lc1, LineCompReader *lc2) {}
14  
  *(File ubcFile) {
15  
    lc1 = LineCompReader(ubcFile);
16  
  }
17  
  
18  
  void add(LineCompReader lc2) {
19  
    //if (lc2 != null) fail("Can only run once for now");
20  
    this.lc2 = lc2;
21  
    run();
22  
  }
23  
24  
  run {
25  
    lcOut = lc1;
26  
    nOriginalLiterals = l(lc1.literals);
27  
    nOriginalPairs = l(lc1.pairs);
28  
    newLiterals = newPairs = 0;
29  
30  
    printWithTime("Making literalIndex");
31  
    literalIndex = indexList(lc1.literals);
32  
  
33  
    // add lc2.literals to lc1.literals
34  
  
35  
    lc2map = new IntBuffer(l(lc2.literals) + l(lc2.pairs));
36  
    
37  
    printWithTime("Merging literals");
38  
    for (int i = 0; i < l(lc2.literals); i++) {
39  
      S c = lc2.literals.get(i);
40  
      Int iLit = literalIndex.get(c);
41  
      if (iLit == null) {
42  
        iLit = addAndReturnIndex(lc1.literals, c);
43  
        ++newLiterals;
44  
        literalIndex.put(c, iLit);
45  
      }
46  
      lc2map.add(iLit);
47  
    }
48  
    
49  
    // merge pairs
50  
    
51  
    printWithTime("Adjusting lc1 pairs");
52  
    for i to nOriginalPairs: {
53  
      long p = lc1.pairs.get(i);
54  
      lc1.pairs.set(i, twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p))));
55  
    }
56  
    
57  
    // Make index after adjustment
58  
    
59  
    printWithTime("Making pairIndex for " + nPairs(l(lc1.pairs)));
60  
    pairIndex = new LCSortedPairIndex(lc1.pairs.toArray());
61  
62  
    printWithTime("Merging pairs");
63  
    for (int i = 0; i < l(lc2.pairs); i++) {
64  
      long p = lc2.pairs.get(i);
65  
      long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p)));
66  
      int iPair = pairIndex.get(pAdjusted);
67  
      if (iPair < 0) { // new pair
68  
        iPair = l(lc1.pairs);
69  
        lc1.pairs.add(pAdjusted);
70  
        ++newPairs;
71  
      }
72  
      lc2map.add(l(lcOut.literals)+iPair);
73  
    }
74  
    
75  
    // copy files
76  
    
77  
    printWithTime("Merging files");
78  
    lc1.versions = (LinkedHashMap) mapValues(lc1.versions, enc -> lmap adjust1(enc));
79  
80  
    for (S name, L<Int> encoding : lc2.versions) {
81  
      if (lc1.versions.containsKey(name))
82  
        continue with print("Warning: Duplicate file name " + name);
83  
      lc1.versions.put(name, lmap adjust2(encoding));
84  
    }
85  
    
86  
    printVars_str(+newLiterals, +newPairs);
87  
    print("Synergy factor: " + doubleRatio(l(lc1.pairs), nOriginalPairs+l(lc2.pairs)));
88  
  }
89  
  
90  
  // convert symbols from lc1
91  
  int adjust1(int i) {
92  
    if (i >= nOriginalLiterals)
93  
      if (i >= nOriginalLiterals+nOriginalPairs)
94  
        ret i+newLiterals+newPairs;
95  
      else
96  
        ret i+newLiterals;
97  
    ret i;
98  
  }
99  
  
100  
  int adjust2(int i) { ret lc2map.get(i); }
101  
}

Author comment

Began life as a copy of #1029327

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029423
Snippet name: LCMerger_v2 [with sorted pair index, uses less memory, dev.]
Eternal ID of this version: #1029423/8
Text MD5: d45045d9ba943d419c037e1e2a8eaad0
Transpilation MD5: ebb12ef1c62d91fe60dee5d8329ecb7c
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-05 19:23:44
Source code size: 3013 bytes / 101 lines
Pitched / IR pitched: No / No
Views / Downloads: 235 / 524
Version history: 7 change(s)
Referenced in: [show references]