Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

86
LINES

< > BotCompany Repo | #1029353 // LCMerger [working backup]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (6300L/40K).

1  
!include once #1027304 // Eclipse Collections
2  
3  
// Merges the contents of two LineCompReaders (lc1 and lc2) into a fresh
// LineCompReader (lcOut).
//
// Symbol numbering as used by this class: a symbol below l(literals) is an
// index into the literals list; a symbol at l(literals)+k refers to pair k.
// lc1's literals and pairs keep their positions in lcOut; literals and pairs
// that only occur in lc2 are appended. Because appended literals push the
// pair range upwards, every lc1 symbol has to go through adjust1 and every
// lc2 symbol through adjust2 before it is valid in lcOut.
srecord noeq LCMerger(LineCompReader lc1, LineCompReader lc2) {
  new LineCompReader lcOut; // TODO: merge with lc1

  // pair (expressed in lcOut symbols) -> index of that pair in lcOut.pairs
  new LongIntHashMap pairIndex;

  // literal -> index of that literal in lcOut.literals
  Map<S, Int> literalIndex;

  // lc2 symbol -> lcOut symbol
  new IntIntHashMap lc2map; // TODO: Drop. This is actually contained in the pairIndex

  int newLiterals; // number of literals added compared to lc1
  int newPairs;    // number of pairs added compared to lc1

  run {
    lcOut.byteMode = lc1.byteMode;

    printWithTime("Making literalIndex");
    literalIndex = indexList(lc1.literals);

    // make lcOut.literals & literalIndex

    printWithTime("Merging literals");
    lcOut.literals = cloneList(lc1.literals);
    for (int i = 0; i < l(lc2.literals); i++) {
      S c = lc2.literals.get(i);
      Int iLit = literalIndex.get(c);
      if (iLit == null) { // literal only exists in lc2 => append it
        iLit = addAndReturnIndex(lcOut.literals, c);
        literalIndex.put(c, iLit);
      }
      lc2map.put(i, iLit);
    }

    newLiterals = l(lcOut.literals)-l(lc1.literals);

    // clone lc1.pairs into lcOut.pairs, add lc2.pairs

    printWithTime("Merging pairs");
    lcOut.pairs = new LongBuffer(l(lc1.pairs));
    for (int i = 0; i < l(lc1.pairs); i++) {
      long p = lc1.pairs.get(i);
      // newPairs is still 0 here, so adjust1 only applies the literal shift.
      long pAdjusted = twoIntsToLong(adjust1(firstIntFromLong(p)), adjust1(secondIntFromLong(p)));
      lcOut.pairs.add(pAdjusted);
      // Index the ADJUSTED pair: the lookups below are made with lc2 pairs
      // translated into lcOut symbols, so the keys must live in the same
      // symbol space. Indexing the raw lc1 pair would miss (or falsely hit)
      // entries as soon as newLiterals > 0.
      pairIndex.put(pAdjusted, i);
    }

    for (int i = 0; i < l(lc2.pairs); i++) {
      long p = lc2.pairs.get(i);
      long pAdjusted = twoIntsToLong(adjust2(firstIntFromLong(p)), adjust2(secondIntFromLong(p)));
      int iPair = pairIndex.getIfAbsent(pAdjusted, -1);
      if (iPair < 0) { // new pair
        iPair = l(lcOut.pairs);
        lcOut.pairs.add(pAdjusted);
        pairIndex.put(pAdjusted, iPair);
      }
      // Register the lc2 pair symbol so later lc2 pairs and file encodings
      // that refer to it can be translated by adjust2.
      lc2map.put(l(lc2.literals)+i, l(lcOut.literals)+iPair);
    }

    newPairs = l(lcOut.pairs)-l(lc1.pairs);

    // copy files

    printWithTime("Merging files");
    lcOut.versions = new LinkedHashMap;
    for (S name, L<Int> encoding : lc1.versions) {
      lcOut.versions.put(name, lmap adjust1(encoding));
    }

    for (S name, L<Int> encoding : lc2.versions) {
      // On a name clash the lc1 version is kept and the lc2 version is skipped.
      if (lcOut.versions.containsKey(name))
        continue with print("Warning: Duplicate file name " + name);
      lcOut.versions.put(name, lmap adjust2(encoding));
    }

    print("Synergy factor: " + doubleRatio(l(lcOut.pairs), l(lc1.pairs)+l(lc2.pairs)));
  }

  // convert symbols from lc1 to lcOut
  // - literal symbols (below l(lc1.literals)) are returned unchanged
  // - pair symbols are shifted by newLiterals
  // - anything at or beyond l(lc1.literals)+l(lc1.pairs) is shifted by
  //   newLiterals+newPairs
  // Uses the current values of newLiterals/newPairs, so the result depends
  // on how far run() has progressed.
  int adjust1(int i) {
    if (i >= l(lc1.literals))
      if (i >= l(lc1.literals)+l(lc1.pairs))
        ret i+newLiterals+newPairs;
      else
        ret i+newLiterals;
    ret i;
  }

  // convert symbols from lc2 to lcOut (via lc2map; symbols without an entry
  // are returned unchanged)
  int adjust2(int i) { ret lc2map.getIfAbsent(i, i); }
}

Author comment

Began life as a copy of #1029327

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029353
Snippet name: LCMerger [working backup]
Eternal ID of this version: #1029353/5
Text MD5: 12cb017ecec87ce8b3948edf6ddc558a
Transpilation MD5: 106f6dbdd359ca413c4c5a4c9c77a27d
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-02 11:25:33
Source code size: 2863 bytes / 86 lines
Pitched / IR pitched: No / No
Views / Downloads: 192 / 275
Version history: 4 change(s)
Referenced in: [show references]