Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

179
LINES

< > BotCompany Repo | #1028182 // LineCompReader - read LINECOMP format

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (6206L/39K).

1  
sclass LineCompReader {
2  
  new LS literals;
3  
  IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
4  
  int[] literalOffsets; // where they start in file
5  
  new LongBuffer pairs;
6  
  new LinkedHashMap<S, L<Int>> versions;
7  
  bool byteMode;
8  
  
9  
  // internal, optional
10  
  CountingInputStream countingInputStream;
11  
  long fileSize;
12  
  
13  
  *() {}
14  
  
15  
  // takes text or gzipped input file
16  
  *(File f) { load(f); }
17  
  
18  
  *(InputStream in) { load(rawByteReader(in, 128*1024)); }
19  
  *(BufferedReader reader) { load(reader); }
20  
  
21  
  void load(File f) {
22  
    fileSize = fileSize(f);
23  
    countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
24  
    temp BufferedReader reader = isGZipFile(f)
25  
      ? rawByteReader(gzipInputStream(countingInputStream))
26  
      : rawByteReader(countingInputStream);
27  
      //rawByteReader_possiblyGZipped(f);
28  
    load(reader);
29  
  }
30  
  
31  
  void load(BufferedReader reader) ctex {
32  
    new StringBuilder lineBuf;
33  
    S s = readLineIgnoreCR(reader, lineBuf);
34  
    int ofs = l(s)+1;
35  
    new Matches m;
36  
    if (startsWith(s, "BYTECOMP ", m)) set byteMode;
37  
    else if (!startsWith(s, "LINECOMP ", m))
38  
      fail("Not a LINECOMP file");
39  
    int nLiterals = parseInt(m.rest());
40  
    new IntBuffer offsets;
41  
    for i to nLiterals: {
42  
      S line = readLineIgnoreCR(reader, lineBuf);
43  
      assertNotNull(line);
44  
      literals.add(byteMode ? str(charFromHex(line)) : line);
45  
      offsets.add(ofs);
46  
      ofs += l(line)+1;
47  
    }
48  
    offsets.add(ofs);
49  
    literalOffsets = offsets.toArray();
50  
    int n = 0;
51  
    while licensed {
52  
      s = readLineIgnoreCR(reader, lineBuf);
53  
      if (s == null || contains(s, "=")) break;
54  
      try {
55  
        int iSpace = s.indexOf(' ');
56  
        long pair = twoIntsToLong(
57  
          Int.parseInt(s, 0, iSpace, 10),
58  
          Int.parseInt(s, iSpace+1, l(s), 10));
59  
        if (onPair != null) onPair.get(pair);
60  
        else pairs.add(pair);
61  
        if (((++n) % oneMillion()) == 0) {
62  
          S percentage = "";
63  
          if (fileSize != 0 && countingInputStream != null)
64  
            percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
65  
          print(nPairs(n) + " read" + percentage);
66  
        }
67  
      } on fail {
68  
        print("On line " + (nLiterals + l(pairs)));
69  
      }
70  
    }
71  
    pairs.trimToSize();
72  
    while (contains(s, "=")) {
73  
      int i = indexOf(s, '=');
74  
      versions.put(takeFirst(s, i), compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
75  
      s = readLineIgnoreCR(reader, lineBuf);
76  
    }
77  
  }
78  
  
79  
  Set<S> versions() { ret keys(versions); }
80  
  
81  
  S getText(S version) { ret textForVersion(version); }
82  
  S textForVersion(S version) {
83  
    L<Int> encoded = versions.get(version);
84  
    if (encoded == null) null;
85  
    new LS buf;
86  
    for (int idx : encoded)
87  
      decode(idx, buf);
88  
    ret myFromLines(buf);
89  
  }
90  
  
91  
  // name of first (or only) file
92  
  S firstFile() { ret first(versions()); }
93  
  
94  
  // text for first (or only) file
95  
  S text() { ret getText(firstFile()); }
96  
  
97  
  L<Int> encoding() { ret versions.get(firstFile()); }
98  
  
99  
  S myFromLines(LS l) {
100  
    ret byteMode
101  
      ? join(l)
102  
      : fromLines_rtrim(l);
103  
  }
104  
  
105  
  void decode(int idx, LS buf) {
106  
    if (idx < l(literals))
107  
      buf.add(literals.get(idx));
108  
    else {
109  
      long p = pairs.get(idx-l(literals));
110  
      decode(firstIntFromLong(p), buf);
111  
      decode(secondIntFromLong(p), buf);
112  
    }
113  
  }
114  
  
115  
  // That was it! The rest of this file is just for calculating some stats.
116  
  
117  
  new Map<Int> lineCountsForPairs;
118  
  new Map<Int, Long> byteCountsForPairs;
119  
120  
  int lineCountForPointer(int idx) {
121  
    ret idx < l(literals) ? 1 : lineCountForPair(idx);
122  
  }
123  
  
124  
  long byteCountForPointer(int idx) {
125  
    ret idx < l(literals) ? l(literals.get(idx))+1 : byteCountForPair(idx);
126  
  }
127  
  
128  
  int lineCountForPair(int idx) {
129  
    Int c = lineCountsForPairs.get(idx);
130  
    if (c == null) {
131  
      long p = pairs.get(idx-l(literals));
132  
      c = lineCountForPointer(firstIntFromLong(p)) + lineCountForPointer(secondIntFromLong(p));
133  
      lineCountsForPairs.put(idx, c);
134  
    }
135  
    ret c;
136  
  }
137  
  
138  
  long byteCountForPair(int idx) {
139  
    Long c = byteCountsForPairs.get(idx);
140  
    if (c == null) {
141  
      long p = pairs.get(idx-l(literals));
142  
      c = byteCountForPointer(firstIntFromLong(p)) + byteCountForPointer(secondIntFromLong(p));
143  
      byteCountsForPairs.put(idx, c);
144  
    }
145  
    ret c;
146  
  }
147  
  
148  
  int lineCountForVersion(S version) {
149  
    L<Int> encoded = versions.get(version);
150  
    if (encoded == null) ret 0;
151  
    int n = 0;
152  
    for (int i : encoded) n += lineCountForPointer(i);
153  
    ret n;
154  
  }
155  
  
156  
  long byteCountForVersion(S version) {
157  
    L<Int> encoded = versions.get(version);
158  
    if (encoded == null) ret 0;
159  
    long n = 0;
160  
    for (int i : encoded) n += byteCountForPointer(i);
161  
    ret max(0, n-1);
162  
  }
163  
  
164  
  long totalByteCount() {
165  
    ret longSum(lambdaMap byteCountForVersion(versions()));
166  
  }
167  
  
168  
  // now we can also save again
169  
  
170  
  void save(PrintWriter out) {
171  
    out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
172  
    for (S s : literals)
173  
      out.println(byteMode ? charToHex(first(s)) : s);
174  
    for (long p : pairs)
175  
      out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
176  
    for (S id, L<Int> l : versions)
177  
      out.println(id + "=" + joinWithSpace(l));
178  
  }  
179  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028182
Snippet name: LineCompReader - read LINECOMP format
Eternal ID of this version: #1028182/59
Text MD5: 34176f987e66773e35d6946d5c139f00
Transpilation MD5: f50c04c11824feba0e4ed0b675fa5957
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-08-06 15:05:30
Source code size: 5454 bytes / 179 lines
Pitched / IR pitched: No / No
Views / Downloads: 399 / 939
Version history: 58 change(s)
Referenced in: [show references]