Libraryless. Click here for Pure Java version (6206L/39K).
// Reads (and can write back) files in the LINECOMP / BYTECOMP format.
//
// File layout, as parsed by load(BufferedReader):
//   1. a header line: "LINECOMP <n>" or "BYTECOMP <n>" (n = number of literals)
//   2. n literal lines (in BYTECOMP mode each line is passed through charFromHex)
//   3. pair lines "<a> <b>" - two integers separated by one space
//   4. version lines "<name>=<space-separated integers>"
//
// An index smaller than the number of literals refers to a literal; any other
// index refers to the pair at position (index - number of literals).
sclass LineCompReader {
  new LS literals;
  IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
  int[] literalOffsets; // where they start in file (counted in chars + 1 per line break), plus one final entry for the end of the literal section
  new LongBuffer pairs;
  new LinkedHashMap<S, L<Int>> versions; // version name -> encoded index list, in file order
  bool byteMode; // true if the header said BYTECOMP

  // internal, optional (only set by load(File); used for progress percentages)
  CountingInputStream countingInputStream;
  long fileSize;

  *() {}

  // takes text or gzipped input file
  *(File f) { load(f); }

  *(InputStream in) { load(rawByteReader(in, 128*1024)); }
  *(BufferedReader reader) { load(reader); }

  // Loads a text or gzipped file. The reader is declared "temp", so it is
  // closed when this method exits.
  void load(File f) {
    fileSize = fileSize(f);
    countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
    temp BufferedReader reader = isGZipFile(f)
      ? rawByteReader(gzipInputStream(countingInputStream))
      : rawByteReader(countingInputStream);
    //rawByteReader_possiblyGZipped(f);
    load(reader);
  }

  // Parses header, literals, pairs and versions from the reader.
  // Fails with "Not a LINECOMP file" if the header has neither known prefix.
  void load(BufferedReader reader) ctex {
    new StringBuilder lineBuf;
    S s = readLineIgnoreCR(reader, lineBuf);
    int ofs = l(s)+1;
    new Matches m;
    if (startsWith(s, "BYTECOMP ", m)) set byteMode;
    else if (!startsWith(s, "LINECOMP ", m))
      fail("Not a LINECOMP file");
    int nLiterals = parseInt(m.rest());

    // literal section
    new IntBuffer offsets;
    for i to nLiterals: {
      S line = readLineIgnoreCR(reader, lineBuf);
      assertNotNull(line);
      literals.add(byteMode ? str(charFromHex(line)) : line);
      offsets.add(ofs);
      ofs += l(line)+1;
    }
    offsets.add(ofs);
    literalOffsets = offsets.toArray();

    // pair section - ends at end of input or at the first line containing "="
    int n = 0; // number of pairs read so far (also counts pairs streamed to onPair)
    while licensed {
      s = readLineIgnoreCR(reader, lineBuf);
      if (s == null || contains(s, "=")) break;
      try {
        int iSpace = s.indexOf(' ');
        long pair = twoIntsToLong(
          Int.parseInt(s, 0, iSpace, 10),
          Int.parseInt(s, iSpace+1, l(s), 10));
        if (onPair != null) onPair.get(pair);
        else pairs.add(pair);
        if (((++n) % oneMillion()) == 0) {
          S percentage = "";
          if (fileSize != 0 && countingInputStream != null)
            percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
          print(nPairs(n) + " read" + percentage);
        }
      } on fail {
        // 1-based line number in the file: header line + literal lines
        // + pair lines already read + the current line.
        // Uses n rather than l(pairs), which stays 0 when pairs are
        // streamed to onPair.
        print("On line " + (nLiterals + n + 2));
      }
    }
    pairs.trimToSize();

    // version section - s still holds the first version line (if any)
    while (contains(s, "=")) {
      int i = indexOf(s, '=');
      versions.put(takeFirst(s, i), compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
      s = readLineIgnoreCR(reader, lineBuf);
    }
  }

  // names of all versions contained in the file
  Set<S> versions() { ret keys(versions); }

  S getText(S version) { ret textForVersion(version); }

  // Decodes the full text of a version; returns null if the version is unknown.
  S textForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret null;
    new LS buf;
    for (int idx : encoded)
      decode(idx, buf);
    ret myFromLines(buf);
  }

  // name of first (or only) file
  S firstFile() { ret first(versions()); }

  // text for first (or only) file
  S text() { ret getText(firstFile()); }

  // encoded index list of first (or only) file
  L<Int> encoding() { ret versions.get(firstFile()); }

  // Joins decoded literals into the final text:
  // plain concatenation in byte mode, fromLines_rtrim otherwise.
  S myFromLines(LS l) {
    ret byteMode
      ? join(l)
      : fromLines_rtrim(l);
  }

  // Appends the literals that idx expands to onto buf (first half of a pair
  // before its second half). Recursion depth follows the nesting of pairs.
  void decode(int idx, LS buf) {
    if (idx < l(literals))
      buf.add(literals.get(idx));
    else {
      long p = pairs.get(idx-l(literals));
      decode(firstIntFromLong(p), buf);
      decode(secondIntFromLong(p), buf);
    }
  }

  // That was it! The rest of this file is just for calculating some stats.

  // caches, keyed by pair index (same index space as used in decode)
  new Map<Int> lineCountsForPairs;
  new Map<Int, Long> byteCountsForPairs;

  // Number of separator chars accounted per literal in the byte stats:
  // one line break in line mode, none in byte mode (where myFromLines
  // concatenates the literals without separators).
  int lineBreakLength() { ret byteMode ? 0 : 1; }

  int lineCountForPointer(int idx) {
    ret idx < l(literals) ? 1 : lineCountForPair(idx);
  }

  long byteCountForPointer(int idx) {
    ret idx < l(literals) ? l(literals.get(idx))+lineBreakLength() : byteCountForPair(idx);
  }

  // memoized in lineCountsForPairs
  int lineCountForPair(int idx) {
    Int c = lineCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = lineCountForPointer(firstIntFromLong(p)) + lineCountForPointer(secondIntFromLong(p));
      lineCountsForPairs.put(idx, c);
    }
    ret c;
  }

  // memoized in byteCountsForPairs
  long byteCountForPair(int idx) {
    Long c = byteCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = byteCountForPointer(firstIntFromLong(p)) + byteCountForPointer(secondIntFromLong(p));
      byteCountsForPairs.put(idx, c);
    }
    ret c;
  }

  // returns 0 for an unknown version
  int lineCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    int n = 0;
    for (int i : encoded) n += lineCountForPointer(i);
    ret n;
  }

  // returns 0 for an unknown version
  long byteCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    long n = 0;
    for (int i : encoded) n += byteCountForPointer(i);
    // in line mode, don't count a line break after the last line
    ret max(0, n-lineBreakLength());
  }

  long totalByteCount() {
    ret longSum(lambdaMap byteCountForVersion(versions()));
  }

  // now we can also save again

  // Writes header, literals, pairs and versions in the same layout that
  // load(BufferedReader) parses. Does not flush or close the writer.
  void save(PrintWriter out) {
    out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
    for (S s : literals)
      out.println(byteMode ? charToHex(first(s)) : s);
    for (long p : pairs)
      out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
    for (S id, L<Int> l : versions)
      out.println(id + "=" + joinWithSpace(l));
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028182 |
Snippet name: | LineCompReader - read LINECOMP format |
Eternal ID of this version: | #1028182/59 |
Text MD5: | 34176f987e66773e35d6946d5c139f00 |
Transpilation MD5: | f50c04c11824feba0e4ed0b675fa5957 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-08-06 15:05:30 |
Source code size: | 5454 bytes / 179 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 479 / 1042 |
Version history: | 58 change(s) |
Referenced in: | [show references] |