Libraryless. Click here for Pure Java version (6206L/39K).
// Reader (and re-saver) for the LINECOMP / BYTECOMP text format.
//
// File layout, as parsed by load() and written by save():
//   1. header line:   "LINECOMP <n>" or "BYTECOMP <n>" (n = number of literals)
//   2. n literal lines (in byte mode: one hex-encoded character per line)
//   3. pair lines:    "<a> <b>" - two integer pointers
//   4. version lines: "<name>=<ptr> <ptr> ..."
//
// A pointer idx < l(literals) refers to a literal; any other pointer
// refers to pairs.get(idx-l(literals)), which expands to its two
// pointers in order (see decode()).
sclass LineCompReader {
  new LS literals;
  IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
  // where literals start in the input (in chars, counting 1 per line break);
  // holds one entry per literal plus a final end offset
  int[] literalOffsets;
  new LongBuffer pairs; // each entry packs the two pointers of a pair (twoIntsToLong)
  new LinkedHashMap<S, L<Int>> versions; // version name -> pointer list, in file order
  bool byteMode; // true if the header was "BYTECOMP"
  
  // internal, optional (only set by load(File); used for progress output)
  CountingInputStream countingInputStream;
  long fileSize;
  
  *() {}
  
  // takes text or gzipped input file
  *(File f) { load(f); }
  
  *(InputStream in) { load(rawByteReader(in, 128*1024)); }
  *(BufferedReader reader) { load(reader); }
  
  // Loads a text or gzipped file. Remembers file size and a counting
  // stream so load(BufferedReader) can print a progress percentage.
  void load(File f) {
    fileSize = fileSize(f);
    countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
    temp BufferedReader reader = isGZipFile(f)
      ? rawByteReader(gzipInputStream(countingInputStream))
      : rawByteReader(countingInputStream);
    //rawByteReader_possiblyGZipped(f);
    load(reader);
  }
  
  // Parses header, literals, pairs and versions from reader.
  // Fails with "Not a LINECOMP file" if the header is not recognized.
  void load(BufferedReader reader) ctex {
    new StringBuilder lineBuf;
    S s = readLineIgnoreCR(reader, lineBuf);
    int ofs = l(s)+1;
    new Matches m;
    if (startsWith(s, "BYTECOMP ", m)) set byteMode;
    else if (!startsWith(s, "LINECOMP ", m))
      fail("Not a LINECOMP file");
    
    // section 2: literals
    int nLiterals = parseInt(m.rest());
    new IntBuffer offsets;
    for i to nLiterals: {
      S line = readLineIgnoreCR(reader, lineBuf);
      assertNotNull(line);
      literals.add(byteMode ? str(charFromHex(line)) : line);
      offsets.add(ofs);
      ofs += l(line)+1;
    }
    offsets.add(ofs); // end offset of last literal
    literalOffsets = offsets.toArray();
    
    // section 3: pairs (ends at EOF or at the first line containing "=")
    int n = 0; // number of pairs processed so far
    while licensed {
      s = readLineIgnoreCR(reader, lineBuf);
      if (s == null || contains(s, "=")) break;
      try {
        int iSpace = s.indexOf(' ');
        long pair = twoIntsToLong(
          Int.parseInt(s, 0, iSpace, 10),
          Int.parseInt(s, iSpace+1, l(s), 10));
        if (onPair != null) onPair.get(pair);
        else pairs.add(pair);
        if (((++n) % oneMillion()) == 0) {
          S percentage = "";
          if (fileSize != 0 && countingInputStream != null)
            percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
          print(nPairs(n) + " read" + percentage);
        }
      } on fail {
        // 1-based line number in the input: header line + literal lines
        // + pairs already processed + the current line.
        // Uses n rather than l(pairs) because pairs stays empty when
        // pairs are streamed to onPair.
        print("On line " + (nLiterals + n + 2));
      }
    }
    pairs.trimToSize();
    
    // section 4: versions ("name=ptr ptr ...")
    while (contains(s, "=")) {
      int i = indexOf(s, '=');
      versions.put(takeFirst(s, i), compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
      s = readLineIgnoreCR(reader, lineBuf);
    }
  }
  
  // names of all versions contained in the file
  Set<S> versions() { ret keys(versions); }
  
  S getText(S version) { ret textForVersion(version); }
  
  // Decodes the full text of a version; returns null if the version is unknown.
  S textForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret null;
    new LS buf;
    for (int idx : encoded)
      decode(idx, buf);
    ret myFromLines(buf);
  }
  
  // name of first (or only) file
  S firstFile() { ret first(versions()); }
  
  // text for first (or only) file
  S text() { ret getText(firstFile()); }
  
  // pointer list of first (or only) file
  L<Int> encoding() { ret versions.get(firstFile()); }
  
  // Byte mode: literals are concatenated directly.
  // Line mode: literals are joined as lines (fromLines_rtrim).
  S myFromLines(LS l) {
    ret byteMode
      ? join(l)
      : fromLines_rtrim(l);
  }
  
  // Recursively expands pointer idx, appending the resulting literals to buf.
  void decode(int idx, LS buf) {
    if (idx < l(literals))
      buf.add(literals.get(idx));
    else {
      long p = pairs.get(idx-l(literals));
      decode(firstIntFromLong(p), buf);
      decode(secondIntFromLong(p), buf);
    }
  }
  
  // That was it! The rest of this file is just for calculating some stats.
  
  // caches, keyed by pair pointer
  new Map<Int> lineCountsForPairs;
  new Map<Int, Long> byteCountsForPairs;
  
  // number of literals a pointer expands to
  int lineCountForPointer(int idx) {
    ret idx < l(literals) ? 1 : lineCountForPair(idx);
  }
  
  // Number of chars a pointer expands to. In line mode every literal
  // counts one extra char for its line break; in byte mode literals are
  // joined without separator (see myFromLines), so nothing is added.
  long byteCountForPointer(int idx) {
    if (idx >= l(literals)) ret byteCountForPair(idx);
    ret l(literals.get(idx)) + (byteMode ? 0 : 1);
  }
  
  int lineCountForPair(int idx) {
    Int c = lineCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = lineCountForPointer(firstIntFromLong(p)) + lineCountForPointer(secondIntFromLong(p));
      lineCountsForPairs.put(idx, c);
    }
    ret c;
  }
  
  long byteCountForPair(int idx) {
    Long c = byteCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = byteCountForPointer(firstIntFromLong(p)) + byteCountForPointer(secondIntFromLong(p));
      byteCountsForPairs.put(idx, c);
    }
    ret c;
  }
  
  // returns 0 for an unknown version
  int lineCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    int n = 0;
    for (int i : encoded) n += lineCountForPointer(i);
    ret n;
  }
  
  // returns 0 for an unknown version
  long byteCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    long n = 0;
    for (int i : encoded) n += byteCountForPointer(i);
    // line mode: don't count the line break after the last line;
    // byte mode: no line breaks were counted in the first place
    ret byteMode ? n : max(0, n-1);
  }
  
  long totalByteCount() {
    ret longSum(lambdaMap byteCountForVersion(versions()));
  }
  
  // now we can also save again
  
  // Writes header, literals, pairs and versions in the same layout load() reads.
  // Note: only pairs stored in this object are written (none if they were
  // streamed to onPair during loading).
  void save(PrintWriter out) {
    out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
    for (S s : literals)
      out.println(byteMode ? charToHex(first(s)) : s);
    for (long p : pairs)
      out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
    for (S id, L<Int> l : versions)
      out.println(id + "=" + joinWithSpace(l));
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1028182 |
| Snippet name: | LineCompReader - read LINECOMP format |
| Eternal ID of this version: | #1028182/59 |
| Text MD5: | 34176f987e66773e35d6946d5c139f00 |
| Transpilation MD5: | f50c04c11824feba0e4ed0b675fa5957 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-08-06 15:05:30 |
| Source code size: | 5454 bytes / 179 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 730 / 1341 |
| Version history: | 58 change(s) |
| Referenced in: | [show references] |