Libraryless. Compilation Failed (7093L/45K).
1 | sclass LineCompCompressor { /* Compresses several line-based texts together: every unique line becomes a primitive chunk, repeated adjacent pairs of chunk references become pair chunks (see run and compressPairs). */ |
2 | int safety = 0; /* if > 0: countdown of compression steps, compressPairs fails when it reaches 0; 0 = no limit */ |
3 | /* Encodings maps a text ID to the list of chunk indices that encode that text (alias set up by the replace directive below). */ |
4 | replace Encodings with Map<S, L<Int>>. |
5 | /* A chunk can produce its text when given the complete chunk list (pair chunks look up other chunks in it). */ |
6 | abstract sclass Chunk { |
7 | abstract S text(L<Chunk> chunks); |
8 | } |
9 | /* Pair chunk: refers to two other chunks by index; its text combines their texts via linesLL_rtrim (presumably as consecutive lines - confirm against that helper). */ |
10 | srecord CPair(int i1, int i2) > Chunk { |
11 | CPair(Pair<Int> p) { i1 = p.a; i2 = p.b; } /* convenience constructor taking both indices as a Pair */ |
12 | |
13 | S text(L<Chunk> chunks) { |
14 | ret linesLL_rtrim(chunks.get(i1).text(chunks), chunks.get(i2).text(chunks)); |
15 | } |
16 | } |
17 | /* Primitive chunk: stores its text directly (run creates one per unique line). */ |
18 | srecord CPrim(S s) > Chunk { |
19 | S text(L<Chunk> chunks) { ret s; } |
20 | } |
21 | /* Options. verbose: print encodings and decompression result; verboseCompressionSteps: print progress messages; sortLines: sort the unique lines before indexing; verify: run the self-checks in run and saveAsTextFile. */ |
22 | bool verbose = false, verboseCompressionSteps = false; |
23 | bool sortLines = true; |
24 | bool verify = true; |
25 | /* State. textIDToLines: input texts as line lists; chunks: all chunks, the first primChunks of them are CPrim and later ones CPair; lineIndex: looked up by line text to get a chunk index; finalEncodings: set by run. linePairIndex is not used anywhere else in this class. */ |
26 | Map<S, LS> textIDToLines; |
27 | LS allUniqueLines; |
28 | new L<Chunk> chunks; |
29 | int primChunks; |
30 | Map<S, Int> lineIndex; |
31 | new Map<PairS, Int> linePairIndex; |
32 | Encodings finalEncodings; |
33 | |
34 | // Constructor. texts: key = version ID, value = text; every text is split into lines with myToLines |
35 | *(SS texts) { |
36 | textIDToLines = mapValuesToLinkedHashMap myToLines(texts); |
37 | } |
38 | /* Line splitting and joining used by this class; both delegate to library helpers. */ |
39 | LS myToLines(S s) { ret toLines_nOnly_reversible(s); } |
40 | S myFromLines(LS l) { ret fromLines_rtrim(l); } |
41 | /* Main step: collects the unique lines as primitive chunks, encodes each text as direct line references, then compresses pairs and stores the result in finalEncodings. */ |
42 | run { |
43 | LS allLines = concatLists(values(textIDToLines)); |
44 | if (verboseCompressionSteps) print("Uniquifying " + nLines(allLines)); |
45 | allUniqueLines = uniquify(allLines); |
46 | if (verboseCompressionSteps) print("Have " + n2(allUniqueLines, "unique line")); |
47 | allLines = null; // allow me to forget |
48 | if (sortLines) sortInPlace(allUniqueLines); |
49 | if (verboseCompressionSteps) print("Sorted " + nLines(allUniqueLines)); |
50 | for (S line : allUniqueLines) |
51 | chunks.add(new CPrim(line)); |
52 | primChunks = l(chunks); |
53 | lineIndex = listIndex(collect s(chunks)); |
54 | |
55 | // simple encoding (only direct line references) |
56 | Encodings simpleEncodings = mapValues(textIDToLines, |
57 | (IF1<LS, L<Int>>) (lines -> map(lines, line -> lineIndex.get(line)))); |
58 | //printAndCheckEncodings(simpleEncodings); |
59 | |
60 | if (verboseCompressionSteps) print("Have simple encodings"); |
61 | finalEncodings = compressPairs(simpleEncodings); |
62 | if (verbose || verify) printAndCheckEncodings(finalEncodings); |
63 | } |
64 | /* Writes the exported encoding to f; with verify set, reads the file back and checks it against the input texts. Uses finalEncodings, which is only assigned in run. */ |
65 | void saveAsTextFile(File f) { |
66 | S out = exportEncoding(finalEncodings); |
67 | saveTextFile(f, out); |
68 | |
69 | if (verify) checkDecompression(f, textIDToLines); |
70 | } |
71 | /* Opens file with a LineCompReader and asserts that its version list and the text of every version match textIDToLines. */ |
72 | void checkDecompression(File file, Map<S, LS> textIDToLines) { |
73 | temp BufferedReader reader = bufferedUtf8Reader(file); |
74 | LineCompReader lcr = new(reader); |
75 | assertEquals(keysList(textIDToLines), asList(lcr.versions())); |
76 | for (S version : keys(textIDToLines)) |
77 | assertEquals(lcr.textForVersion(version), lines_rtrim(textIDToLines.get(version))); |
78 | if (verbose) print("Decompression OK for " + nVersions(textIDToLines)); |
79 | } |
80 | /* Returns the exported encoding as a string instead of writing it to a file. */ |
81 | S asText() { ret exportEncoding(finalEncodings); } |
82 | /* Output layout: header line LINECOMP <primChunks>, then one line per chunk (the text for CPrim, i1 and i2 separated by a space for CPair), then one line id=<space-separated indices> per text. */ |
83 | S exportEncoding(Encodings encodings) { |
84 | new LS buf; |
85 | buf.add("LINECOMP " + primChunks); // magic signature |
86 | for (Chunk c : chunks) { |
87 | if (c cast CPair) |
88 | buf.add(c.i1 + " " + c.i2); |
89 | else |
90 | buf.add(((CPrim) c).s); |
91 | } |
92 | for (S id, L<Int> l : encodings) |
93 | buf.add(id + "=" + joinWithSpace(l)); |
94 | ret lines_rtrim(buf); |
95 | } |
96 | |
97 | // new fast version of magic compression function |
98 | /* Loop: take the most popular duplicate pair from pairCounts, create a CPair chunk for it, replace the pair in every encoding; stop when mostPopularDuplicate returns null. */ |
99 | Encodings compressPairs(Encodings encodings) { |
100 | // get initial pair counts |
101 | new LineComp_PairCounts pairCounts; |
102 | for (L<Int> l : values(encodings)) |
103 | pairCounts.addAll(overlappingPairs(l)); |
104 | |
105 | // Conversion to LinkedList is currently disabled (the call below is commented out), so encodings2 refers to the same map as encodings |
106 | Map<S, L<Int>> encodings2 = /*mapValues toLinkedList*/(encodings); |
107 | |
108 | Pair<Int> toCompress; |
109 | // Compress only most popular pair in one step |
110 | //int lastDups = Int.MAX_VALUE; |
111 | while ping ((toCompress = pairCounts.mostPopularDuplicate()) != null) { |
112 | if (safety > 0 && --safety <= 0) fail("safety"); /* optional step limit, see field safety */ |
113 | int count = pairCounts.getCount(toCompress), idx = makeCPair(toCompress); |
114 | int dups = pairCounts.numberOfDuplicates(); |
115 | /*if (lastDups == dups) |
116 | fail("Number of duplicates not decreasing"); |
117 | lastDups = dups;*/ |
118 | if (verboseCompressionSteps) print("Compressing pair " + toCompress + " (count=" + count + ") -> " + idx + ", " + (dups-1) + " remaining"); |
119 | for (L<Int> l : values(encodings2)) |
120 | compressPair(pairCounts, l, toCompress, idx); |
121 | } |
122 | |
123 | // reconvert to normal list |
124 | ret mapValues toArrayList(encodings2); |
125 | } |
126 | |
127 | // Replaces the pair (replacing) in l with the single index replaceWith; pairCounts is handed to the helper as well (presumably so it can keep the counts current) |
128 | void compressPair(LineComp_PairCounts pairCounts, L<Int> l, Pair<Int> replacing, int replaceWith) { |
129 | lineComp_replaceSublistWithUpdatingPairCount(l, pairToList(replacing), ll(replaceWith), pairCounts); |
130 | } |
131 | /* Appends a new CPair for p to chunks and returns the index it was stored at. */ |
132 | int makeCPair(Pair<Int> p) { |
133 | int idx = addAndReturnIndex(chunks, new CPair(p)); |
134 | ret idx; |
135 | } |
136 | /* For every text: prints the encoding if verbose, then asserts that decoding it yields the original text. */ |
137 | void printAndCheckEncodings(Encodings encodings) { |
138 | for (S id, L<Int> encoded : encodings) { |
139 | if (verbose) print(id + ": " + joinWithSpace(encoded)); |
140 | assertEquals(myFromLines(textIDToLines.get(id)), decode(encoded)); |
141 | } |
142 | } |
143 | /* Turns a list of chunk indices back into text. */ |
144 | S decode(L<Int> encoded) { |
145 | ret myFromLines(lambdaMap chunkText(encoded)); |
146 | } |
147 | /* Text of the chunk stored at index idx. */ |
148 | S chunkText(int idx) { |
149 | ret chunks.get(idx).text(chunks); |
150 | } |
151 | } |
Began life as a copy of #1028186
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028234 |
Snippet name: | LineCompCompressor, faster version backup before IntPair |
Eternal ID of this version: | #1028234/4 |
Text MD5: | b48988ec7819b1a73eaafe978ff2a049 |
Transpilation MD5: | 13a4af9ad25d0b8ed7a02fb5235cf3e5 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-27 16:41:26 |
Source code size: | 5165 bytes / 151 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 210 / 307 |
Version history: | 3 change(s) |
Referenced in: | [show references] |