Uses 12245K of libraries. Click here for Pure Java version (9479L/47K).
1 | !7 |
2 | |
3 | cprint { |
4 | switchable S query = "world"; |
5 | |
6 | transient LineCompedSingle<Char> lc; |
7 | transient LL<Int> productions; |
8 | transient Map<Int> prodLengths; // prod index -> length of production in characters |
9 | transient int iFirstPair, iFirstFile; |
10 | transient Map<Char, Int> literalIndex; |
11 | transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod) |
12 | transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets |
13 | |
14 | start-thread { |
15 | dm_reloadOnFieldChange query(); |
16 | new LineCompCompressor comp; |
17 | lc = lcString2(comp, "hello world world"); |
18 | |
19 | // Turn pairs and file encodings into a single structure ("productions") |
20 | productions = new L; |
21 | productions.addAll(rep(null, l(lc.literals))); |
22 | iFirstPair = l(productions); |
23 | productions.addAll(intPairsToLists(lc.pairs)); |
24 | iFirstFile = l(productions); |
25 | productions.add(lc.main); |
26 | |
27 | prodLengths = new Map; |
28 | fileOffsetMap = new Map; |
29 | for (int i = iFirstFile; i < l(productions); i++) { |
30 | L<Int> prod = productions.get(i); |
31 | new L<Int> l; |
32 | int ofs = 0; |
33 | for (int x : prod) { |
34 | l.add(ofs); |
35 | ofs += getProdLength(x); |
36 | } |
37 | fileOffsetMap.put(i, l); |
38 | } |
39 | |
40 | // build the reconstitution index |
41 | literalIndex = indexList(lc.literals); |
42 | prodIndex = new MultiMap; |
43 | for iProd over productions: { |
44 | L<Int> prod = productions.get(iProd); |
45 | for (int i, int x : unpair iterateListWithIndex(prod)) { |
46 | prodIndex.put(x, intPair(iProd, i)); |
47 | } |
48 | } |
49 | |
50 | print(+literalIndex); |
51 | print(+prodIndex); |
52 | |
53 | L<Char> queryList = characters(query); |
54 | new L<Int> queryLiterals; |
55 | for (Char c : queryList) { |
56 | Int iLit = literalIndex.get(c); |
57 | if (iLit == null) ret with print("literal not found"); |
58 | queryLiterals.add(iLit); |
59 | } |
60 | |
61 | print(+queryLiterals); |
62 | |
63 | print("Result", reconstitute(queryLiterals, 0)); |
64 | } |
65 | |
66 | /*void calcProdLengths { |
67 | prodLengths = new Map; |
68 | for (int i = iFirstPair; i < l(productions); i++) |
69 | getProdLength(i); |
70 | }*/ |
71 | |
72 | int getProdLength(int i) { |
73 | if (i < iFirstPair) ret 1; // literal |
74 | Int x = prodLengths.get(i); |
75 | if (x != null) ret x; |
76 | prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i)))); |
77 | ret x; |
78 | } |
79 | |
80 | L<IntPair> reconstitute(L<Int> baseList, int i) { |
81 | L<Int> needLeft = subList(baseList, 0, i); |
82 | L<Int> needRight = subList(baseList, i+1); |
83 | ret reconstitute(baseList.get(i), |
84 | lcUncompressItems(lc, needLeft), |
85 | lcUncompressItems(lc, needRight)); |
86 | } |
87 | |
88 | // returns list of (file index, character offset) |
89 | L<IntPair> reconstitute(int item, S needLeft, S needRight) { |
90 | print("Reconstituing " + quote(lcUncompressItem(lc, item))); |
91 | print(" needLeft: " + quote(needLeft)); |
92 | print(" needRight: " + quote(needRight)); |
93 | L<IntPair> l = prodIndex.get(item); |
94 | print("Found: " + l); |
95 | new L<IntPair> out; |
96 | search: for (IntPair p : l) { |
97 | int iProd = p.a; |
98 | L<Int> prod = productions.get(iProd); |
99 | int j = p.b; |
100 | bool isFile = iProd >= iFirstFile; |
101 | if (isFile) { |
102 | int iChar = fileOffsetMap.get(iProd).get(j); |
103 | print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight)); |
104 | |
105 | // check left & right |
106 | if (nempty(needLeft)) fail(); // needLeft not used yet |
107 | |
108 | int iRight = j+1; |
109 | while (nempty(needRight)) { |
110 | if (j >= l(prod)) continue search; // need more, but have no more text |
111 | S rest = itemStartsWith(prod.get(iRight), needRight); |
112 | if (rest == null) continue search; // mismatch |
113 | needRight = rest; |
114 | ++iRight; |
115 | } |
116 | |
117 | // result found! |
118 | out.add(intPair(iProd, iChar)); |
119 | } else { // we're on a pair, check left & right and then go higher up |
120 | if (j == 0) { |
121 | S rest = itemStartsWith(prod.get(1), needRight); |
122 | print("checking right: " + quote(lcUncompressItem(lc, prod.get(1))) |
123 | + " => " + quote(rest)); |
124 | if (rest == null) continue; |
125 | out.addAll(reconstitute(iProd, needLeft, rest)); |
126 | } else { |
127 | S rest = itemEndsWith(prod.get(0), needLeft); |
128 | print("checking left: " + quote(lcUncompressItem(lc, prod.get(0))) |
129 | + " => " + quote(rest)); |
130 | if (rest == null) continue; |
131 | out.addAll(reconstitute(iProd, rest, needRight)); |
132 | } |
133 | } |
134 | } |
135 | ret out; |
136 | } |
137 | |
138 | S itemStartsWith(int i, S need) { |
139 | if (empty(need)) ret need; |
140 | S have = lcUncompressItem(lc, i); |
141 | int l = lCommonPrefix(have, need); |
142 | if (l < l(need) && l < l(have)) null; |
143 | ret substring(need, l); |
144 | } |
145 | |
146 | S itemEndsWith(int i, S need) { |
147 | if (empty(need)) ret need; |
148 | S have = lcUncompressItem(lc, i); |
149 | int l = lCommonSuffix(have, need); |
150 | if (l < l(need) && l < l(have)) null; |
151 | ret dropLast(need, l); |
152 | |
153 | } |
154 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029095 |
Snippet name: | Search from Compression Spike [dev.] |
Eternal ID of this version: | #1029095/33 |
Text MD5: | a8dfbd7502773c978cfe6f9f664cc07e |
Transpilation MD5: | 156c30495a2b405be0c5e7d5efcf91de |
Author: | stefan |
Category: | |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-19 15:20:12 |
Source code size: | 4913 bytes / 154 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 197 / 2448 |
Version history: | 32 change(s) |
Referenced in: | [show references] |