Uses 11335K of libraries. Click here for Pure Java version (9199L/59K).
1 | sclass LCSingleSearcher_v1 { |
2 | S text; |
3 | bool debug; |
4 | transient LineCompedSingle<Char> lc; |
5 | transient LL<Int> productions; |
6 | transient Map<Int> prodLengths; // prod index -> length of production in characters |
7 | transient int iFirstPair, iFirstFile; |
8 | transient Map<Char, Int> literalIndex; |
9 | transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod) |
10 | transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets |
11 | transient bool prepared; |
12 | |
13 | void prepare { |
14 | ret unless set prepared; |
15 | |
16 | lc = lcString2(text); |
17 | |
18 | // Turn pairs and file encodings into a single structure ("productions") |
19 | productions = new L; |
20 | productions.addAll(rep(null, l(lc.literals))); |
21 | iFirstPair = l(productions); |
22 | productions.addAll(intPairsToLists(lc.pairs)); |
23 | iFirstFile = l(productions); |
24 | productions.add(lc.main); |
25 | |
26 | prodLengths = new Map; |
27 | fileOffsetMap = new Map; |
28 | for (int i = iFirstFile; i < l(productions); i++) { |
29 | L<Int> prod = productions.get(i); |
30 | new L<Int> l; |
31 | int ofs = 0; |
32 | for (int x : prod) { |
33 | l.add(ofs); |
34 | ofs += getProdLength(x); |
35 | } |
36 | fileOffsetMap.put(i, l); |
37 | } |
38 | |
39 | // build the reconstitution index |
40 | literalIndex = indexList(lc.literals); |
41 | prodIndex = new MultiMap; |
42 | for iProd over productions: { |
43 | L<Int> prod = productions.get(iProd); |
44 | for (int i, int x : unpair iterateListWithIndex(prod)) { |
45 | prodIndex.put(x, intPair(iProd, i)); |
46 | } |
47 | } |
48 | |
49 | if (debug) print(+literalIndex); |
50 | if (debug) print(+prodIndex); |
51 | } |
52 | |
53 | L<Int> search(S query) { |
54 | prepare(); |
55 | |
56 | L<Char> queryList = characters(query); |
57 | new L<Int> queryLiterals; |
58 | for (Char c : queryList) { |
59 | Int iLit = literalIndex.get(c); |
60 | if (iLit == null) { |
61 | if (debug) print("literal not found"); |
62 | ret emptyList(); |
63 | } |
64 | queryLiterals.add(iLit); |
65 | } |
66 | |
67 | if (debug) print(+queryLiterals); |
68 | |
69 | ret intPairsB(print("Result", reconstitute(queryLiterals))); |
70 | } |
71 | |
72 | int getProdLength(int i) { |
73 | if (i < iFirstPair) ret 1; // literal |
74 | Int x = prodLengths.get(i); |
75 | if (x != null) ret x; |
76 | prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i)))); |
77 | ret x; |
78 | } |
79 | |
80 | L<IntPair> reconstitute(L<Int> baseList) { |
81 | L<Int> needRight = subList(baseList, 1); |
82 | ret reconstitute(first(baseList), |
83 | "", |
84 | lcUncompressItems(lc, needRight)); |
85 | } |
86 | |
87 | // returns list of (file index, character offset) |
88 | L<IntPair> reconstitute(int item, S needLeft, S needRight) { |
89 | if (debug) print("Reconstituting " + quote(lcUncompressItem(lc, item))); |
90 | if (debug) print(" needLeft: " + quote(needLeft)); |
91 | if (debug) print(" needRight: " + quote(needRight)); |
92 | L<IntPair> l = prodIndex.get(item); |
93 | if (debug) print("Found: " + l); |
94 | new L<IntPair> out; |
95 | search: for (IntPair p : l) { |
96 | int iProd = p.a; |
97 | L<Int> prod = productions.get(iProd); |
98 | int j = p.b; |
99 | bool isFile = iProd >= iFirstFile; |
100 | if (isFile) { |
101 | int iChar = fileOffsetMap.get(iProd).get(j); |
102 | if (debug) print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight)); |
103 | |
104 | // check left & right |
105 | if (nempty(needLeft)) fail(); // needLeft not used yet (or is it?) |
106 | |
107 | int iRight = j+1; |
108 | S rest = needRight; |
109 | while (nempty(rest)) { |
110 | if (iRight >= l(prod)) continue search; // need more, but have no more text |
111 | rest = itemStartsWith(prod.get(iRight), rest); |
112 | if (rest == null) continue search; // mismatch |
113 | ++iRight; |
114 | } |
115 | |
116 | // result found! |
117 | out.add(intPair(iProd, iChar)); |
118 | } else { // we're on a pair, check left & right and then go higher up |
119 | if (j == 0) { |
120 | S rest = itemStartsWith(prod.get(1), needRight); |
121 | if (debug) print("checking right: " + quote(lcUncompressItem(lc, prod.get(1))) |
122 | + " => " + quote(rest)); |
123 | if (rest == null) continue; |
124 | out.addAll(reconstitute(iProd, needLeft, rest)); |
125 | } else { |
126 | S rest = itemEndsWith(prod.get(0), needLeft); |
127 | if (debug) print("checking left: " + quote(lcUncompressItem(lc, prod.get(0))) |
128 | + " => " + quote(rest)); |
129 | if (rest == null) continue; |
130 | int leftLength = getProdLength(prod.get(0)); |
131 | for (IntPair result : reconstitute(iProd, rest, needRight)) |
132 | out.add(intPair(result.a, result.b+leftLength)); |
133 | } |
134 | } |
135 | } |
136 | ret out; |
137 | } |
138 | |
139 | S itemStartsWith(int i, S need) { |
140 | if (empty(need)) ret need; |
141 | S have = lcUncompressItem(lc, i); |
142 | int l = lCommonPrefix(have, need); |
143 | if (l < l(need) && l < l(have)) null; |
144 | ret substring(need, l); |
145 | } |
146 | |
147 | S itemEndsWith(int i, S need) { |
148 | if (empty(need)) ret need; |
149 | S have = lcUncompressItem(lc, i); |
150 | int l = lCommonSuffix(have, need); |
151 | if (l < l(need) && l < l(have)) null; |
152 | ret dropLast(need, l); |
153 | } |
154 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029098 |
Snippet name: | LCSingleSearcher_v1 - probably slow, but works [tested up until length 7] |
Eternal ID of this version: | #1029098/19 |
Text MD5: | 890dfe63c0c30e923d518fd511dea419 |
Transpilation MD5: | 16c841f2c71c6de6e776c7e9d48065c6 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-20 13:09:49 |
Source code size: | 5163 bytes / 154 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 293 / 608 |
Version history: | 18 change(s) |
Referenced in: | [show references] |