Uses 11335K of libraries. Click here for Pure Java version (9199L/59K).
1 | sclass LCSingleSearcher_v1 {
|
2 | S text; |
3 | bool debug; |
4 | transient LineCompedSingle<Char> lc; |
5 | transient LL<Int> productions; |
6 | transient Map<Int> prodLengths; // prod index -> length of production in characters |
7 | transient int iFirstPair, iFirstFile; |
8 | transient Map<Char, Int> literalIndex; |
9 | transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod) |
10 | transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets |
11 | transient bool prepared; |
12 | |
13 | void prepare {
|
14 | ret unless set prepared; |
15 | |
16 | lc = lcString2(text); |
17 | |
18 | // Turn pairs and file encodings into a single structure ("productions")
|
19 | productions = new L; |
20 | productions.addAll(rep(null, l(lc.literals))); |
21 | iFirstPair = l(productions); |
22 | productions.addAll(intPairsToLists(lc.pairs)); |
23 | iFirstFile = l(productions); |
24 | productions.add(lc.main); |
25 | |
26 | prodLengths = new Map; |
27 | fileOffsetMap = new Map; |
28 | for (int i = iFirstFile; i < l(productions); i++) {
|
29 | L<Int> prod = productions.get(i); |
30 | new L<Int> l; |
31 | int ofs = 0; |
32 | for (int x : prod) {
|
33 | l.add(ofs); |
34 | ofs += getProdLength(x); |
35 | } |
36 | fileOffsetMap.put(i, l); |
37 | } |
38 | |
39 | // build the reconstitution index |
40 | literalIndex = indexList(lc.literals); |
41 | prodIndex = new MultiMap; |
42 | for iProd over productions: {
|
43 | L<Int> prod = productions.get(iProd); |
44 | for (int i, int x : unpair iterateListWithIndex(prod)) {
|
45 | prodIndex.put(x, intPair(iProd, i)); |
46 | } |
47 | } |
48 | |
49 | if (debug) print(+literalIndex); |
50 | if (debug) print(+prodIndex); |
51 | } |
52 | |
53 | L<Int> search(S query) {
|
54 | prepare(); |
55 | |
56 | L<Char> queryList = characters(query); |
57 | new L<Int> queryLiterals; |
58 | for (Char c : queryList) {
|
59 | Int iLit = literalIndex.get(c); |
60 | if (iLit == null) {
|
61 | if (debug) print("literal not found");
|
62 | ret emptyList(); |
63 | } |
64 | queryLiterals.add(iLit); |
65 | } |
66 | |
67 | if (debug) print(+queryLiterals); |
68 | |
69 | ret intPairsB(print("Result", reconstitute(queryLiterals)));
|
70 | } |
71 | |
72 | int getProdLength(int i) {
|
73 | if (i < iFirstPair) ret 1; // literal |
74 | Int x = prodLengths.get(i); |
75 | if (x != null) ret x; |
76 | prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i)))); |
77 | ret x; |
78 | } |
79 | |
80 | L<IntPair> reconstitute(L<Int> baseList) {
|
81 | L<Int> needRight = subList(baseList, 1); |
82 | ret reconstitute(first(baseList), |
83 | "", |
84 | lcUncompressItems(lc, needRight)); |
85 | } |
86 | |
87 | // returns list of (file index, character offset) |
88 | L<IntPair> reconstitute(int item, S needLeft, S needRight) {
|
89 | if (debug) print("Reconstituting " + quote(lcUncompressItem(lc, item)));
|
90 | if (debug) print(" needLeft: " + quote(needLeft));
|
91 | if (debug) print(" needRight: " + quote(needRight));
|
92 | L<IntPair> l = prodIndex.get(item); |
93 | if (debug) print("Found: " + l);
|
94 | new L<IntPair> out; |
95 | search: for (IntPair p : l) {
|
96 | int iProd = p.a; |
97 | L<Int> prod = productions.get(iProd); |
98 | int j = p.b; |
99 | bool isFile = iProd >= iFirstFile; |
100 | if (isFile) {
|
101 | int iChar = fileOffsetMap.get(iProd).get(j); |
102 | if (debug) print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight));
|
103 | |
104 | // check left & right |
105 | if (nempty(needLeft)) fail(); // needLeft not used yet (or is it?) |
106 | |
107 | int iRight = j+1; |
108 | S rest = needRight; |
109 | while (nempty(rest)) {
|
110 | if (iRight >= l(prod)) continue search; // need more, but have no more text |
111 | rest = itemStartsWith(prod.get(iRight), rest); |
112 | if (rest == null) continue search; // mismatch |
113 | ++iRight; |
114 | } |
115 | |
116 | // result found! |
117 | out.add(intPair(iProd, iChar)); |
118 | } else { // we're on a pair, check left & right and then go higher up
|
119 | if (j == 0) {
|
120 | S rest = itemStartsWith(prod.get(1), needRight); |
121 | if (debug) print("checking right: " + quote(lcUncompressItem(lc, prod.get(1)))
|
122 | + " => " + quote(rest)); |
123 | if (rest == null) continue; |
124 | out.addAll(reconstitute(iProd, needLeft, rest)); |
125 | } else {
|
126 | S rest = itemEndsWith(prod.get(0), needLeft); |
127 | if (debug) print("checking left: " + quote(lcUncompressItem(lc, prod.get(0)))
|
128 | + " => " + quote(rest)); |
129 | if (rest == null) continue; |
130 | int leftLength = getProdLength(prod.get(0)); |
131 | for (IntPair result : reconstitute(iProd, rest, needRight)) |
132 | out.add(intPair(result.a, result.b+leftLength)); |
133 | } |
134 | } |
135 | } |
136 | ret out; |
137 | } |
138 | |
139 | S itemStartsWith(int i, S need) {
|
140 | if (empty(need)) ret need; |
141 | S have = lcUncompressItem(lc, i); |
142 | int l = lCommonPrefix(have, need); |
143 | if (l < l(need) && l < l(have)) null; |
144 | ret substring(need, l); |
145 | } |
146 | |
147 | S itemEndsWith(int i, S need) {
|
148 | if (empty(need)) ret need; |
149 | S have = lcUncompressItem(lc, i); |
150 | int l = lCommonSuffix(have, need); |
151 | if (l < l(need) && l < l(have)) null; |
152 | ret dropLast(need, l); |
153 | } |
154 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029098 |
| Snippet name: | LCSingleSearcher_v1 - probably slow, but works [tested up until length 7] |
| Eternal ID of this version: | #1029098/19 |
| Text MD5: | 890dfe63c0c30e923d518fd511dea419 |
| Transpilation MD5: | 16c841f2c71c6de6e776c7e9d48065c6 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-07-20 13:09:49 |
| Source code size: | 5163 bytes / 154 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 487 / 839 |
| Version history: | 18 change(s) |
| Referenced in: | [show references] |