Uses 11335K of libraries. Click here for Pure Java version (9199L/59K).
sclass LCSingleSearcher_v1 { S text; bool debug; transient LineCompedSingle<Char> lc; transient LL<Int> productions; transient Map<Int> prodLengths; // prod index -> length of production in characters transient int iFirstPair, iFirstFile; transient Map<Char, Int> literalIndex; transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod) transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets transient bool prepared; void prepare { ret unless set prepared; lc = lcString2(text); // Turn pairs and file encodings into a single structure ("productions") productions = new L; productions.addAll(rep(null, l(lc.literals))); iFirstPair = l(productions); productions.addAll(intPairsToLists(lc.pairs)); iFirstFile = l(productions); productions.add(lc.main); prodLengths = new Map; fileOffsetMap = new Map; for (int i = iFirstFile; i < l(productions); i++) { L<Int> prod = productions.get(i); new L<Int> l; int ofs = 0; for (int x : prod) { l.add(ofs); ofs += getProdLength(x); } fileOffsetMap.put(i, l); } // build the reconstitution index literalIndex = indexList(lc.literals); prodIndex = new MultiMap; for iProd over productions: { L<Int> prod = productions.get(iProd); for (int i, int x : unpair iterateListWithIndex(prod)) { prodIndex.put(x, intPair(iProd, i)); } } if (debug) print(+literalIndex); if (debug) print(+prodIndex); } L<Int> search(S query) { prepare(); L<Char> queryList = characters(query); new L<Int> queryLiterals; for (Char c : queryList) { Int iLit = literalIndex.get(c); if (iLit == null) { if (debug) print("literal not found"); ret emptyList(); } queryLiterals.add(iLit); } if (debug) print(+queryLiterals); ret intPairsB(print("Result", reconstitute(queryLiterals))); } int getProdLength(int i) { if (i < iFirstPair) ret 1; // literal Int x = prodLengths.get(i); if (x != null) ret x; prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i)))); ret x; } L<IntPair> reconstitute(L<Int> baseList) { L<Int> needRight = subList(baseList, 1); ret reconstitute(first(baseList), "", lcUncompressItems(lc, needRight)); } // returns list of (file index, character offset) L<IntPair> reconstitute(int item, S needLeft, S needRight) { if (debug) print("Reconstituting " + quote(lcUncompressItem(lc, item))); if (debug) print(" needLeft: " + quote(needLeft)); if (debug) print(" needRight: " + quote(needRight)); L<IntPair> l = prodIndex.get(item); if (debug) print("Found: " + l); new L<IntPair> out; search: for (IntPair p : l) { int iProd = p.a; L<Int> prod = productions.get(iProd); int j = p.b; bool isFile = iProd >= iFirstFile; if (isFile) { int iChar = fileOffsetMap.get(iProd).get(j); if (debug) print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight)); // check left & right if (nempty(needLeft)) fail(); // needLeft not used yet (or is it?) int iRight = j+1; S rest = needRight; while (nempty(rest)) { if (iRight >= l(prod)) continue search; // need more, but have no more text rest = itemStartsWith(prod.get(iRight), rest); if (rest == null) continue search; // mismatch ++iRight; } // result found! out.add(intPair(iProd, iChar)); } else { // we're on a pair, check left & right and then go higher up if (j == 0) { S rest = itemStartsWith(prod.get(1), needRight); if (debug) print("checking right: " + quote(lcUncompressItem(lc, prod.get(1))) + " => " + quote(rest)); if (rest == null) continue; out.addAll(reconstitute(iProd, needLeft, rest)); } else { S rest = itemEndsWith(prod.get(0), needLeft); if (debug) print("checking left: " + quote(lcUncompressItem(lc, prod.get(0))) + " => " + quote(rest)); if (rest == null) continue; int leftLength = getProdLength(prod.get(0)); for (IntPair result : reconstitute(iProd, rest, needRight)) out.add(intPair(result.a, result.b+leftLength)); } } } ret out; } S itemStartsWith(int i, S need) { if (empty(need)) ret need; S have = lcUncompressItem(lc, i); int l = lCommonPrefix(have, need); if (l < l(need) && l < l(have)) null; ret substring(need, l); } S itemEndsWith(int i, S need) { if (empty(need)) ret need; S have = lcUncompressItem(lc, i); int l = lCommonSuffix(have, need); if (l < l(need) && l < l(have)) null; ret dropLast(need, l); } }
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029098 |
Snippet name: | LCSingleSearcher_v1 - probably slow, but works [tested up until length 7] |
Eternal ID of this version: | #1029098/19 |
Text MD5: | 890dfe63c0c30e923d518fd511dea419 |
Transpilation MD5: | 16c841f2c71c6de6e776c7e9d48065c6 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-20 13:09:49 |
Source code size: | 5163 bytes / 154 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 292 / 607 |
Version history: | 18 change(s) |
Referenced in: | [show references] |