Libraryless. Click here for Pure Java version (4313L/26K).
// returns a prefiltered list of elements; you still need to do a // full-text search on those. // If it returns null, you have to search all elements static <A> Iterable<A> deepDoubleWordIndex_lookupString(DoubleWordIndex<WithIntArray<A>> index, S query, O... _) { optPar bool debug; L<IntRange> ranges = index.wordRanges(query); if (empty(ranges)) null; int nRanges = l(ranges); new Map<A, MultiSetMap<Int>> theMap; // for every snippet, a map of string position to word index Set<A> baseSet = null; for (int iWord = 1; iWord < nRanges-1; iWord++) { // go through all "full" words IntRange r = ranges.get(iWord); S word = substring(query, r); Set<WithIntArray<A>> entries = index.index1.get(word); baseSet = intersectSets_nullIsFull(baseSet, getVarsToSet(entries)); } if (baseSet != null) { print("baseSet: " + l(baseSet)); ret baseSet; } // special case, just a single word in query if (l(ranges) == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) { new Set<A> seen; ret nestedIterator(containingIC(index.index1.words(), query), fullWord -> mapI_nonNulls_if1(index.index1.get(fullWord), e -> addAndReturnIfNew(seen, e!))); } for iWord over ranges: { // go through words in query IntRange r = ranges.get(iWord); S word = substring(query, r); Cl<S> l; // all matching words in index WordIndex<WithIntArray<A>> indexToUse = index.index1; if (r.start == 0) { // look for ending of word - use reverse index l = prefixSubSet(index.index2.words(), reversed(word)); if (empty(l)) ret emptyList(); if (debug) print("word=" + word + ", fullWords=" + l); // special loop that accounts for length of actual word for (S fullWord : l) for (WithIntArray<A> entry : index.index2.index.get(fullWord)) { if (baseSet != null && !baseSet.contains(entry!)) continue; MultiSetMap<Int> msm = theMap.get(entry!); if (msm == null) theMap.put(entry!, msm = new MultiSetMap); int ofs = l(fullWord)-l(word)-r.start; for (int i : entry.array) { int idx = i+ofs; if (debug) print("Got idx " + idx); if (idx >= 0) msm.put(idx, iWord); } } continue; } else if (r.end == l(query)) { // look for start of word l = prefixSubSet(index.index1.words(), word); } else // look for complete word l = ll(word); if (empty(l)) ret emptyList(); if (debug) print("word=" + word + ", fullWords=" + l); for (S fullWord : l) for (WithIntArray<A> entry : indexToUse.index.get(fullWord)) { if (baseSet != null && !baseSet.contains(entry!)) continue; if (debug) print("Got entry " + entry); MultiSetMap<Int> msm = theMap.get(entry!); if (msm == null) theMap.put(entry!, msm = new MultiSetMap); for (int i : entry.array) { int idx = i-r.start; if (debug) print("Got idx " + idx); if (idx >= 0) msm.put(idx, iWord); } } } if (debug) print("theMap size=" + l(theMap)); ret asList(mapI_nonNulls_if1(theMap.entrySet(), e -> { A snippet = e.getKey(); MultiSetMap<Int> msm = e.getValue(); if (debug) print("snippet " + snippet); for (int position, Set<Int> wordIndices : msm.data) { if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges); if (l(wordIndices) == nRanges) ret snippet; } null; })); }
Began life as a copy of #1029005
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029012 |
Snippet name: | deepDoubleWordIndex_lookupString |
Eternal ID of this version: | #1029012/39 |
Text MD5: | cf27eeff711b4e705be3d43437d936ea |
Transpilation MD5: | a601c76df940a3b3873ec40bc0eca0a5 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-17 00:20:08 |
Source code size: | 3657 bytes / 93 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 284 / 465 |
Version history: | 38 change(s) |
Referenced in: | #1006654 - Standard functions list 2 (LIVE, continuation of #761) #1029015 - deepDoubleWordIndex_search - does the full search [dev.] #1029016 - deepDoubleWordIndex_lookupString_withPositions |