// Looks up candidate elements for a full-text query in a double word index.
//
// Returns a prefiltered collection of elements; the caller still needs to do
// a full-text search on those.
// If it returns null (index.wordRanges found no ranges in the query), the
// caller has to search all elements.
//
// Optional parameter "debug" (declared via optPar; presumably taken from _):
// when set, diagnostic output is printed.
//
// Strategies, tried in this order:
// 1. Ranges strictly between the first and the last one are treated as full
//    words. Their entry sets from index.index1 are intersected; if that
//    yields a non-null set, it is returned right away.
// 2. If the query is a single range covering the whole string, all index1
//    words selected by containingIC are enumerated lazily and every element
//    is yielded at most once.
// 3. Otherwise each range is looked up on its own. Every hit is converted to
//    the position at which the query would have to start inside the snippet,
//    and a snippet is returned when some start position was hit by all ranges.
static Iterable deepDoubleWordIndex_lookupString(DoubleWordIndex> index, S query, O... _) {
  optPar bool debug;
  L ranges = index.wordRanges(query);
  if (empty(ranges)) null;
  int nRanges = l(ranges);
  new Map> theMap; // for every snippet, a map of string position to word index

  // Strategy 1: intersect the entry sets of all "full" words (neither the
  // first nor the last range of the query).
  Set baseSet = null;
  for (int iWord = 1; iWord < nRanges-1; iWord++) {
    IntRange r = ranges.get(iWord);
    S word = substring(query, r);
    Set> entries = index.index1.get(word);
    // NOTE(review): going by its name, intersectSets_nullIsFull treats a null
    // first argument as "no restriction yet" - confirm against the helper.
    baseSet = intersectSets_nullIsFull(baseSet, getVarsToSet(entries));
  }
  if (baseSet != null) {
    // Diagnostic only; gated by the debug flag like every other print here.
    if (debug) print("baseSet: " + l(baseSet));
    ret baseSet;
  }
  // From here on baseSet is always null (the non-null case returned above),
  // so the loops below do not need to filter entries by it.

  // Strategy 2: special case, just a single word in query
  if (nRanges == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) {
    new Set seen; // elements already yielded, to suppress duplicates
    ret nestedIterator(containingIC(index.index1.words(), query), fullWord ->
      mapI_nonNulls_if1(index.index1.get(fullWord), e -> addAndReturnIfNew(seen, e!)));
  }

  // Strategy 3: go through words in query
  for iWord over ranges: {
    IntRange r = ranges.get(iWord);
    S word = substring(query, r);
    Cl l; // all matching words in index

    if (r.start == 0) {
      // The range touches the start of the query, so it is looked up as the
      // ending of an indexed word, using index2 with the reversed word.
      // NOTE(review): presumably index2 stores words reversed, which turns
      // the suffix search into a prefix search - confirm.
      l = prefixSubSet(index.index2.words(), reversed(word));
      if (empty(l)) ret emptyList();
      if (debug) print("word=" + word + ", fullWords=" + l);

      // special loop that accounts for length of actual word
      for (S fullWord : l)
        for (WithIntArray entry : index.index2.index.get(fullWord)) {
          MultiSetMap msm = theMap.get(entry!);
          if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
          // Number of characters of the indexed word that precede the matched
          // ending (r.start is 0 in this branch).
          int ofs = l(fullWord)-l(word)-r.start;
          for (int i : entry.array) {
            // NOTE(review): assumes entry.array holds the positions at which
            // the indexed word starts in the snippet - confirm.
            int idx = i+ofs;
            if (debug) print("Got idx " + idx);
            if (idx >= 0)
              msm.put(idx, iWord);
          }
        }
      continue;
    } else if (r.end == l(query)) {
      // The range touches the end of the query: look for start of word
      l = prefixSubSet(index.index1.words(), word);
    } else {
      // look for complete word
      l = ll(word);
    }

    if (empty(l)) ret emptyList();
    if (debug) print("word=" + word + ", fullWords=" + l);

    for (S fullWord : l)
      for (WithIntArray entry : index.index1.index.get(fullWord)) {
        if (debug) print("Got entry " + entry);
        MultiSetMap msm = theMap.get(entry!);
        if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
        for (int i : entry.array) {
          // Shift the hit back by the range's offset within the query, giving
          // the position where the query itself would have to start.
          int idx = i-r.start;
          if (debug) print("Got idx " + idx);
          if (idx >= 0)
            msm.put(idx, iWord);
        }
      }
  }

  if (debug) print("theMap size=" + l(theMap));

  // Keep the snippets for which some start position collected all nRanges
  // word indices; the lambda returns null for the others.
  ret asList(mapI_nonNulls_if1(theMap.entrySet(), e -> {
    A snippet = e.getKey();
    MultiSetMap msm = e.getValue();
    if (debug) print("snippet " + snippet);
    for (int position, Set wordIndices : msm.data) {
      if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges);
      if (l(wordIndices) == nRanges)
        ret snippet;
    }
    null;
  }));
}