transient sclass DeepBitSetWordIndex { S regexp = "\\w+"; Map singleTextIndices; new ElementInstanceMatrix mainIndex; void addDocument(A a, S text) { singleTextIndices.put(a, new SingleTextWordIndex(regexp, text)); mainIndex.add(a, mapToSet upper(regexpExtractAll(regexp, text)); } void doneAdding { mainIndex.doneAdding(); } LPair wordsAndOffsets(S text) { ret map(regexpFindRanges(regexp, text), r -> pair(upper(substring(text, r)), r.start)); } // assumes word boundaries left and right of query Cl preSearch(S query) { LPair l = wordsAndOffsets(query); Cl candidates = mainIndex.instancesContainingAllElements(pairsA(l)); print(nCandidates(candidates)); ret filter(candidates, a -> nempty(singleTextIndices.get(a).indicesOfWordCombination(l))); } int numWords() { ret mainIndex.numElements(); } }