// Word index over a single text. Every match of `regexp` in the text is
// upper-cased and registered in `wordMatrix` under the match's start offset,
// so a multi-word query can be pre-filtered by shifting and intersecting the
// per-word bit sets.
transient sclass SingleTextWordIndex {
  // Pattern that defines what counts as a word.
  S regexp = "\\w+";

  // Maps upper-cased words to the start offsets at which they occur.
  // NOTE(review): the generic type arguments of ElementInstanceMatrix and
  // LPair appear to be missing in this file (possibly lost during
  // extraction) - confirm against the original snippet.
  new ElementInstanceMatrix wordMatrix;

  // l(text) of the indexed text; assigned in init().
  int length;

  // Indexes `text` using the default word pattern.
  *(S text) { init(text); }

  // Indexes `text` using a caller-supplied word pattern
  // (the *regexp parameter form is the JavaX shorthand that stores the
  // argument in the field of the same name before the body runs).
  *(S *regexp, S text) { init(text); }

  // Builds the index: records the text length, makes the matrix use the
  // instance itself as its number (identity mapping both ways), then adds
  // one entry per regexp match.
  void init(S text) {
    length = l(text);
    wordMatrix.numberToInstance = wordMatrix.instanceToNumber = i -> i;
    for (IntRange r : regexpFindRanges(regexp, text))
      wordMatrix.add(r.start, ll(upper(substring(text, r))));
    wordMatrix.doneAdding();
  }

  // Splits `text` with the same regexp used for indexing and returns one
  // pair per match: (upper-cased word, start offset of the match).
  LPair wordsAndOffsets(S text) {
    ret map(regexpFindRanges(regexp, text),
      r -> pair(upper(substring(text, r)), r.start));
  }

  // assumes word boundaries left and right of query
  int[] preSearch(S query) {
    ret indicesOfWordCombination(wordsAndOffsets(query));
  }

  // Combines the index entries of all given (word, offset) pairs.
  // Returns null when the list is empty or when any word (in the
  // multi-word path) has no entry in the index.
  int[] indicesOfWordCombination(LPair wordsWithOffsets) {
    int n = l(wordsWithOffsets);
    if (n == 0) ret null;

    // Single word: no intersection needed, just correct by its offset.
    // Fixed: the original statement was missing the closing parenthesis
    // of the intArray_minus call.
    // NOTE(review): argument order (offset first, array second) is kept
    // exactly as written - verify against intArray_minus's signature.
    if (n == 1)
      ret intArray_minus(first(wordsWithOffsets).b,
        wordMatrix.instancesContainingElement_intArray(first(wordsWithOffsets).a));

    // get entries for words, exit when a word is unknown
    ElementInstanceMatrix.Entry[] entries = new ElementInstanceMatrix.Entry[n];
    for i to n: {
      ElementInstanceMatrix.Entry e = wordMatrix.index.get(wordsWithOffsets.get(i).a);
      if (e == null) ret null;
      entries[i] = e;
    }

    // go through words again, shift & AND-combine all bit sets
    // (the first bit set is cloned before shifting; the result is then
    // modified in place by and())
    BitSet bs = leftShiftBitSet(wordsWithOffsets.get(0).b,
      cloneBitSet(entries[0].bitSet()));
    for (int i = 1; i < n; i++)
      bs.and(leftShiftBitSet(wordsWithOffsets.get(i).b, entries[i].bitSet()));

    ret bitSetToIntArray(bs);
  }
}