transient sclass DeepWordIndex { S regexp = "\\w+"; new Map> entries; MultiSetMap> entriesByWord = ciMultiSetMap(); sclass Entry extends Var { Map wordPositions = ciMap(); // int array is sorted *(A id) { super(id); } } L wordRanges(S text) { ret regexpFindRanges(regexp, text); } void add(A a, S text) { Entry e = new Entry(a); if (entries.put(a, e) != null) fail("Double insertion"); MultiMap wordPositions = ciMultiMap(); for (IntRange r : wordRanges(text)) { S word = substring(text, r); wordPositions.put(word, r.start); entriesByWord.put(word, e); } for (S word : keys(wordPositions)) e.wordPositions.put(word, toIntArray(wordPositions.get(word))); } Set> get(S word) { ret entriesByWord.get(word); } int numWords() { ret entriesByWord.keysSize(); } Iterable>>lookupString_withPositions(S query, O... _) { optPar bool debug; L ranges = wordRanges(query); if (empty(ranges)) null; int nRanges = l(ranges); if (nRanges >= 3) { IntRange r = ranges.get(1); S word = substring(query, r); Set> entries = entriesByWord.get(word); if (entries == null) ret emptyList(); new LPair> out; entrySearch: for (Entry entry : entries) { int[] positions = entry.wordPositions.get(word); for (int iWord = 2; iWord < nRanges-1; iWord++) { IntRange r2 = ranges.get(iWord); S word2 = substring(query, r2); int[] positions2 = entry.wordPositions.get(word2); if (positions2 == null) continue entrySearch; int ofs = r2.start-r.start; positions = intersectSortedIntArrays_ofs(positions, positions2, ofs); if (empty(positions)) continue entrySearch; } out.add(pair(entry!, toList(positions))); } ret out; } null; } }