// Index from words to the entries (documents) that contain them, remembering
// for every entry the start offsets at which each of its words occurs.
// A is the type of the id under which a text is registered.
// NOTE(review): the generic type arguments below were reconstructed from how
// the fields are used — confirm against the original declaration.
transient sclass DeepWordIndex<A> {
  // Pattern defining what counts as a word when a text is scanned.
  S regexp = "\\w+";

  // id -> entry, one entry per id passed to add().
  new Map<A, Entry<A>> entries;

  // word -> entries whose text contains that word.
  // Presumably case-insensitive on the word (ciMultiSetMap) — verify helper.
  MultiSetMap<S, Entry<A>> entriesByWord = ciMultiSetMap();

  // One indexed text: its id plus the positions of each word found in it.
  sclass Entry<A> {
    A id;

    // word -> start offsets of every occurrence of that word in the text.
    // Presumably case-insensitive on the word (ciMap) — verify helper.
    Map<S, int[]> wordPositions = ciMap();

    *(A *id) {}
  }

  // Returns the ranges of all matches of `regexp` in `text`.
  L<IntRange> wordRanges(S text) {
    ret regexpFindRanges(regexp, text);
  }

  // Indexes `text` under the id `a`.
  // Reports "Double insertion" via fail() if `a` was already added; the check
  // runs before anything is modified, so a rejected call leaves the existing
  // entry for `a` (and the word index) untouched.
  void add(A a, S text) {
    if (entries.containsKey(a)) fail("Double insertion");

    Entry<A> e = new Entry(a);
    entries.put(a, e);

    // Gather the start offset of every word occurrence, grouped by word.
    MultiMap<S, Int> wordPositions = ciMultiMap();
    for (IntRange r : wordRanges(text)) {
      S word = substring(text, r);
      wordPositions.put(word, r.start);
      entriesByWord.put(word, e);
    }

    // Store the collected offsets per word on the entry in array form.
    for (S word : keys(wordPositions))
      e.wordPositions.put(word, toIntArray(wordPositions.get(word)));
  }

  // Returns the entries registered for `word` in entriesByWord.
  Set<Entry<A>> get(S word) { ret entriesByWord.get(word); }

  // Returns the key count of entriesByWord (presumably the number of
  // distinct indexed words — verify keysSize()).
  int numWords() { ret entriesByWord.keysSize(); }
}