transient sclass WordIndex { // if this is set, it's used for sorting the values // which can speed up lookups Comparator valueComparator; S regexp = "\\w+"; MultiSetMap index = ciMultiSetMap(); // sets are better for lookups *() {} *(Comparator *valueComparator) { index = ciMultiSetMap_innerTreeSet(valueComparator); } *(Map map) { fOr (A a, S text : map) add(a, text); } void add(A a, S text) { Set words = extractWords(text); for (S word : words) addWord(a, word); } void addWord(A a, S word) { index.add(word, a); } Set extractWords(S text) { ret asCISet(extractWords_list(text)); } LS extractWords_list(S text) { ret regexpExtractAll(regexp, text); } L wordRanges(S text) { ret regexpFindRanges(regexp, text); } Set get(S word) { ret index.get(word); } void remove(A a, S text) { Set words = extractWords(text); for (S word : words) index.remove(word, a); } NavigableSet words() { ret (NavigableSet) keys(index); } int numWords() { ret index.keysSize(); } // These methods only work when A = S void add(S s) { add((A) s, s); } void remove(S s) { remove((A) s, s); } }