// clever index of tokenized strings // maintaining one CI-sorted list per token position sclass PositionalTokenIndex { L sorts = new L; // token index -> entries sorted by token at index void addAll(LLS toks) { if (empty(toks)) ret; for (LS tok : toks) for (int i = 1; i < l(tok); i += 2) { LL idx = listGetOrCreate_List(sorts, i/2); idx.add(tok); } sort(); } // internal void sort { for i over sorts: { int j = i*2+1; sortInPlaceByCalculatedFieldIC(sorts.get(i), func(LS tok) -> S { tok.get(j) }); } } // return all tokenizations with token t in position i // may return null LLS byToken(int i, S t) { LLS list = get(sorts, i); if (list == null) null; int mid = binarySearch(list, i, t); if (mid < 0) null; // TODO: do more binary search here int start = mid; while (start > 0 && eqic(list.get(start-1).get(i*2+1), t)) --start; int end = mid+1; int l = l(list); while (end < l && eqic(list.get(end).get(i*2+1), t)) ++end; ret subList(list, start, end); } // internal int binarySearch(LLS list, int position, S t) { int low = 0, high = list.size()-1; while (low <= high) { int mid = (low + high) >>> 1; LS tok = list.get(mid); S token = tok.get(position*2+1); int cmp = compareIC(token, t); if (cmp < 0) low = mid + 1; else if (cmp > 0) high = mid - 1; else ret mid; } ret -1; } }