Download Jar. Libraryless. Click here for Pure Java version (3599L/23K).
| 1 | transient sclass DeepBitSetWordIndex<A> {
 | 
| 2 | S regexp = "\\w+"; | 
| 3 | new Map<A, SingleTextWordIndex> singleTextIndices; | 
| 4 | new ElementInstanceMatrix<A, S> mainIndex; | 
| 5 | |
| 6 |   void add(A a, S text) {
 | 
| 7 | singleTextIndices.put(a, new SingleTextWordIndex(regexp, text)); | 
| 8 | mainIndex.add(a, mapToSet upper(regexpExtractAll(regexp, text)); | 
| 9 | } | 
| 10 | |
| 11 |   void doneAdding {
 | 
| 12 | mainIndex.doneAdding(); | 
| 13 | } | 
| 14 | |
| 15 |   LPair<S, Int> wordsAndOffsets(S text) {
 | 
| 16 | ret map(regexpFindRanges(regexp, text), | 
| 17 | r -> pair(upper(substring(text, r)), r.start)); | 
| 18 | } | 
| 19 | |
| 20 | // assumes word boundaries left and right of query | 
| 21 |   Cl<A> preSearch(S query, O... _) {
 | 
| 22 | optPar bool debug; | 
| 23 | LPair<S, Int> l = wordsAndOffsets(query); | 
| 24 | Cl<A> candidates = mainIndex.instancesContainingAllElements(pairsA(l)); | 
| 25 |     if (debug) {
 | 
| 26 | L<Int> lengths = map(candidates, a -> singleTextIndices.get(a).length); | 
| 27 | print(nCandidates(candidates) + ", total length: " + n2(intSum(lengths)) + ", lengths: " + lengths); | 
| 28 | } | 
| 29 | ret filter(candidates, a -> nempty(singleTextIndices.get(a).indicesOfWordCombination(l))); | 
| 30 | } | 
| 31 | |
| 32 |   int numWords() { ret mainIndex.numElements(); }
 | 
| 33 | } | 
Began life as a copy of #1029078
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029082 | 
| Snippet name: | DeepBitSetWordIndex | 
| Eternal ID of this version: | #1029082/15 | 
| Text MD5: | e26b556e2fd8d141d4bb5535451c02db | 
| Transpilation MD5: | 7d55bec89146d478c8b80ae0361a74a6 | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX source code (desktop) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2020-07-19 02:39:47 | 
| Source code size: | 1161 bytes / 33 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 662 / 1966 | 
| Version history: | 14 change(s) | 
| Referenced in: | [show references] |