Download Jar. Libraryless. Click here for Pure Java version (3242L/21K).
// Word index over a single text. Every match of `regexp` in the text is
// upper-cased and registered in wordMatrix under the character offset at
// which the match starts, so words are looked up case-insensitively
// (queries are upper-cased the same way in wordsAndOffsets).
transient sclass SingleTextWordIndex {
  // pattern that defines what counts as a word
  S regexp = "\\w+";

  // maps each upper-cased word to the offsets at which it starts
  new ElementInstanceMatrix<Int, S> wordMatrix;

  // length of the indexed text, as returned by l(text)
  int length;

  // Indexes text using the default word pattern.
  *(S text) { init(text); }

  // Indexes text using a custom word pattern.
  *(S *regexp, S text) { init(text); }

  // Builds the index: records the text length, then adds one entry per
  // regexp match, keyed by the match's start offset.
  void init(S text) {
    length = l(text);

    // instances are plain offsets, so both mappings are the identity
    wordMatrix.numberToInstance = wordMatrix.instanceToNumber = i -> i;

    for (IntRange r : regexpFindRanges(regexp, text))
      wordMatrix.add(r.start, ll(upper(substring(text, r))));
    wordMatrix.doneAdding();
  }

  // Splits text into (upper-cased word, start offset) pairs using the
  // same regexp that was used for indexing.
  LPair<S, Int> wordsAndOffsets(S text) {
    ret map(regexpFindRanges(regexp, text),
      r -> pair(upper(substring(text, r)), r.start));
  }

  // assumes word boundaries left and right of query
  int[] preSearch(S query) {
    ret indicesOfWordCombination(wordsAndOffsets(query));
  }

  // Looks up a sequence of words, each paired with its offset inside the
  // query. Returns null when the list is empty or (in the multi-word case)
  // when any word is missing from the index.
  // NOTE(review): the result is presumably the set of text positions at
  // which the whole query could start - this depends on intArray_minus and
  // leftShiftBitSet moving each word's positions back by the word's offset;
  // confirm against those helpers.
  int[] indicesOfWordCombination(LPair<S, Int> wordsWithOffsets) {
    int n = l(wordsWithOffsets);
    if (n == 0) null;

    // single word: no bit set combination needed
    // (the closing parenthesis of intArray_minus was missing here)
    if (n == 1)
      ret intArray_minus(first(wordsWithOffsets).b,
        wordMatrix.instancesContainingElement_intArray(first(wordsWithOffsets).a));

    // get entries for words, exit when a word is unknown
    ElementInstanceMatrix.Entry[] entries = new ElementInstanceMatrix.Entry[n];
    for i to n: {
      ElementInstanceMatrix.Entry e = wordMatrix.index.get(wordsWithOffsets.get(i).a);
      if (e == null) null;
      entries[i] = e;
    }

    // go through words again, shift & AND-combine all bit sets
    // (the first bit set is cloned before shifting; bs is the accumulator
    // that the following and() calls modify in place)
    BitSet bs = leftShiftBitSet(wordsWithOffsets.get(0).b, cloneBitSet(entries[0].bitSet()));
    for (int i = 1; i < n; i++)
      bs.and(leftShiftBitSet(wordsWithOffsets.get(i).b, entries[i].bitSet()));
    ret bitSetToIntArray(bs);
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029078 | 
| Snippet name: | SingleTextWordIndex | 
| Eternal ID of this version: | #1029078/17 | 
| Text MD5: | 0700c3e516890f194314dae86bd68c81 | 
| Transpilation MD5: | 45deeedf03bfcaab885d3fb179d76ea2 | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX source code (desktop) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2020-07-19 02:36:35 | 
| Source code size: | 1689 bytes / 52 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 566 / 1710 | 
| Version history: | 16 change(s) | 
| Referenced in: | [show references] |