Download Jar. Libraryless. Click here for Pure Java version (3242L/21K).
transient sclass SingleTextWordIndex { S regexp = "\\w+"; new ElementInstanceMatrix<Int, S> wordMatrix; int length; *(S text) { init(text); } *(S *regexp, S text) { init(text); } void init(S text) { length = l(text); wordMatrix.numberToInstance = wordMatrix.instanceToNumber = i -> i; for (IntRange r : regexpFindRanges(regexp, text)) wordMatrix.add(r.start, ll(upper(substring(text, r)))); wordMatrix.doneAdding(); } LPair<S, Int> wordsAndOffsets(S text) { ret map(regexpFindRanges(regexp, text), r -> pair(upper(substring(text, r)), r.start)); } // assumes word boundaries left and right of query int[] preSearch(S query) { ret indicesOfWordCombination(wordsAndOffsets(query)); } int[] indicesOfWordCombination(LPair<S, Int> wordsWithOffsets) { int n = l(wordsWithOffsets); if (n == 0) null; if (n == 1) ret intArray_minus(first(wordsWithOffsets).b, wordMatrix.instancesContainingElement_intArray(first(wordsWithOffsets).a); // get entries for words, exit when a word is unknown ElementInstanceMatrix.Entry[] entries = new ElementInstanceMatrix.Entry[n]; for i to n: { ElementInstanceMatrix.Entry e = wordMatrix.index.get(wordsWithOffsets.get(i).a); if (e == null) null; entries[i] = e; } // go through words again, shift & AND-combine all bit sets BitSet bs = leftShiftBitSet(wordsWithOffsets.get(0).b, cloneBitSet(entries[0].bitSet())); for (int i = 1; i < n; i++) bs.and(leftShiftBitSet(wordsWithOffsets.get(i).b, entries[i].bitSet())); ret bitSetToIntArray(bs); } }
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029078 |
Snippet name: | SingleTextWordIndex |
Eternal ID of this version: | #1029078/17 |
Text MD5: | 0700c3e516890f194314dae86bd68c81 |
Transpilation MD5: | 45deeedf03bfcaab885d3fb179d76ea2 |
Author: | stefan |
Category: | javax |
Type: | JavaX source code (desktop) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-19 02:36:35 |
Source code size: | 1689 bytes / 52 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 344 / 1183 |
Version history: | 16 change(s) |
Referenced in: | #1029082 - DeepBitSetWordIndex #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674) |