Libraryless. Click here for Pure Java version (4313L/26K).
| 1 | // returns a prefiltered list of elements; you still need to do a | 
| 2 | // full-text search on those. | 
| 3 | // If it returns null, you have to search all elements | 
| 4 | static <A> Iterable<A> deepDoubleWordIndex_lookupString(DoubleWordIndex<WithIntArray<A>> index, S query, O... _) {
 | 
| 5 | optPar bool debug; | 
| 6 | L<IntRange> ranges = index.wordRanges(query); | 
| 7 | if (empty(ranges)) null; | 
| 8 | int nRanges = l(ranges); | 
| 9 | new Map<A, MultiSetMap<Int>> theMap; // for every snippet, a map of string position to word index | 
| 10 | |
| 11 | Set<A> baseSet = null; | 
| 12 |   for (int iWord = 1; iWord < nRanges-1; iWord++) { // go through all "full" words
 | 
| 13 | IntRange r = ranges.get(iWord); | 
| 14 | S word = substring(query, r); | 
| 15 | Set<WithIntArray<A>> entries = index.index1.get(word); | 
| 16 | baseSet = intersectSets_nullIsFull(baseSet, getVarsToSet(entries)); | 
| 17 | } | 
| 18 |   if (baseSet != null) {
 | 
| 19 |     print("baseSet: " + l(baseSet));
 | 
| 20 | ret baseSet; | 
| 21 | } | 
| 22 | |
| 23 | // special case, just a single word in query | 
| 24 |   if (l(ranges) == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) {
 | 
| 25 | new Set<A> seen; | 
| 26 | ret nestedIterator(containingIC(index.index1.words(), query), fullWord -> | 
| 27 | mapI_nonNulls_if1(index.index1.get(fullWord), e -> addAndReturnIfNew(seen, e!))); | 
| 28 | } | 
| 29 | |
| 30 |   for iWord over ranges: { // go through words in query
 | 
| 31 | IntRange r = ranges.get(iWord); | 
| 32 | S word = substring(query, r); | 
| 33 | Cl<S> l; // all matching words in index | 
| 34 | WordIndex<WithIntArray<A>> indexToUse = index.index1; | 
| 35 | |
| 36 |     if (r.start == 0) { // look for ending of word - use reverse index
 | 
| 37 | l = prefixSubSet(index.index2.words(), reversed(word)); | 
| 38 | if (empty(l)) ret emptyList(); | 
| 39 |       if (debug) print("word=" + word + ", fullWords=" + l);
 | 
| 40 | |
| 41 | // special loop that accounts for length of actual word | 
| 42 | for (S fullWord : l) | 
| 43 |         for (WithIntArray<A> entry : index.index2.index.get(fullWord)) {
 | 
| 44 | if (baseSet != null && !baseSet.contains(entry!)) continue; | 
| 45 | MultiSetMap<Int> msm = theMap.get(entry!); | 
| 46 | if (msm == null) theMap.put(entry!, msm = new MultiSetMap); | 
| 47 | int ofs = l(fullWord)-l(word)-r.start; | 
| 48 |           for (int i : entry.array) {
 | 
| 49 | int idx = i+ofs; | 
| 50 |             if (debug) print("Got idx " + idx);
 | 
| 51 | if (idx >= 0) | 
| 52 | msm.put(idx, iWord); | 
| 53 | } | 
| 54 | } | 
| 55 | continue; | 
| 56 |     } else if (r.end == l(query)) { // look for start of word
 | 
| 57 | l = prefixSubSet(index.index1.words(), word); | 
| 58 | } else // look for complete word | 
| 59 | l = ll(word); | 
| 60 | |
| 61 | if (empty(l)) ret emptyList(); | 
| 62 | |
| 63 |     if (debug) print("word=" + word + ", fullWords=" + l);
 | 
| 64 | |
| 65 | for (S fullWord : l) | 
| 66 |       for (WithIntArray<A> entry : indexToUse.index.get(fullWord)) {
 | 
| 67 | if (baseSet != null && !baseSet.contains(entry!)) continue; | 
| 68 |         if (debug) print("Got entry " + entry);
 | 
| 69 | MultiSetMap<Int> msm = theMap.get(entry!); | 
| 70 | if (msm == null) theMap.put(entry!, msm = new MultiSetMap); | 
| 71 |         for (int i : entry.array) {
 | 
| 72 | int idx = i-r.start; | 
| 73 |           if (debug) print("Got idx " + idx);
 | 
| 74 | if (idx >= 0) | 
| 75 | msm.put(idx, iWord); | 
| 76 | } | 
| 77 | } | 
| 78 | } | 
| 79 | |
| 80 |   if (debug) print("theMap size=" + l(theMap));
 | 
| 81 |   ret asList(mapI_nonNulls_if1(theMap.entrySet(), e -> {
 | 
| 82 | A snippet = e.getKey(); | 
| 83 | MultiSetMap<Int> msm = e.getValue(); | 
| 84 | |
| 85 |     if (debug) print("snippet " + snippet);
 | 
| 86 |     for (int position, Set<Int> wordIndices : msm.data) {
 | 
| 87 |       if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges);
 | 
| 88 | if (l(wordIndices) == nRanges) | 
| 89 | ret snippet; | 
| 90 | } | 
| 91 | null; | 
| 92 | })); | 
| 93 | } | 
Began life as a copy of #1029005
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029012 | 
| Snippet name: | deepDoubleWordIndex_lookupString | 
| Eternal ID of this version: | #1029012/39 | 
| Text MD5: | cf27eeff711b4e705be3d43437d936ea | 
| Transpilation MD5: | a601c76df940a3b3873ec40bc0eca0a5 | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX fragment (include) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2020-07-17 00:20:08 | 
| Source code size: | 3657 bytes / 93 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 508 / 723 | 
| Version history: | 38 change(s) | 
| Referenced in: | [show references] |