Libraryless. Click here for Pure Java version (4313L/26K).
1 | // returns a prefiltered list of elements; you still need to do a |
2 | // full-text search on those. |
3 | // If it returns null, you have to search all elements |
4 | static <A> Iterable<A> deepDoubleWordIndex_lookupString(DoubleWordIndex<WithIntArray<A>> index, S query, O... _) {
|
5 | optPar bool debug; |
6 | L<IntRange> ranges = index.wordRanges(query); |
7 | if (empty(ranges)) null; |
8 | int nRanges = l(ranges); |
9 | new Map<A, MultiSetMap<Int>> theMap; // for every snippet, a map of string position to word index |
10 | |
11 | Set<A> baseSet = null; |
12 | for (int iWord = 1; iWord < nRanges-1; iWord++) { // go through all "full" words
|
13 | IntRange r = ranges.get(iWord); |
14 | S word = substring(query, r); |
15 | Set<WithIntArray<A>> entries = index.index1.get(word); |
16 | baseSet = intersectSets_nullIsFull(baseSet, getVarsToSet(entries)); |
17 | } |
18 | if (baseSet != null) {
|
19 | print("baseSet: " + l(baseSet));
|
20 | ret baseSet; |
21 | } |
22 | |
23 | // special case, just a single word in query |
24 | if (l(ranges) == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) {
|
25 | new Set<A> seen; |
26 | ret nestedIterator(containingIC(index.index1.words(), query), fullWord -> |
27 | mapI_nonNulls_if1(index.index1.get(fullWord), e -> addAndReturnIfNew(seen, e!))); |
28 | } |
29 | |
30 | for iWord over ranges: { // go through words in query
|
31 | IntRange r = ranges.get(iWord); |
32 | S word = substring(query, r); |
33 | Cl<S> l; // all matching words in index |
34 | WordIndex<WithIntArray<A>> indexToUse = index.index1; |
35 | |
36 | if (r.start == 0) { // look for ending of word - use reverse index
|
37 | l = prefixSubSet(index.index2.words(), reversed(word)); |
38 | if (empty(l)) ret emptyList(); |
39 | if (debug) print("word=" + word + ", fullWords=" + l);
|
40 | |
41 | // special loop that accounts for length of actual word |
42 | for (S fullWord : l) |
43 | for (WithIntArray<A> entry : index.index2.index.get(fullWord)) {
|
44 | if (baseSet != null && !baseSet.contains(entry!)) continue; |
45 | MultiSetMap<Int> msm = theMap.get(entry!); |
46 | if (msm == null) theMap.put(entry!, msm = new MultiSetMap); |
47 | int ofs = l(fullWord)-l(word)-r.start; |
48 | for (int i : entry.array) {
|
49 | int idx = i+ofs; |
50 | if (debug) print("Got idx " + idx);
|
51 | if (idx >= 0) |
52 | msm.put(idx, iWord); |
53 | } |
54 | } |
55 | continue; |
56 | } else if (r.end == l(query)) { // look for start of word
|
57 | l = prefixSubSet(index.index1.words(), word); |
58 | } else // look for complete word |
59 | l = ll(word); |
60 | |
61 | if (empty(l)) ret emptyList(); |
62 | |
63 | if (debug) print("word=" + word + ", fullWords=" + l);
|
64 | |
65 | for (S fullWord : l) |
66 | for (WithIntArray<A> entry : indexToUse.index.get(fullWord)) {
|
67 | if (baseSet != null && !baseSet.contains(entry!)) continue; |
68 | if (debug) print("Got entry " + entry);
|
69 | MultiSetMap<Int> msm = theMap.get(entry!); |
70 | if (msm == null) theMap.put(entry!, msm = new MultiSetMap); |
71 | for (int i : entry.array) {
|
72 | int idx = i-r.start; |
73 | if (debug) print("Got idx " + idx);
|
74 | if (idx >= 0) |
75 | msm.put(idx, iWord); |
76 | } |
77 | } |
78 | } |
79 | |
80 | if (debug) print("theMap size=" + l(theMap));
|
81 | ret asList(mapI_nonNulls_if1(theMap.entrySet(), e -> {
|
82 | A snippet = e.getKey(); |
83 | MultiSetMap<Int> msm = e.getValue(); |
84 | |
85 | if (debug) print("snippet " + snippet);
|
86 | for (int position, Set<Int> wordIndices : msm.data) {
|
87 | if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges);
|
88 | if (l(wordIndices) == nRanges) |
89 | ret snippet; |
90 | } |
91 | null; |
92 | })); |
93 | } |
Began life as a copy of #1029005
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029012 |
| Snippet name: | deepDoubleWordIndex_lookupString |
| Eternal ID of this version: | #1029012/39 |
| Text MD5: | cf27eeff711b4e705be3d43437d936ea |
| Transpilation MD5: | a601c76df940a3b3873ec40bc0eca0a5 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-07-17 00:20:08 |
| Source code size: | 3657 bytes / 93 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 555 / 765 |
| Version history: | 38 change(s) |
| Referenced in: | [show references] |