Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

93
LINES

< > BotCompany Repo | #1029012 // deepDoubleWordIndex_lookupString

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4313L/26K).

// Looks up a query string in a double word index and returns candidate
// elements.
//
// Returns a prefiltered collection of elements; you still need to do a
// full-text search on those.
// If it returns null, you have to search all elements.
//
// Optional parameter (declared via optPar, passed in the trailing varargs):
//   debug (bool) - print diagnostic output while searching
static <A> Iterable<A> deepDoubleWordIndex_lookupString(DoubleWordIndex<WithIntArray<A>> index, S query, O... _) {
  optPar bool debug;
  L<IntRange> ranges = index.wordRanges(query);
  if (empty(ranges)) null; // no word ranges in query => caller has to search everything
  int nRanges = l(ranges);

  // Every range that is neither the first nor the last one is treated as a
  // "full" word: intersect the elements indexed under each of these words.
  // NOTE(review): assumes index1.get/getVarsToSet handle a word that is not
  // in the index sensibly - confirm against those helpers.
  Set<A> baseSet = null;
  for (int iWord = 1; iWord < nRanges-1; iWord++) {
    IntRange r = ranges.get(iWord);
    S word = substring(query, r);
    Set<WithIntArray<A>> entries = index.index1.get(word);
    baseSet = intersectSets_nullIsFull(baseSet, getVarsToSet(entries));
  }
  if (baseSet != null) {
    // Diagnostic output only when requested (like every other print here).
    if (debug) print("baseSet: " + l(baseSet));
    // Because of this early return, baseSet is always null from here on.
    // The baseSet checks in the loops below are therefore currently no-ops;
    // they are kept so positional filtering can be combined with baseSet
    // again by removing this return.
    ret baseSet;
  }

  // special case, just a single word in query (the range spans the whole
  // query): lazily iterate over the entries of all indexed words that
  // contain the query (case-insensitive), yielding every element once
  if (nRanges == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) {
    new Set<A> seen;
    ret nestedIterator(containingIC(index.index1.words(), query), fullWord ->
      mapI_nonNulls_if1(index.index1.get(fullWord), e -> addAndReturnIfNew(seen, e!)));
  }

  // for every snippet, a map of string position to word index.
  // "String position" is the normalized offset at which the whole query
  // would have to start for that word occurrence to line up with it
  // (presumably entry.array holds the word's positions in the snippet's
  // text - TODO confirm against WithIntArray/DoubleWordIndex).
  new Map<A, MultiSetMap<Int>> theMap;

  for iWord over ranges: { // go through words in query
    IntRange r = ranges.get(iWord);
    S word = substring(query, r);

    if (r.start == 0) { // look for ending of word - use reverse index
      Cl<S> reverseMatches = prefixSubSet(index.index2.words(), reversed(word));
      if (empty(reverseMatches)) ret emptyList();
      if (debug) print("word=" + word + ", fullWords=" + reverseMatches);

      // special loop that accounts for length of actual word: the query
      // word is only the tail of the indexed word, so shift each position
      // by the length difference
      for (S fullWord : reverseMatches)
        for (WithIntArray<A> entry : index.index2.index.get(fullWord)) {
          if (baseSet != null && !baseSet.contains(entry!)) continue;
          MultiSetMap<Int> msm = theMap.get(entry!);
          if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
          int ofs = l(fullWord)-l(word)-r.start;
          for (int i : entry.array) {
            int idx = i+ofs;
            if (debug) print("Got idx " + idx);
            if (idx >= 0)
              msm.put(idx, iWord);
          }
        }
      continue;
    }

    Cl<S> fullWords; // all matching words in index
    if (r.end == l(query)) // look for start of word
      fullWords = prefixSubSet(index.index1.words(), word);
    else // look for complete word
      fullWords = ll(word);

    if (empty(fullWords)) ret emptyList();

    if (debug) print("word=" + word + ", fullWords=" + fullWords);

    for (S fullWord : fullWords)
      for (WithIntArray<A> entry : index.index1.index.get(fullWord)) {
        if (baseSet != null && !baseSet.contains(entry!)) continue;
        if (debug) print("Got entry " + entry);
        MultiSetMap<Int> msm = theMap.get(entry!);
        if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
        for (int i : entry.array) {
          // subtract the word's offset within the query to get the
          // position where the query itself would start
          int idx = i-r.start;
          if (debug) print("Got idx " + idx);
          if (idx >= 0)
            msm.put(idx, iWord);
        }
      }
  }

  if (debug) print("theMap size=" + l(theMap));

  // Keep a snippet if at least one position collected all nRanges word
  // indices, i.e. every query word lines up at the same query start.
  ret asList(mapI_nonNulls_if1(theMap.entrySet(), e -> {
    A snippet = e.getKey();
    MultiSetMap<Int> msm = e.getValue();

    if (debug) print("snippet " + snippet);
    for (int position, Set<Int> wordIndices : msm.data) {
      if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges);
      if (l(wordIndices) == nRanges)
        ret snippet;
    }
    null;
  }));
}

Author comment

Began life as a copy of #1029005

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029012
Snippet name: deepDoubleWordIndex_lookupString
Eternal ID of this version: #1029012/39
Text MD5: cf27eeff711b4e705be3d43437d936ea
Transpilation MD5: a601c76df940a3b3873ec40bc0eca0a5
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-17 00:20:08
Source code size: 3657 bytes / 93 lines
Pitched / IR pitched: No / No
Views / Downloads: 285 / 466
Version history: 38 change(s)
Referenced in: [show references]