Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

97
LINES

< > BotCompany Repo | #1029016 // deepDoubleWordIndex_lookupString_withPositions

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4384L/26K).

// returns a prefiltered list of elements; you still need to do a
// full-text search on those.
// If it returns null, you have to search all elements
static <A> Iterable<Pair<A, Cl<Int>>> deepDoubleWordIndex_lookupString_withPositions(DoubleWordIndex<WithIntArray<A>> index, S query, O... _) {
  optPar bool debug;
  L<IntRange> ranges = index.wordRanges(query);
  if (empty(ranges)) null;
  int nRanges = l(ranges);
  new Map<A, MultiSetMap<Int>> theMap; // for every snippet, a map of string position to word index

  // special case, just a single word in query
  if (l(ranges) == 1 && first(ranges).start == 0 && first(ranges).end == l(query)) {
    ret nestedIterator(containingIC(index.index1.words(), query), fullWord -> {
      int ofs = l(fullWord)-l(query);
      ret mapI_notNulls(index.index1.get(fullWord), entry -> {
        L<Int> positions = null;
        for (int i : entry.array) {
          int idx = i-ofs;
          if (idx >= 0) {
            if (positions == null) positions = new L;
            positions.add(idx);
          }
        }
        if (positions != null)
          ret pair(entry!, positions);
        null;
      });
    });
  }
  
  for iWord over ranges: { // go through words in query
    IntRange r = ranges.get(iWord);
    S word = substring(query, r);
    Cl<S> l; // all matching words in index
    WordIndex<WithIntArray<A>> indexToUse = index.index1;
      
    if (r.start == 0) { // look for ending of word - use reverse index
      l = prefixSubSet(index.index2.words(), reversed(word));
      if (empty(l)) ret emptyList();
      if (debug) print("word=" + word + ", fullWords=" + l);
      
      // special loop that accounts for length of actual word
      for (S fullWord : l)
        for (WithIntArray<A> entry : index.index2.index.get(fullWord)) {
          MultiSetMap<Int> msm = theMap.get(entry!);
          if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
          int ofs = l(fullWord)-l(word)-r.start;
          for (int i : entry.array) {
            int idx = i+ofs;
            if (debug) print("Got idx " + idx);
            if (idx >= 0)
              msm.put(idx, iWord);
          }
        }
      continue;
    } else if (r.end == l(query)) { // look for start of word
      l = prefixSubSet(index.index1.words(), word);
    } else // look for complete word
      l = ll(word);
      
    if (empty(l)) ret emptyList();
    
    if (debug) print("word=" + word + ", fullWords=" + l);
      
    for (S fullWord : l)
      for (WithIntArray<A> entry : indexToUse.index.get(fullWord)) {
        if (debug) print("Got entry " + entry);
        MultiSetMap<Int> msm = theMap.get(entry!);
        if (msm == null) theMap.put(entry!, msm = new MultiSetMap);
        for (int i : entry.array) {
          int idx = i-r.start;
          if (debug) print("Got idx " + idx);
          if (idx >= 0)
            msm.put(idx, iWord);
        }
      }
  }
  
  /*if (debug)*/ print("theMap size=" + l(theMap));
  ret mapI_nonNulls_if1(theMap.entrySet(), e -> {
    A snippet = e.getKey();
    MultiSetMap<Int> msm = e.getValue();
    
    if (debug) print("snippet " + snippet);
    L<Int> positions = null;
    for (int position, Set<Int> wordIndices : msm.data) {
      if (debug) print("position " + position + ": " + l(wordIndices) + "/" + nRanges);
      if (l(wordIndices) == nRanges) {
        if (positions == null) positions = new L;
        positions.add(position);
      }
    }
    if (positions != null)
      ret pair(snippet, positions);
    null;
  });
}

Author comment

Began life as a copy of #1029012

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029016
Snippet name: deepDoubleWordIndex_lookupString_withPositions
Eternal ID of this version: #1029016/10
Text MD5: 281342e7b9f46b27f268a2b90a78a845
Transpilation MD5: e6cee25de9098bc53f327e18a6eb063f
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-16 23:12:24
Source code size: 3639 bytes / 97 lines
Pitched / IR pitched: No / No
Views / Downloads: 147 / 228
Version history: 9 change(s)
Referenced in: [show references]