Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

154
LINES

< > BotCompany Repo | #1029098 // LCSingleSearcher_v1 - probably slow, but works [tested up until length 7]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (9199L/59K).

1  
sclass LCSingleSearcher_v1 {
2  
  S text;
3  
  bool debug;
4  
  transient LineCompedSingle<Char> lc;
5  
  transient LL<Int> productions;
6  
  transient Map<Int> prodLengths; // prod index -> length of production in characters
7  
  transient int iFirstPair, iFirstFile;
8  
  transient Map<Char, Int> literalIndex;
9  
  transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod)
10  
  transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets
11  
  transient bool prepared;
12  
  
13  
  void prepare {
14  
    ret unless set prepared;
15  
    
16  
    lc = lcString2(text);
17  
    
18  
    // Turn pairs and file encodings into a single structure ("productions")
19  
    productions = new L;
20  
    productions.addAll(rep(null, l(lc.literals)));
21  
    iFirstPair = l(productions);
22  
    productions.addAll(intPairsToLists(lc.pairs));
23  
    iFirstFile = l(productions);
24  
    productions.add(lc.main);
25  
26  
    prodLengths = new Map;
27  
    fileOffsetMap = new Map;
28  
    for (int i = iFirstFile; i < l(productions); i++) {
29  
      L<Int> prod = productions.get(i);
30  
      new L<Int> l;
31  
      int ofs = 0;
32  
      for (int x : prod) {
33  
        l.add(ofs);
34  
        ofs += getProdLength(x);
35  
      }
36  
      fileOffsetMap.put(i, l);
37  
    }
38  
    
39  
    // build the reconstitution index
40  
    literalIndex = indexList(lc.literals);
41  
    prodIndex = new MultiMap;
42  
    for iProd over productions: {
43  
      L<Int> prod = productions.get(iProd);
44  
      for (int i, int x : unpair iterateListWithIndex(prod)) {
45  
        prodIndex.put(x, intPair(iProd, i));
46  
      }
47  
    }
48  
    
49  
    if (debug) print(+literalIndex);
50  
    if (debug) print(+prodIndex);
51  
  }
52  
  
53  
  L<Int> search(S query) {
54  
    prepare();
55  
    
56  
    L<Char> queryList = characters(query);
57  
    new L<Int> queryLiterals;
58  
    for (Char c : queryList) {
59  
      Int iLit = literalIndex.get(c);
60  
      if (iLit == null) {
61  
        if (debug) print("literal not found");
62  
        ret emptyList();
63  
      }
64  
      queryLiterals.add(iLit);
65  
    }
66  
    
67  
    if (debug) print(+queryLiterals);
68  
69  
    ret intPairsB(print("Result", reconstitute(queryLiterals)));
70  
  }
71  
72  
  int getProdLength(int i) {
73  
    if (i < iFirstPair) ret 1; // literal
74  
    Int x = prodLengths.get(i);
75  
    if (x != null) ret x;
76  
    prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i))));
77  
    ret x;
78  
  }
79  
  
80  
  L<IntPair> reconstitute(L<Int> baseList) {
81  
    L<Int> needRight = subList(baseList, 1);
82  
    ret reconstitute(first(baseList),
83  
      "",
84  
      lcUncompressItems(lc, needRight));
85  
  }
86  
87  
  // returns list of (file index, character offset)
88  
  L<IntPair> reconstitute(int item, S needLeft, S needRight) {
89  
    if (debug) print("Reconstituting " + quote(lcUncompressItem(lc, item)));
90  
    if (debug) print("  needLeft: " + quote(needLeft));
91  
    if (debug) print("  needRight: " + quote(needRight));
92  
    L<IntPair> l = prodIndex.get(item);
93  
    if (debug) print("Found: " + l);
94  
    new L<IntPair> out;
95  
    search: for (IntPair p : l) {
96  
      int iProd = p.a;
97  
      L<Int> prod = productions.get(iProd);
98  
      int j = p.b;
99  
      bool isFile = iProd >= iFirstFile;
100  
      if (isFile) {
101  
        int iChar = fileOffsetMap.get(iProd).get(j);
102  
        if (debug) print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight));
103  
104  
        // check left & right
105  
        if (nempty(needLeft)) fail(); // needLeft not used yet (or is it?)
106  
107  
        int iRight = j+1;
108  
        S rest = needRight;
109  
        while (nempty(rest)) {
110  
          if (iRight >= l(prod)) continue search; // need more, but have no more text
111  
          rest = itemStartsWith(prod.get(iRight), rest);
112  
          if (rest == null) continue search; // mismatch
113  
          ++iRight;
114  
        }
115  
116  
        // result found!
117  
        out.add(intPair(iProd, iChar));
118  
      } else { // we're on a pair, check left & right and then go higher up
119  
        if (j == 0) {
120  
          S rest = itemStartsWith(prod.get(1), needRight);
121  
          if (debug) print("checking right: " + quote(lcUncompressItem(lc, prod.get(1)))
122  
            + " => " + quote(rest));
123  
          if (rest == null) continue;
124  
          out.addAll(reconstitute(iProd, needLeft, rest));
125  
        } else {
126  
          S rest = itemEndsWith(prod.get(0), needLeft);
127  
          if (debug) print("checking left: " + quote(lcUncompressItem(lc, prod.get(0)))
128  
            + " => " + quote(rest));
129  
          if (rest == null) continue;
130  
          int leftLength = getProdLength(prod.get(0));
131  
          for (IntPair result : reconstitute(iProd, rest, needRight))
132  
            out.add(intPair(result.a, result.b+leftLength));
133  
        }
134  
      }
135  
    }
136  
    ret out;
137  
  }
138  
139  
  S itemStartsWith(int i, S need) {
140  
    if (empty(need)) ret need;
141  
    S have = lcUncompressItem(lc, i);
142  
    int l = lCommonPrefix(have, need);
143  
    if (l < l(need) && l < l(have)) null;
144  
    ret substring(need, l);
145  
  }
146  
  
147  
  S itemEndsWith(int i, S need) {
148  
    if (empty(need)) ret need;
149  
    S have = lcUncompressItem(lc, i);
150  
    int l = lCommonSuffix(have, need);
151  
    if (l < l(need) && l < l(have)) null;
152  
    ret dropLast(need, l);
153  
  }
154  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029098
Snippet name: LCSingleSearcher_v1 - probably slow, but works [tested up until length 7]
Eternal ID of this version: #1029098/19
Text MD5: 890dfe63c0c30e923d518fd511dea419
Transpilation MD5: 16c841f2c71c6de6e776c7e9d48065c6
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-20 13:09:49
Source code size: 5163 bytes / 154 lines
Pitched / IR pitched: No / No
Views / Downloads: 207 / 507
Version history: 18 change(s)
Referenced in: [show references]