Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

154
LINES

< > BotCompany Repo | #1029095 // Search from Compression Spike [dev.]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 12245K of libraries. Click here for Pure Java version (9479L/47K).

1  
!7
2  
3  
cprint {
4  
  switchable S query = "world";
5  
    
6  
  transient LineCompedSingle<Char> lc;
7  
  transient LL<Int> productions;
8  
  transient Map<Int> prodLengths; // prod index -> length of production in characters
9  
  transient int iFirstPair, iFirstFile;
10  
  transient Map<Char, Int> literalIndex;
11  
  transient MultiMap<Int, IntPair> prodIndex; // keys: item looked for, value: pair(prod index, position in prod)
12  
  transient Map<Int, L<Int>> fileOffsetMap; // for every file index, character offsets
13  
  
14  
  start-thread {
15  
    dm_reloadOnFieldChange query();
16  
    new LineCompCompressor comp;
17  
    lc = lcString2(comp, "hello world world");
18  
    
19  
    // Turn pairs and file encodings into a single structure ("productions")
20  
    productions = new L;
21  
    productions.addAll(rep(null, l(lc.literals)));
22  
    iFirstPair = l(productions);
23  
    productions.addAll(intPairsToLists(lc.pairs));
24  
    iFirstFile = l(productions);
25  
    productions.add(lc.main);
26  
27  
    prodLengths = new Map;
28  
    fileOffsetMap = new Map;
29  
    for (int i = iFirstFile; i < l(productions); i++) {
30  
      L<Int> prod = productions.get(i);
31  
      new L<Int> l;
32  
      int ofs = 0;
33  
      for (int x : prod) {
34  
        l.add(ofs);
35  
        ofs += getProdLength(x);
36  
      }
37  
      fileOffsetMap.put(i, l);
38  
    }
39  
    
40  
    // build the reconstitution index
41  
    literalIndex = indexList(lc.literals);
42  
    prodIndex = new MultiMap;
43  
    for iProd over productions: {
44  
      L<Int> prod = productions.get(iProd);
45  
      for (int i, int x : unpair iterateListWithIndex(prod)) {
46  
        prodIndex.put(x, intPair(iProd, i));
47  
      }
48  
    }
49  
    
50  
    print(+literalIndex);
51  
    print(+prodIndex);
52  
    
53  
    L<Char> queryList = characters(query);
54  
    new L<Int> queryLiterals;
55  
    for (Char c : queryList) {
56  
      Int iLit = literalIndex.get(c);
57  
      if (iLit == null) ret with print("literal not found");
58  
      queryLiterals.add(iLit);
59  
    }
60  
    
61  
    print(+queryLiterals);
62  
63  
    print("Result", reconstitute(queryLiterals, 0));
64  
  }
65  
66  
  /*void calcProdLengths {
67  
    prodLengths = new Map;
68  
    for (int i = iFirstPair; i < l(productions); i++)
69  
      getProdLength(i);
70  
  }*/
71  
72  
  int getProdLength(int i) {
73  
    if (i < iFirstPair) ret 1; // literal
74  
    Int x = prodLengths.get(i);
75  
    if (x != null) ret x;
76  
    prodLengths.put(i, x = intSum(lmap getProdLength(productions.get(i))));
77  
    ret x;
78  
  }
79  
  
80  
  L<IntPair> reconstitute(L<Int> baseList, int i) {
81  
    L<Int> needLeft = subList(baseList, 0, i);
82  
    L<Int> needRight = subList(baseList, i+1);
83  
    ret reconstitute(baseList.get(i),
84  
      lcUncompressItems(lc, needLeft),
85  
      lcUncompressItems(lc, needRight));
86  
  }
87  
88  
  // returns list of (file index, character offset)
89  
  L<IntPair> reconstitute(int item, S needLeft, S needRight) {
90  
    print("Reconstituing " + quote(lcUncompressItem(lc, item)));
91  
    print("  needLeft: " + quote(needLeft));
92  
    print("  needRight: " + quote(needRight));
93  
    L<IntPair> l = prodIndex.get(item);
94  
    print("Found: " + l);
95  
    new L<IntPair> out;
96  
    search: for (IntPair p : l) {
97  
      int iProd = p.a;
98  
      L<Int> prod = productions.get(iProd);
99  
      int j = p.b;
100  
      bool isFile = iProd >= iFirstFile;
101  
      if (isFile) {
102  
        int iChar = fileOffsetMap.get(iProd).get(j);
103  
        print("Reached file level: " + iProd + "/" + iChar + " with " + quote(needLeft) + "/" + quote(needRight));
104  
105  
        // check left & right
106  
        if (nempty(needLeft)) fail(); // needLeft not used yet
107  
108  
        int iRight = j+1;
109  
        while (nempty(needRight)) {
110  
          if (j >= l(prod)) continue search; // need more, but have no more text
111  
          S rest = itemStartsWith(prod.get(iRight), needRight);
112  
          if (rest == null) continue search; // mismatch
113  
          needRight = rest;
114  
          ++iRight;
115  
        }
116  
117  
        // result found!
118  
        out.add(intPair(iProd, iChar));
119  
      } else { // we're on a pair, check left & right and then go higher up
120  
        if (j == 0) {
121  
          S rest = itemStartsWith(prod.get(1), needRight);
122  
          print("checking right: " + quote(lcUncompressItem(lc, prod.get(1)))
123  
            + " => " + quote(rest));
124  
          if (rest == null) continue;
125  
          out.addAll(reconstitute(iProd, needLeft, rest));
126  
        } else {
127  
          S rest = itemEndsWith(prod.get(0), needLeft);
128  
          print("checking left: " + quote(lcUncompressItem(lc, prod.get(0)))
129  
            + " => " + quote(rest));
130  
          if (rest == null) continue;
131  
          out.addAll(reconstitute(iProd, rest, needRight));
132  
        }
133  
      }
134  
    }
135  
    ret out;
136  
  }
137  
138  
  S itemStartsWith(int i, S need) {
139  
    if (empty(need)) ret need;
140  
    S have = lcUncompressItem(lc, i);
141  
    int l = lCommonPrefix(have, need);
142  
    if (l < l(need) && l < l(have)) null;
143  
    ret substring(need, l);
144  
  }
145  
  
146  
  S itemEndsWith(int i, S need) {
147  
    if (empty(need)) ret need;
148  
    S have = lcUncompressItem(lc, i);
149  
    int l = lCommonSuffix(have, need);
150  
    if (l < l(need) && l < l(have)) null;
151  
    ret dropLast(need, l);
152  
153  
  }
154  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029095
Snippet name: Search from Compression Spike [dev.]
Eternal ID of this version: #1029095/33
Text MD5: a8dfbd7502773c978cfe6f9f664cc07e
Transpilation MD5: 156c30495a2b405be0c5e7d5efcf91de
Author: stefan
Category:
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-19 15:20:12
Source code size: 4913 bytes / 154 lines
Pitched / IR pitched: No / No
Views / Downloads: 122 / 2352
Version history: 32 change(s)
Referenced in: [show references]