Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

63
LINES

< > BotCompany Repo | #1012023 // reTok_multi - reTok multiple token ranges efficiently

JavaX fragment (include) [tags: use-pretranspiled]

Transpiled version (2678L) is out of date.

1  
static LS reTok_multi(LS tok, L<IntRange> places) {
2  
  if (empty(places)) ret tok;
3  
  if (l(places) == 1) ret reTok(tok, first(places));
4  
  L<S> orig = cloneList(tok); // copy to orig
5  
  
6  
  // sort, extend & merge ranges
7  
  sortIntRangesInPlace(places);
8  
  new L<IntRange> places2;
9  
  for (IntRange p : places) {
10  
    p = intRange(p.start & ~1, p.end | 1); // extend to N-to-N
11  
    if (nempty(places2) && p.start <= last(places2).end)
12  
      last(places2).end = p.end; // merge if overlapping
13  
    else
14  
      places2.add(p);
15  
  }
16  
  
17  
  ifdef reTok_multi_debug
18  
    printStruct("places: ", places2);
19  
  endifdef
20  
 
21  
  int iPlace = 0, n = l(orig);
22  
  IntRange p = get(places2, iPlace);
23  
24  
  int next = p.start, i = next;
25  
  tok.subList(next, tok.size()).clear();
26  
  while (i < n)
27  
    if (i < next)
28  
      tok.add(orig.get(i++));
29  
    else {
30  
      int j = p.end;
31  
      
32  
      S s = joinSubList(orig, i, j);
33  
      ifdef reTok_multi_debug
34  
        printStruct("retokking: ", s);
35  
      endifdef
36  
37  
      tok.addAll(javaTok(s));
38  
      i = j;
39  
      p = get(places2, ++iPlace);
40  
      if (p == null) break;
41  
      next = p.start;
42  
    }
43  
    
44  
  while (i < n)
45  
    tok.add(orig.get(i++));
46  
    
47  
  ifdef reTok_multi_check
48  
    LS correct = javaTok(join(orig));
49  
    if (neq(correct, tok)) {
50  
      n = min(l(correct), l(tok));
51  
      if (l(correct) != l(tok)) print("reTok_multi_check: size difference " + l(correct) + " / " + l(tok));
52  
      for ii to n:
53  
        if (!eq(tok.get(ii), correct.get(ii))) {
54  
          for (int j = max(0, ii-1); j < min(n, ii+1); j++)
55  
            print("reTok_multi_check diff @ " + j + "/" + n + ": " + quote(correct.get(j)) + " / " + quote(tok.get(j)));
56  
          break;
57  
        }
58  
      fail("reTok_multi_check");
59  
    }
60  
  endifdef
61  
62  
  ret tok;
63  
}

Author comment

Began life as a copy of #1003367

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1012023
Snippet name: reTok_multi - reTok multiple token ranges efficiently
Eternal ID of this version: #1012023/20
Text MD5: 401f70b190eb68515a7f9f6676c4c09a
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-06-04 22:09:13
Source code size: 1789 bytes / 63 lines
Pitched / IR pitched: No / No
Views / Downloads: 372 / 555
Version history: 19 change(s)
Referenced in: [show references]