Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

123
LINES

< > BotCompany Repo | #1029276 // UkkonenIntSuffixTree [ints instead of chars, compact version, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Compilation Failed (15002L/102K).

1  
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2  
3  
/*
4  
 * Copyright (c) 2016 Sergey Makagonov
5  
 * 
6  
 * Permission is hereby granted, free of charge, to any person obtaining
7  
 * a copy of this software and associated documentation files (the
8  
 * "Software"), to deal in the Software without restriction, including
9  
 * without limitation the rights to use, copy, modify, merge, publish,
10  
 * distribute, sublicense, and/or sell copies of the Software, and to
11  
 * permit persons to whom the Software is furnished to do so, subject to
12  
 * the following conditions:
13  
 * 
14  
 * The above copyright notice and this permission notice shall be
15  
 * included in all copies or substantial portions of the Software.
16  
 * 
17  
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  
 * 
25  
 */
26  
 
27  
sclass UkkonenIntSuffixTree {
28  
  Node[] nodes;
29  
  int[] text;
30  
  int root, position = -1, currentNode, needSuffixLink, remainder;
31  
32  
  int active_node, active_length, active_edge;
33  
  
34  
  static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2?
35  
  Comparator<Int> childComparator = (a, b) -> nodes[a].firstCharOrMinus1(this)-nodes[b].firstCharOrMinus1(this);
36  
 
37  
38  
  sclass Node {
39  
    int start, end = oo, link;
40  
    public CompactTreeSet<Int> next = new(childComparator());
41  
42  
    *(int *start, int *end) {}
43  
44  
    public int edgeLength(UkkonenSuffixTree tree) {
45  
      ret Math.min(end, tree.position + 1) - start;
46  
    }
47  
    
48  
    CompactTreeSet<Int> makeEmptyChildrenSet(UkkonenIntSuffixTree tree) {
49  
      ret new CompactTreeSet<>(tree.childComparator);
50  
    }
51  
    
52  
    public int firstCharOrMinus1(IntSuffixTree tree) {
53  
      ret tree.fullText[from);
54  
    }
55  
  }
56  
57  
  *(int length) {
58  
    nodes = new Node[2*length+2];
59  
    text = new char[length];
60  
    root = active_node = newNode(-1, -1);
61  
  }
62  
63  
  private void addSuffixLink(int node) {
64  
      if (needSuffixLink > 0)
65  
          nodes[needSuffixLink].link = node;
66  
      needSuffixLink = node;
67  
  }
68  
69  
  char active_edge() {
70  
      return text[active_edge];
71  
  }
72  
73  
  boolean walkDown(int next) {
74  
      if (active_length >= nodes[next].edgeLength(this)) {
75  
          active_edge += nodes[next].edgeLength(this);
76  
          active_length -= nodes[next].edgeLength(this);
77  
          active_node = next;
78  
          return true;
79  
      }
80  
      return false;
81  
  }
82  
83  
  int newNode(int start, int end) {
84  
      nodes[++currentNode] = new Node(start, end);
85  
      return currentNode;
86  
  }
87  
88  
  public void addChar(char c) {
89  
      text[++position] = c;
90  
      needSuffixLink = -1;
91  
      remainder++;
92  
      while(remainder > 0) {
93  
          if (active_length == 0) active_edge = position;
94  
          if (!nodes[active_node].next.containsKey(active_edge())){
95  
              int leaf = newNode(position, oo);
96  
              nodes[active_node].next.put(active_edge(), leaf);
97  
              addSuffixLink(active_node); //rule 2
98  
          } else {
99  
              int next = nodes[active_node].next.get(active_edge());
100  
              if (walkDown(next)) continue;   //observation 2
101  
              if (text[nodes[next].start + active_length] == c) { //observation 1
102  
                  active_length++;
103  
                  addSuffixLink(active_node); // observation 3
104  
                  break;
105  
              }
106  
              int split = newNode(nodes[next].start, nodes[next].start + active_length);
107  
              nodes[active_node].next.put(active_edge(), split);
108  
              int leaf = newNode(position, oo);
109  
              nodes[split].next.put(c, leaf);
110  
              nodes[next].start += active_length;
111  
              nodes[split].next.put(text[nodes[next].start], next);
112  
              addSuffixLink(split); //rule 2
113  
          }
114  
          remainder--;
115  
116  
          if (active_node == root && active_length > 0) {  //rule 1
117  
              active_length--;
118  
              active_edge = position - remainder + 1;
119  
          } else
120  
              active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
121  
      }
122  
  }
123  
}

Author comment

Began life as a copy of #1029275

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029276
Snippet name: UkkonenIntSuffixTree [ints instead of chars, compact version, dev.]
Eternal ID of this version: #1029276/1
Text MD5: 3feab062968fc13a6086cda8815af3b8
Transpilation MD5: fd2ad803444444b4b0824b0a436a7e46
Author: stefan
Category: javax / suffix trees
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 19:59:47
Source code size: 4447 bytes / 123 lines
Pitched / IR pitched: No / No
Views / Downloads: 195 / 271
Referenced in: [show references]