Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

118
LINES

< > BotCompany Repo | #1029277 // UkkonenIntSuffixTree [ints instead of chars]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (2592L/17K).

1  
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2  
3  
/*
4  
 * Copyright (c) 2016 Sergey Makagonov
5  
 * 
6  
 * Permission is hereby granted, free of charge, to any person obtaining
7  
 * a copy of this software and associated documentation files (the
8  
 * "Software"), to deal in the Software without restriction, including
9  
 * without limitation the rights to use, copy, modify, merge, publish,
10  
 * distribute, sublicense, and/or sell copies of the Software, and to
11  
 * permit persons to whom the Software is furnished to do so, subject to
12  
 * the following conditions:
13  
 * 
14  
 * The above copyright notice and this permission notice shall be
15  
 * included in all copies or substantial portions of the Software.
16  
 * 
17  
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  
 * 
25  
 */
26  
 
27  
sclass UkkonenIntSuffixTree {
28  
  static final int oo = Integer.MAX_VALUE/2;
29  
  Node [] nodes;
30  
  int [] text;
31  
  int root, position = -1, currentNode, needSuffixLink, remainder;
32  
33  
  int active_node, active_length, active_edge;
34  
35  
  sclass Node {
36  
    int start, end = oo, link;
37  
    public new TreeMap<Int> next;
38  
39  
    *(int *start, int *end) {}
40  
41  
    public int edgeLength(UkkonenIntSuffixTree tree) {
42  
      ret Math.min(end, tree.position + 1) - start;
43  
    }
44  
  }
45  
46  
  *(int length) {
47  
    nodes = new Node[2*length+2];
48  
    text = new int[length];
49  
    root = active_node = newNode(-1, -1);
50  
  }
51  
  
52  
  *(L<Int> symbols) {
53  
    this(l(symbols));
54  
    for (int i : symbols)
55  
      addChar(i);
56  
  }
57  
58  
  private void addSuffixLink(int node) {
59  
      if (needSuffixLink > 0)
60  
          nodes[needSuffixLink].link = node;
61  
      needSuffixLink = node;
62  
  }
63  
64  
  int active_edge() {
65  
      return text[active_edge];
66  
  }
67  
68  
  boolean walkDown(int next) {
69  
      if (active_length >= nodes[next].edgeLength(this)) {
70  
          active_edge += nodes[next].edgeLength(this);
71  
          active_length -= nodes[next].edgeLength(this);
72  
          active_node = next;
73  
          return true;
74  
      }
75  
      return false;
76  
  }
77  
78  
  int newNode(int start, int end) {
79  
      nodes[++currentNode] = new Node(start, end);
80  
      return currentNode;
81  
  }
82  
83  
  public void addChar(int c) {
84  
      text[++position] = c;
85  
      needSuffixLink = -1;
86  
      remainder++;
87  
      while(remainder > 0) {
88  
          if (active_length == 0) active_edge = position;
89  
          if (!nodes[active_node].next.containsKey(active_edge())){
90  
              int leaf = newNode(position, oo);
91  
              nodes[active_node].next.put(active_edge(), leaf);
92  
              addSuffixLink(active_node); //rule 2
93  
          } else {
94  
              int next = nodes[active_node].next.get(active_edge());
95  
              if (walkDown(next)) continue;   //observation 2
96  
              if (text[nodes[next].start + active_length] == c) { //observation 1
97  
                  active_length++;
98  
                  addSuffixLink(active_node); // observation 3
99  
                  break;
100  
              }
101  
              int split = newNode(nodes[next].start, nodes[next].start + active_length);
102  
              nodes[active_node].next.put(active_edge(), split);
103  
              int leaf = newNode(position, oo);
104  
              nodes[split].next.put(c, leaf);
105  
              nodes[next].start += active_length;
106  
              nodes[split].next.put(text[nodes[next].start], next);
107  
              addSuffixLink(split); //rule 2
108  
          }
109  
          remainder--;
110  
111  
          if (active_node == root && active_length > 0) {  //rule 1
112  
              active_length--;
113  
              active_edge = position - remainder + 1;
114  
          } else
115  
              active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
116  
      }
117  
  }
118  
}

Author comment

Began life as a copy of #1029275

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029277
Snippet name: UkkonenIntSuffixTree [ints instead of chars]
Eternal ID of this version: #1029277/5
Text MD5: a202d713402f67d87dc4d0fc0f8a3437
Transpilation MD5: cdac3d0fd4e4565bfa653fe0e62a1ae6
Author: stefan
Category: javax / suffix trees
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 20:10:08
Source code size: 4140 bytes / 118 lines
Pitched / IR pitched: No / No
Views / Downloads: 263 / 537
Version history: 4 change(s)
Referenced in: [show references]