Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

112
LINES

< > BotCompany Repo | #1029275 // UkkonenSuffixTree

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (139L/2K).

1  
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2  
3  
/*
4  
 * Copyright (c) 2016 Sergey Makagonov
5  
 * 
6  
 * Permission is hereby granted, free of charge, to any person obtaining
7  
 * a copy of this software and associated documentation files (the
8  
 * "Software"), to deal in the Software without restriction, including
9  
 * without limitation the rights to use, copy, modify, merge, publish,
10  
 * distribute, sublicense, and/or sell copies of the Software, and to
11  
 * permit persons to whom the Software is furnished to do so, subject to
12  
 * the following conditions:
13  
 * 
14  
 * The above copyright notice and this permission notice shall be
15  
 * included in all copies or substantial portions of the Software.
16  
 * 
17  
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  
 * 
25  
 */
26  
 
27  
sclass UkkonenSuffixTree {
28  
  static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2?
29  
  Node [] nodes;
30  
  char [] text;
31  
  int root, position = -1, currentNode, needSuffixLink, remainder;
32  
33  
  int active_node, active_length, active_edge;
34  
35  
  sclass Node {
36  
    int start, end = oo, link;
37  
    public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>();
38  
39  
    *(int *start, int *end) {}
40  
41  
    public int edgeLength(UkkonenSuffixTree tree) {
42  
      ret Math.min(end, tree.position + 1) - start;
43  
    }
44  
  }
45  
46  
  *(int length) {
47  
    nodes = new Node[2*length+2];
48  
    text = new char[length];
49  
    root = active_node = newNode(-1, -1);
50  
  }
51  
52  
  private void addSuffixLink(int node) {
53  
      if (needSuffixLink > 0)
54  
          nodes[needSuffixLink].link = node;
55  
      needSuffixLink = node;
56  
  }
57  
58  
  char active_edge() {
59  
      return text[active_edge];
60  
  }
61  
62  
  boolean walkDown(int next) {
63  
      if (active_length >= nodes[next].edgeLength(this)) {
64  
          active_edge += nodes[next].edgeLength(this);
65  
          active_length -= nodes[next].edgeLength(this);
66  
          active_node = next;
67  
          return true;
68  
      }
69  
      return false;
70  
  }
71  
72  
  int newNode(int start, int end) {
73  
      nodes[++currentNode] = new Node(start, end);
74  
      return currentNode;
75  
  }
76  
77  
  public void addChar(char c) {
78  
      text[++position] = c;
79  
      needSuffixLink = -1;
80  
      remainder++;
81  
      while(remainder > 0) {
82  
          if (active_length == 0) active_edge = position;
83  
          if (!nodes[active_node].next.containsKey(active_edge())){
84  
              int leaf = newNode(position, oo);
85  
              nodes[active_node].next.put(active_edge(), leaf);
86  
              addSuffixLink(active_node); //rule 2
87  
          } else {
88  
              int next = nodes[active_node].next.get(active_edge());
89  
              if (walkDown(next)) continue;   //observation 2
90  
              if (text[nodes[next].start + active_length] == c) { //observation 1
91  
                  active_length++;
92  
                  addSuffixLink(active_node); // observation 3
93  
                  break;
94  
              }
95  
              int split = newNode(nodes[next].start, nodes[next].start + active_length);
96  
              nodes[active_node].next.put(active_edge(), split);
97  
              int leaf = newNode(position, oo);
98  
              nodes[split].next.put(c, leaf);
99  
              nodes[next].start += active_length;
100  
              nodes[split].next.put(text[nodes[next].start], next);
101  
              addSuffixLink(split); //rule 2
102  
          }
103  
          remainder--;
104  
105  
          if (active_node == root && active_length > 0) {  //rule 1
106  
              active_length--;
107  
              active_edge = position - remainder + 1;
108  
          } else
109  
              active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
110  
      }
111  
  }
112  
}

Author comment

Began life as a copy of #1029274

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029275
Snippet name: UkkonenSuffixTree
Eternal ID of this version: #1029275/3
Text MD5: 67c01b3997a871bb5d85c32ab5f042c4
Transpilation MD5: 345d8fd26c4166c2858c09fb0ff2ad3c
Author: stefan
Category: javax / suffix trees
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 19:51:04
Source code size: 4101 bytes / 112 lines
Pitched / IR pitched: No / No
Views / Downloads: 127 / 192
Version history: 2 change(s)
Referenced in: [show references]