Download Jar. Libraryless. Click here for Pure Java version (2973L/20K).
1 | // from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 |
2 | |
3 | /* |
4 | * Copyright (c) 2016 Sergey Makagonov |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining |
7 | * a copy of this software and associated documentation files (the |
8 | * "Software"), to deal in the Software without restriction, including |
9 | * without limitation the rights to use, copy, modify, merge, publish, |
10 | * distribute, sublicense, and/or sell copies of the Software, and to |
11 | * permit persons to whom the Software is furnished to do so, subject to |
12 | * the following conditions: |
13 | * |
14 | * The above copyright notice and this permission notice shall be |
15 | * included in all copies or substantial portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
21 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
22 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
23 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
24 | * |
25 | */ |
26 | |
27 | do not include class SuffixTree. |
28 | |
29 | p { new ST; } |
30 | |
31 | sclass ST { |
32 | BufferedReader in; |
33 | PrintWriter out; |
34 | |
35 | class SuffixTree { |
36 | final int oo = Integer.MAX_VALUE/2; |
37 | Node [] nodes; |
38 | char [] text; |
39 | int root, position = -1, |
40 | currentNode, |
41 | needSuffixLink, |
42 | remainder; |
43 | |
44 | int active_node, active_length, active_edge; |
45 | |
46 | class Node { |
47 | |
48 | /* |
49 | There is no need to create an "Edge" class. |
50 | Information about the edge is stored right in the node. |
51 | [start; end) interval specifies the edge, |
52 | by which the node is connected to its parent node. |
53 | */ |
54 | |
55 | int start, end = oo, link; |
56 | public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>(); |
57 | |
58 | public Node(int start, int end) { |
59 | this.start = start; |
60 | this.end = end; |
61 | } |
62 | |
63 | public int edgeLength() { |
64 | return Math.min(end, position + 1) - start; |
65 | } |
66 | } |
67 | |
68 | public SuffixTree(int length) { |
69 | nodes = new Node[2* length + 2]; |
70 | text = new char[length]; |
71 | root = active_node = newNode(-1, -1); |
72 | } |
73 | |
74 | private void addSuffixLink(int node) { |
75 | if (needSuffixLink > 0) |
76 | nodes[needSuffixLink].link = node; |
77 | needSuffixLink = node; |
78 | } |
79 | |
80 | char active_edge() { |
81 | return text[active_edge]; |
82 | } |
83 | |
84 | boolean walkDown(int next) { |
85 | if (active_length >= nodes[next].edgeLength()) { |
86 | active_edge += nodes[next].edgeLength(); |
87 | active_length -= nodes[next].edgeLength(); |
88 | active_node = next; |
89 | return true; |
90 | } |
91 | return false; |
92 | } |
93 | |
94 | int newNode(int start, int end) { |
95 | nodes[++currentNode] = new Node(start, end); |
96 | return currentNode; |
97 | } |
98 | |
99 | public void addChar(char c) throws Exception { |
100 | text[++position] = c; |
101 | needSuffixLink = -1; |
102 | remainder++; |
103 | while(remainder > 0) { |
104 | if (active_length == 0) active_edge = position; |
105 | if (!nodes[active_node].next.containsKey(active_edge())){ |
106 | int leaf = newNode(position, oo); |
107 | nodes[active_node].next.put(active_edge(), leaf); |
108 | addSuffixLink(active_node); //rule 2 |
109 | } else { |
110 | int next = nodes[active_node].next.get(active_edge()); |
111 | if (walkDown(next)) continue; //observation 2 |
112 | if (text[nodes[next].start + active_length] == c) { //observation 1 |
113 | active_length++; |
114 | addSuffixLink(active_node); // observation 3 |
115 | break; |
116 | } |
117 | int split = newNode(nodes[next].start, nodes[next].start + active_length); |
118 | nodes[active_node].next.put(active_edge(), split); |
119 | int leaf = newNode(position, oo); |
120 | nodes[split].next.put(c, leaf); |
121 | nodes[next].start += active_length; |
122 | nodes[split].next.put(text[nodes[next].start], next); |
123 | addSuffixLink(split); //rule 2 |
124 | } |
125 | remainder--; |
126 | |
127 | if (active_node == root && active_length > 0) { //rule 1 |
128 | active_length--; |
129 | active_edge = position - remainder + 1; |
130 | } else |
131 | active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3 |
132 | } |
133 | } |
134 | |
135 | /* |
136 | printing the Suffix Tree in a format understandable by graphviz. The output is written into |
137 | st.dot file. In order to see the suffix tree as a PNG image, run the following command: |
138 | dot -Tpng -O st.dot |
139 | */ |
140 | |
141 | String edgeString(int node) { |
142 | return new String(Arrays.copyOfRange(text, nodes[node].start, Math.min(position + 1, nodes[node].end))); |
143 | } |
144 | |
145 | void printTree() { |
146 | out.println("digraph {"); |
147 | out.println("\trankdir = LR;"); |
148 | out.println("\tedge [arrowsize=0.4,fontsize=10]"); |
149 | out.println("\tnode1 [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.1,height=.1];"); |
150 | out.println("//------leaves------"); |
151 | printLeaves(root); |
152 | out.println("//------internal nodes------"); |
153 | printInternalNodes(root); |
154 | out.println("//------edges------"); |
155 | printEdges(root); |
156 | out.println("//------suffix links------"); |
157 | printSLinks(root); |
158 | out.println("}"); |
159 | } |
160 | |
161 | void printLeaves(int x) { |
162 | if (nodes[x].next.size() == 0) |
163 | out.println("\tnode"+x+" [label=\"\",shape=point]"); |
164 | else { |
165 | for (int child : nodes[x].next.values()) |
166 | printLeaves(child); |
167 | } |
168 | } |
169 | |
170 | void printInternalNodes(int x) { |
171 | if (x != root && nodes[x].next.size() > 0) |
172 | out.println("\tnode"+x+" [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.07,height=.07]"); |
173 | |
174 | for (int child : nodes[x].next.values()) |
175 | printInternalNodes(child); |
176 | } |
177 | |
178 | void printEdges(int x) { |
179 | for (int child : nodes[x].next.values()) { |
180 | out.println("\tnode"+x+" -> node"+child+" [label=\""+edgeString(child)+"\",weight=3]"); |
181 | printEdges(child); |
182 | } |
183 | } |
184 | |
185 | void printSLinks(int x) { |
186 | if (nodes[x].link > 0) |
187 | out.println("\tnode"+x+" -> node"+nodes[x].link+" [label=\"\",weight=1,style=dotted]"); |
188 | for (int child : nodes[x].next.values()) |
189 | printSLinks(child); |
190 | } |
191 | } |
192 | |
193 | public ST() throws Exception { |
194 | in = new BufferedReader(new StringReader("abc hello abc")); |
195 | File f = prepareProgramFile("st.dot"); |
196 | out = new PrintWriter(new FileWriter(f)); |
197 | String line = in.readLine(); |
198 | SuffixTree st = new SuffixTree(line.length()); |
199 | for (int i = 0; i < line.length(); i++) |
200 | st.addChar(line.charAt(i)); |
201 | st.printTree(); |
202 | out.close(); |
203 | printFileInfo(f); |
204 | } |
205 | } |
Began life as a copy of #1029272
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029274 |
Snippet name: | Ukkonen's algorithm Spike [version by Sergey Makagonov, dev.] |
Eternal ID of this version: | #1029274/4 |
Text MD5: | 9c07947a326fe8fe3edca550b149be58 |
Transpilation MD5: | fd34127d685764ccc3143e96d62b7d65 |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX source code (desktop) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-29 19:44:05 |
Source code size: | 7849 bytes / 205 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 303 / 928 |
Version history: | 3 change(s) |
Referenced in: | [show references] |