Libraryless. Click here for Pure Java version (139L/2K).
1 | // from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 |
2 | |
3 | /* |
4 | * Copyright (c) 2016 Sergey Makagonov |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining |
7 | * a copy of this software and associated documentation files (the |
8 | * "Software"), to deal in the Software without restriction, including |
9 | * without limitation the rights to use, copy, modify, merge, publish, |
10 | * distribute, sublicense, and/or sell copies of the Software, and to |
11 | * permit persons to whom the Software is furnished to do so, subject to |
12 | * the following conditions: |
13 | * |
14 | * The above copyright notice and this permission notice shall be |
15 | * included in all copies or substantial portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
21 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
22 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
23 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
24 | * |
25 | */ |
26 | |
27 | sclass UkkonenSuffixTree { |
28 | static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2? |
29 | Node [] nodes; |
30 | char [] text; |
31 | int root, position = -1, currentNode, needSuffixLink, remainder; |
32 | |
33 | int active_node, active_length, active_edge; |
34 | |
35 | sclass Node { |
36 | int start, end = oo, link; |
37 | public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>(); |
38 | |
39 | *(int *start, int *end) {} |
40 | |
41 | public int edgeLength(UkkonenSuffixTree tree) { |
42 | ret Math.min(end, tree.position + 1) - start; |
43 | } |
44 | } |
45 | |
46 | *(int length) { |
47 | nodes = new Node[2*length+2]; |
48 | text = new char[length]; |
49 | root = active_node = newNode(-1, -1); |
50 | } |
51 | |
52 | private void addSuffixLink(int node) { |
53 | if (needSuffixLink > 0) |
54 | nodes[needSuffixLink].link = node; |
55 | needSuffixLink = node; |
56 | } |
57 | |
58 | char active_edge() { |
59 | return text[active_edge]; |
60 | } |
61 | |
62 | boolean walkDown(int next) { |
63 | if (active_length >= nodes[next].edgeLength(this)) { |
64 | active_edge += nodes[next].edgeLength(this); |
65 | active_length -= nodes[next].edgeLength(this); |
66 | active_node = next; |
67 | return true; |
68 | } |
69 | return false; |
70 | } |
71 | |
72 | int newNode(int start, int end) { |
73 | nodes[++currentNode] = new Node(start, end); |
74 | return currentNode; |
75 | } |
76 | |
77 | public void addChar(char c) { |
78 | text[++position] = c; |
79 | needSuffixLink = -1; |
80 | remainder++; |
81 | while(remainder > 0) { |
82 | if (active_length == 0) active_edge = position; |
83 | if (!nodes[active_node].next.containsKey(active_edge())){ |
84 | int leaf = newNode(position, oo); |
85 | nodes[active_node].next.put(active_edge(), leaf); |
86 | addSuffixLink(active_node); //rule 2 |
87 | } else { |
88 | int next = nodes[active_node].next.get(active_edge()); |
89 | if (walkDown(next)) continue; //observation 2 |
90 | if (text[nodes[next].start + active_length] == c) { //observation 1 |
91 | active_length++; |
92 | addSuffixLink(active_node); // observation 3 |
93 | break; |
94 | } |
95 | int split = newNode(nodes[next].start, nodes[next].start + active_length); |
96 | nodes[active_node].next.put(active_edge(), split); |
97 | int leaf = newNode(position, oo); |
98 | nodes[split].next.put(c, leaf); |
99 | nodes[next].start += active_length; |
100 | nodes[split].next.put(text[nodes[next].start], next); |
101 | addSuffixLink(split); //rule 2 |
102 | } |
103 | remainder--; |
104 | |
105 | if (active_node == root && active_length > 0) { //rule 1 |
106 | active_length--; |
107 | active_edge = position - remainder + 1; |
108 | } else |
109 | active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3 |
110 | } |
111 | } |
112 | } |
Began life as a copy of #1029274
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029275 |
Snippet name: | UkkonenSuffixTree |
Eternal ID of this version: | #1029275/3 |
Text MD5: | 67c01b3997a871bb5d85c32ab5f042c4 |
Transpilation MD5: | 345d8fd26c4166c2858c09fb0ff2ad3c |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-29 19:51:04 |
Source code size: | 4101 bytes / 112 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 218 / 319 |
Version history: | 2 change(s) |
Referenced in: | [show references] |