Libraryless. Click here for Pure Java version (2592L/17K).
1 | // from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 |
2 | |
3 | /* |
4 | * Copyright (c) 2016 Sergey Makagonov |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining |
7 | * a copy of this software and associated documentation files (the |
8 | * "Software"), to deal in the Software without restriction, including |
9 | * without limitation the rights to use, copy, modify, merge, publish, |
10 | * distribute, sublicense, and/or sell copies of the Software, and to |
11 | * permit persons to whom the Software is furnished to do so, subject to |
12 | * the following conditions: |
13 | * |
14 | * The above copyright notice and this permission notice shall be |
15 | * included in all copies or substantial portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
21 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
22 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
23 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
24 | * |
25 | */ |
26 | |
27 | sclass UkkonenIntSuffixTree { |
28 | static final int oo = Integer.MAX_VALUE/2; |
29 | Node [] nodes; |
30 | int [] text; |
31 | int root, position = -1, currentNode, needSuffixLink, remainder; |
32 | |
33 | int active_node, active_length, active_edge; |
34 | |
35 | sclass Node { |
36 | int start, end = oo, link; |
37 | public new TreeMap<Int> next; |
38 | |
39 | *(int *start, int *end) {} |
40 | |
41 | public int edgeLength(UkkonenIntSuffixTree tree) { |
42 | ret Math.min(end, tree.position + 1) - start; |
43 | } |
44 | } |
45 | |
46 | *(int length) { |
47 | nodes = new Node[2*length+2]; |
48 | text = new int[length]; |
49 | root = active_node = newNode(-1, -1); |
50 | } |
51 | |
52 | *(L<Int> symbols) { |
53 | this(l(symbols)); |
54 | for (int i : symbols) |
55 | addChar(i); |
56 | } |
57 | |
58 | private void addSuffixLink(int node) { |
59 | if (needSuffixLink > 0) |
60 | nodes[needSuffixLink].link = node; |
61 | needSuffixLink = node; |
62 | } |
63 | |
64 | int active_edge() { |
65 | return text[active_edge]; |
66 | } |
67 | |
68 | boolean walkDown(int next) { |
69 | if (active_length >= nodes[next].edgeLength(this)) { |
70 | active_edge += nodes[next].edgeLength(this); |
71 | active_length -= nodes[next].edgeLength(this); |
72 | active_node = next; |
73 | return true; |
74 | } |
75 | return false; |
76 | } |
77 | |
78 | int newNode(int start, int end) { |
79 | nodes[++currentNode] = new Node(start, end); |
80 | return currentNode; |
81 | } |
82 | |
83 | public void addChar(int c) { |
84 | text[++position] = c; |
85 | needSuffixLink = -1; |
86 | remainder++; |
87 | while(remainder > 0) { |
88 | if (active_length == 0) active_edge = position; |
89 | if (!nodes[active_node].next.containsKey(active_edge())){ |
90 | int leaf = newNode(position, oo); |
91 | nodes[active_node].next.put(active_edge(), leaf); |
92 | addSuffixLink(active_node); //rule 2 |
93 | } else { |
94 | int next = nodes[active_node].next.get(active_edge()); |
95 | if (walkDown(next)) continue; //observation 2 |
96 | if (text[nodes[next].start + active_length] == c) { //observation 1 |
97 | active_length++; |
98 | addSuffixLink(active_node); // observation 3 |
99 | break; |
100 | } |
101 | int split = newNode(nodes[next].start, nodes[next].start + active_length); |
102 | nodes[active_node].next.put(active_edge(), split); |
103 | int leaf = newNode(position, oo); |
104 | nodes[split].next.put(c, leaf); |
105 | nodes[next].start += active_length; |
106 | nodes[split].next.put(text[nodes[next].start], next); |
107 | addSuffixLink(split); //rule 2 |
108 | } |
109 | remainder--; |
110 | |
111 | if (active_node == root && active_length > 0) { //rule 1 |
112 | active_length--; |
113 | active_edge = position - remainder + 1; |
114 | } else |
115 | active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3 |
116 | } |
117 | } |
118 | } |
Began life as a copy of #1029275
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029277 |
Snippet name: | UkkonenIntSuffixTree [ints instead of chars] |
Eternal ID of this version: | #1029277/5 |
Text MD5: | a202d713402f67d87dc4d0fc0f8a3437 |
Transpilation MD5: | cdac3d0fd4e4565bfa653fe0e62a1ae6 |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-29 20:10:08 |
Source code size: | 4140 bytes / 118 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 263 / 537 |
Version history: | 4 change(s) |
Referenced in: | [show references] |