Download Jar. Libraryless. Click here for Pure Java version (8870L/63K).
1 | // from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 |
2 | |
3 | /* |
4 | Ukkonen's algorithm for linear time construction of suffix trees. |
5 | */ |
6 | |
7 | public static int stacktrack; |
8 | public char TERMINATORS_RANGE = '\ud800'; |
9 | public static int count=0; |
10 | public static void dfsd(Node c){ |
11 | if (c.isLeaf()){ |
12 | //print("\nbasecase"); |
13 | //count++; |
14 | return; |
15 | } |
16 | Node a; |
17 | print(c.sons.keySet()); |
18 | |
19 | Iterator it = c.sons.entrySet().iterator(); |
20 | while (it.hasNext()) { |
21 | Map.Entry pairs = (Map.Entry)it.next(); |
22 | a = (Node)pairs.getValue(); |
23 | for(int i=0;i<stacktrack;i++)System.out.print("\t"); |
24 | print(stacktrack+" br>>>>>>> ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd ); |
25 | stacktrack++; |
26 | count++; |
27 | dfsd(a); |
28 | |
29 | stacktrack--; |
30 | for(int i=0;i<stacktrack;i++)System.out.print("\t"); |
31 | print(stacktrack+" bt<<<<<<< ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd ); |
32 | } |
33 | } |
34 | |
35 | p-exp { |
36 | /*Scanner sc = new Scanner(System.in); |
37 | int n = sc.nextInt(); |
38 | sc.nextLine(); |
39 | */ |
40 | |
41 | S s = "abbab"; |
42 | SuffixTree t1 = new SuffixTree(print(s)); |
43 | print("Number of nodes: " + t1.nofnodes()); |
44 | t1 = new SuffixTree(printStruct(new String[]{"aab","aac"})); |
45 | print("Number of nodes: " + t1.nofnodes()); |
46 | |
47 | t1 = new SuffixTree; |
48 | t1.addString(print("aab")); |
49 | t1.addString(print("aac")); |
50 | print("Number of nodes: " + t1.nofnodes()); |
51 | dfsd(t1.root); |
52 | print(+count); |
53 | } |
54 | |
55 | sclass Node { |
56 | Node parent, suffixLink; |
57 | int edgeStart, edgeEnd, parentDepth; |
58 | // The edge that reaches this node contains the substring s[edgeStart, edgeEnd] |
59 | TreeMap<Character, Node> sons; |
60 | |
61 | public Node(){ |
62 | parent = suffixLink = null; |
63 | edgeStart = edgeEnd = parentDepth = 0; |
64 | sons = new TreeMap<Character, Node>(); |
65 | } |
66 | |
67 | // Returns true if there is a path starting at root having length position + 1 that ends |
68 | // in the edge that reaches this node. |
69 | public boolean inEdge(int position){ |
70 | return parentDepth <= position && position < depth(); |
71 | } |
72 | |
73 | public int edgeLength(){ |
74 | return edgeEnd - edgeStart; |
75 | } |
76 | |
77 | public int depth(){ |
78 | return parentDepth + edgeLength(); |
79 | } |
80 | |
81 | void link(Node son, int start, int end, String s){ |
82 | // Links the current node with the son. The edge will have substring s[start, end) |
83 | son.parent = this; |
84 | son.parentDepth = this.depth(); |
85 | son.edgeStart = start; |
86 | son.edgeEnd = end; |
87 | sons.put(s.charAt(start),son); |
88 | } |
89 | |
90 | public boolean isLeaf(){ |
91 | return sons.size() == 0; |
92 | } |
93 | }; |
94 | |
95 | sclass SuffixTree { |
96 | ArrayList<Node> nodes; |
97 | Node root, needSuffix; |
98 | int currentNode; |
99 | int length; |
100 | char TERMINATORS_RANGE = '\ud800'; |
101 | int termi=0; |
102 | String generalized; |
103 | |
104 | public SuffixTree(String str) { |
105 | nodes = new ArrayList<Node>(); |
106 | currentNode = 0; |
107 | str = str + (char)TERMINATORS_RANGE; |
108 | length = str.length(); |
109 | root = newNode(); |
110 | build(root, str); |
111 | } |
112 | |
113 | public SuffixTree(String[] stra) { |
114 | nodes = new ArrayList<Node>(); |
115 | currentNode = 0; |
116 | root = newNode(); |
117 | |
118 | StringBuilder sb = new StringBuilder(); |
119 | for (int i = 0; i < stra.length; i++) { |
120 | sb.append(stra[i]); |
121 | sb.append((char)(TERMINATORS_RANGE + i)); |
122 | } |
123 | generalized = sb.toString(); |
124 | length = generalized.length(); |
125 | build(root, generalized); |
126 | } |
127 | |
128 | public SuffixTree() { |
129 | nodes = new ArrayList<Node>(); |
130 | currentNode = 0; |
131 | root = newNode(); |
132 | } |
133 | |
134 | void addString(String str){ |
135 | str = str+ (char)(TERMINATORS_RANGE + termi); |
136 | termi++; |
137 | length = str.length(); |
138 | build(root, str); |
139 | } |
140 | |
141 | int nofnodes() { |
142 | return currentNode; |
143 | } |
144 | |
145 | Node newNode(){ |
146 | new Node node; |
147 | nodes.add(currentNode, node); |
148 | currentNode++; |
149 | ret node; |
150 | } |
151 | |
152 | Node walkDown(Node c, int j, int i, String str) { |
153 | int k = j + c.depth(); |
154 | if (i - j + 1 > 0){ |
155 | while (!c.inEdge(i - j)){ |
156 | c = c.sons.get(str.charAt(k)); |
157 | k += c.edgeLength(); |
158 | } |
159 | } |
160 | return c; |
161 | } |
162 | |
163 | void addSuffixLink(Node current){ |
164 | if (needSuffix != null){ |
165 | needSuffix.suffixLink = current; |
166 | } |
167 | needSuffix = null; |
168 | } |
169 | |
170 | void build(Node root, String s) { |
171 | |
172 | Node c = newNode(); |
173 | needSuffix = null; |
174 | root.link(c, 0, length, s); |
175 | |
176 | // Indicates if at the beginning of the phase we need to follow the suffix link of the current node |
177 | //and then walk down the tree using the skip and count trick. |
178 | boolean needWalk = true; |
179 | |
180 | for (int i=0, j=1; i<length-1; ++i){ |
181 | char nc = s.charAt(i+1); |
182 | while (j <= i + 1){ |
183 | if (needWalk){ |
184 | if (c.suffixLink == null && c.parent != null) c = c.parent; |
185 | c = (c.suffixLink == null ? root : c.suffixLink); |
186 | c = walkDown(c, j, i, s); |
187 | } |
188 | |
189 | needWalk = true; |
190 | // Here c == the highest node below s[j...i] and we will add char s[i+1] |
191 | int m = i - j + 1; // Length of the string s[j..i]. |
192 | if (m == c.depth()){ |
193 | // String s[j...i] ends exactly at node c (explicit node). |
194 | addSuffixLink(c); |
195 | if (c.sons.containsKey(nc)){ |
196 | c = c.sons.get(nc); |
197 | needWalk = false; |
198 | break; |
199 | }else{ |
200 | Node leaf = newNode(); |
201 | c.link(leaf, i+1, length, s); |
202 | } |
203 | }else{ |
204 | // String s[j...i] ends at some place in the edge that reaches node c. |
205 | int where = c.edgeStart + m - c.parentDepth; |
206 | // The next character in the path after string s[j...i] is s[where] |
207 | if (s.charAt(where) == nc){ //Either rule 3 or rule 1 |
208 | addSuffixLink(c); |
209 | if (!c.isLeaf() || j != c.edgeStart - c.parentDepth){ |
210 | // Rule 3 |
211 | needWalk = false; |
212 | break; |
213 | } |
214 | }else{ |
215 | Node split = newNode(); |
216 | c.parent.link(split, c.edgeStart, where, s); |
217 | split.link(c, where, c.edgeEnd, s); |
218 | split.link(newNode(), i+1, length, s); |
219 | |
220 | addSuffixLink(split); |
221 | |
222 | if (split.depth() == 1){ |
223 | //The suffix link is the root because we remove the only character and end with an empty string. |
224 | split.suffixLink = root; |
225 | }else{ |
226 | needSuffix = split; |
227 | } |
228 | c = split; |
229 | } |
230 | } |
231 | j++; |
232 | } |
233 | } |
234 | } |
235 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029272 |
Snippet name: | Ukkonen's algorithm Spike [dev.] |
Eternal ID of this version: | #1029272/6 |
Text MD5: | 3c18b2dee87246b626c4737a8994c5f2 |
Transpilation MD5: | 3cb32035074f86647e5e25299e1a79a6 |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX source code (desktop) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-28 13:26:35 |
Source code size: | 6265 bytes / 235 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 269 / 918 |
Version history: | 5 change(s) |
Referenced in: | [show references] |