Libraryless. Click here for Pure Java version (139L/2K).
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 /* * Copyright (c) 2016 Sergey Makagonov * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ sclass UkkonenSuffixTree { static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2? Node [] nodes; char [] text; int root, position = -1, currentNode, needSuffixLink, remainder; int active_node, active_length, active_edge; sclass Node { int start, end = oo, link; public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>(); *(int *start, int *end) {} public int edgeLength(UkkonenSuffixTree tree) { ret Math.min(end, tree.position + 1) - start; } } *(int length) { nodes = new Node[2*length+2]; text = new char[length]; root = active_node = newNode(-1, -1); } private void addSuffixLink(int node) { if (needSuffixLink > 0) nodes[needSuffixLink].link = node; needSuffixLink = node; } char active_edge() { return text[active_edge]; } boolean walkDown(int next) { if (active_length >= nodes[next].edgeLength(this)) { active_edge += nodes[next].edgeLength(this); active_length -= nodes[next].edgeLength(this); active_node = next; return true; } return false; } int newNode(int start, int end) { nodes[++currentNode] = new Node(start, end); return currentNode; } public void addChar(char c) { text[++position] = c; needSuffixLink = -1; remainder++; while(remainder > 0) { if (active_length == 0) active_edge = position; if (!nodes[active_node].next.containsKey(active_edge())){ int leaf = newNode(position, oo); nodes[active_node].next.put(active_edge(), leaf); addSuffixLink(active_node); //rule 2 } else { int next = nodes[active_node].next.get(active_edge()); if (walkDown(next)) continue; //observation 2 if (text[nodes[next].start + active_length] == c) { //observation 1 active_length++; addSuffixLink(active_node); // observation 3 break; } int split = newNode(nodes[next].start, nodes[next].start + active_length); nodes[active_node].next.put(active_edge(), split); int leaf = newNode(position, oo); nodes[split].next.put(c, leaf); nodes[next].start += active_length; nodes[split].next.put(text[nodes[next].start], next); addSuffixLink(split); //rule 2 } remainder--; if (active_node == root && active_length > 0) { //rule 1 active_length--; active_edge = position - remainder + 1; } else active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3 } } }
Began life as a copy of #1029274
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029275 |
Snippet name: | UkkonenSuffixTree |
Eternal ID of this version: | #1029275/3 |
Text MD5: | 67c01b3997a871bb5d85c32ab5f042c4 |
Transpilation MD5: | 345d8fd26c4166c2858c09fb0ff2ad3c |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-29 19:51:04 |
Source code size: | 4101 bytes / 112 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 219 / 320 |
Version history: | 2 change(s) |
Referenced in: | #1029276 - UkkonenIntSuffixTree [ints instead of chars, compact version, dev.] #1029277 - UkkonenIntSuffixTree [ints instead of chars] |