Download Jar. Libraryless. Click here for Pure Java version (2973L/20K).
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 /* * Copyright (c) 2016 Sergey Makagonov * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ do not include class SuffixTree. p { new ST; } sclass ST { BufferedReader in; PrintWriter out; class SuffixTree { final int oo = Integer.MAX_VALUE/2; Node [] nodes; char [] text; int root, position = -1, currentNode, needSuffixLink, remainder; int active_node, active_length, active_edge; class Node { /* There is no need to create an "Edge" class. Information about the edge is stored right in the node. [start; end) interval specifies the edge, by which the node is connected to its parent node. */ int start, end = oo, link; public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>(); public Node(int start, int end) { this.start = start; this.end = end; } public int edgeLength() { return Math.min(end, position + 1) - start; } } public SuffixTree(int length) { nodes = new Node[2* length + 2]; text = new char[length]; root = active_node = newNode(-1, -1); } private void addSuffixLink(int node) { if (needSuffixLink > 0) nodes[needSuffixLink].link = node; needSuffixLink = node; } char active_edge() { return text[active_edge]; } boolean walkDown(int next) { if (active_length >= nodes[next].edgeLength()) { active_edge += nodes[next].edgeLength(); active_length -= nodes[next].edgeLength(); active_node = next; return true; } return false; } int newNode(int start, int end) { nodes[++currentNode] = new Node(start, end); return currentNode; } public void addChar(char c) throws Exception { text[++position] = c; needSuffixLink = -1; remainder++; while(remainder > 0) { if (active_length == 0) active_edge = position; if (!nodes[active_node].next.containsKey(active_edge())){ int leaf = newNode(position, oo); nodes[active_node].next.put(active_edge(), leaf); addSuffixLink(active_node); //rule 2 } else { int next = nodes[active_node].next.get(active_edge()); if (walkDown(next)) continue; //observation 2 if (text[nodes[next].start + active_length] == c) { //observation 1 active_length++; addSuffixLink(active_node); // observation 3 break; } int split = newNode(nodes[next].start, nodes[next].start + active_length); nodes[active_node].next.put(active_edge(), split); int leaf = newNode(position, oo); nodes[split].next.put(c, leaf); nodes[next].start += active_length; nodes[split].next.put(text[nodes[next].start], next); addSuffixLink(split); //rule 2 } remainder--; if (active_node == root && active_length > 0) { //rule 1 active_length--; active_edge = position - remainder + 1; } else active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3 } } /* printing the Suffix Tree in a format understandable by graphviz. The output is written into st.dot file. In order to see the suffix tree as a PNG image, run the following command: dot -Tpng -O st.dot */ String edgeString(int node) { return new String(Arrays.copyOfRange(text, nodes[node].start, Math.min(position + 1, nodes[node].end))); } void printTree() { out.println("digraph {"); out.println("\trankdir = LR;"); out.println("\tedge [arrowsize=0.4,fontsize=10]"); out.println("\tnode1 [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.1,height=.1];"); out.println("//------leaves------"); printLeaves(root); out.println("//------internal nodes------"); printInternalNodes(root); out.println("//------edges------"); printEdges(root); out.println("//------suffix links------"); printSLinks(root); out.println("}"); } void printLeaves(int x) { if (nodes[x].next.size() == 0) out.println("\tnode"+x+" [label=\"\",shape=point]"); else { for (int child : nodes[x].next.values()) printLeaves(child); } } void printInternalNodes(int x) { if (x != root && nodes[x].next.size() > 0) out.println("\tnode"+x+" [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.07,height=.07]"); for (int child : nodes[x].next.values()) printInternalNodes(child); } void printEdges(int x) { for (int child : nodes[x].next.values()) { out.println("\tnode"+x+" -> node"+child+" [label=\""+edgeString(child)+"\",weight=3]"); printEdges(child); } } void printSLinks(int x) { if (nodes[x].link > 0) out.println("\tnode"+x+" -> node"+nodes[x].link+" [label=\"\",weight=1,style=dotted]"); for (int child : nodes[x].next.values()) printSLinks(child); } } public ST() throws Exception { in = new BufferedReader(new StringReader("abc hello abc")); File f = prepareProgramFile("st.dot"); out = new PrintWriter(new FileWriter(f)); String line = in.readLine(); SuffixTree st = new SuffixTree(line.length()); for (int i = 0; i < line.length(); i++) st.addChar(line.charAt(i)); st.printTree(); out.close(); printFileInfo(f); } }
Began life as a copy of #1029272
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029274 |
Snippet name: | Ukkonen's algorithm Spike [version by Sergey Makagonov, dev.] |
Eternal ID of this version: | #1029274/4 |
Text MD5: | 9c07947a326fe8fe3edca550b149be58 |
Transpilation MD5: | fd34127d685764ccc3143e96d62b7d65 |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX source code (desktop) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-29 19:44:05 |
Source code size: | 7849 bytes / 205 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 300 / 920 |
Version history: | 3 change(s) |
Referenced in: | [show references] |