Download Jar. Libraryless. Click here for Pure Java version (8870L/63K).
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147 /* Ukkonen's algorithm for linear time construction of suffix trees. */ public static int stacktrack; public char TERMINATORS_RANGE = '\ud800'; public static int count=0; public static void dfsd(Node c){ if (c.isLeaf()){ //print("\nbasecase"); //count++; return; } Node a; print(c.sons.keySet()); Iterator it = c.sons.entrySet().iterator(); while (it.hasNext()) { Map.Entry pairs = (Map.Entry)it.next(); a = (Node)pairs.getValue(); for(int i=0;i<stacktrack;i++)System.out.print("\t"); print(stacktrack+" br>>>>>>> ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd ); stacktrack++; count++; dfsd(a); stacktrack--; for(int i=0;i<stacktrack;i++)System.out.print("\t"); print(stacktrack+" bt<<<<<<< ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd ); } } p-exp { /*Scanner sc = new Scanner(System.in); int n = sc.nextInt(); sc.nextLine(); */ S s = "abbab"; SuffixTree t1 = new SuffixTree(print(s)); print("Number of nodes: " + t1.nofnodes()); t1 = new SuffixTree(printStruct(new String[]{"aab","aac"})); print("Number of nodes: " + t1.nofnodes()); t1 = new SuffixTree; t1.addString(print("aab")); t1.addString(print("aac")); print("Number of nodes: " + t1.nofnodes()); dfsd(t1.root); print(+count); } sclass Node { Node parent, suffixLink; int edgeStart, edgeEnd, parentDepth; // The edge that reaches this node contains the substring s[edgeStart, edgeEnd] TreeMap<Character, Node> sons; public Node(){ parent = suffixLink = null; edgeStart = edgeEnd = parentDepth = 0; sons = new TreeMap<Character, Node>(); } // Returns true if there is a path starting at root having length position + 1 that ends // in the edge that reaches this node. public boolean inEdge(int position){ return parentDepth <= position && position < depth(); } public int edgeLength(){ return edgeEnd - edgeStart; } public int depth(){ return parentDepth + edgeLength(); } void link(Node son, int start, int end, String s){ // Links the current node with the son. The edge will have substring s[start, end) son.parent = this; son.parentDepth = this.depth(); son.edgeStart = start; son.edgeEnd = end; sons.put(s.charAt(start),son); } public boolean isLeaf(){ return sons.size() == 0; } }; sclass SuffixTree { ArrayList<Node> nodes; Node root, needSuffix; int currentNode; int length; char TERMINATORS_RANGE = '\ud800'; int termi=0; String generalized; public SuffixTree(String str) { nodes = new ArrayList<Node>(); currentNode = 0; str = str + (char)TERMINATORS_RANGE; length = str.length(); root = newNode(); build(root, str); } public SuffixTree(String[] stra) { nodes = new ArrayList<Node>(); currentNode = 0; root = newNode(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < stra.length; i++) { sb.append(stra[i]); sb.append((char)(TERMINATORS_RANGE + i)); } generalized = sb.toString(); length = generalized.length(); build(root, generalized); } public SuffixTree() { nodes = new ArrayList<Node>(); currentNode = 0; root = newNode(); } void addString(String str){ str = str+ (char)(TERMINATORS_RANGE + termi); termi++; length = str.length(); build(root, str); } int nofnodes() { return currentNode; } Node newNode(){ new Node node; nodes.add(currentNode, node); currentNode++; ret node; } Node walkDown(Node c, int j, int i, String str) { int k = j + c.depth(); if (i - j + 1 > 0){ while (!c.inEdge(i - j)){ c = c.sons.get(str.charAt(k)); k += c.edgeLength(); } } return c; } void addSuffixLink(Node current){ if (needSuffix != null){ needSuffix.suffixLink = current; } needSuffix = null; } void build(Node root, String s) { Node c = newNode(); needSuffix = null; root.link(c, 0, length, s); // Indicates if at the beginning of the phase we need to follow the suffix link of the current node //and then walk down the tree using the skip and count trick. boolean needWalk = true; for (int i=0, j=1; i<length-1; ++i){ char nc = s.charAt(i+1); while (j <= i + 1){ if (needWalk){ if (c.suffixLink == null && c.parent != null) c = c.parent; c = (c.suffixLink == null ? root : c.suffixLink); c = walkDown(c, j, i, s); } needWalk = true; // Here c == the highest node below s[j...i] and we will add char s[i+1] int m = i - j + 1; // Length of the string s[j..i]. if (m == c.depth()){ // String s[j...i] ends exactly at node c (explicit node). addSuffixLink(c); if (c.sons.containsKey(nc)){ c = c.sons.get(nc); needWalk = false; break; }else{ Node leaf = newNode(); c.link(leaf, i+1, length, s); } }else{ // String s[j...i] ends at some place in the edge that reaches node c. int where = c.edgeStart + m - c.parentDepth; // The next character in the path after string s[j...i] is s[where] if (s.charAt(where) == nc){ //Either rule 3 or rule 1 addSuffixLink(c); if (!c.isLeaf() || j != c.edgeStart - c.parentDepth){ // Rule 3 needWalk = false; break; } }else{ Node split = newNode(); c.parent.link(split, c.edgeStart, where, s); split.link(c, where, c.edgeEnd, s); split.link(newNode(), i+1, length, s); addSuffixLink(split); if (split.depth() == 1){ //The suffix link is the root because we remove the only character and end with an empty string. split.suffixLink = root; }else{ needSuffix = split; } c = split; } } j++; } } } }
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029272 |
Snippet name: | Ukkonen's algorithm Spike [dev.] |
Eternal ID of this version: | #1029272/6 |
Text MD5: | 3c18b2dee87246b626c4737a8994c5f2 |
Transpilation MD5: | 3cb32035074f86647e5e25299e1a79a6 |
Author: | stefan |
Category: | javax / suffix trees |
Type: | JavaX source code (desktop) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-28 13:26:35 |
Source code size: | 6265 bytes / 235 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 268 / 917 |
Version history: | 5 change(s) |
Referenced in: | [show references] |