Ukkonen's algorithm Spike [dev.] [1029272]

// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147

/*
 Ukkonen's algorithm for linear time construction of suffix trees.
*/

// Current recursion depth of dfsd(); dfsd() prints this many tabs before each trace line.
public static int stacktrack;
// Not referenced by the top-level code visible in this snippet; SuffixTree declares
// its own field with the same name and value.
public char TERMINATORS_RANGE = '\ud800';
// Number of child edges dfsd() has descended into so far; the main block prints it at the end.
public static int count=0;
/**
 * Debug helper: walks the subtree below {@code c} depth-first and prints a trace.
 *
 * For every non-leaf node the key set of its children is printed first. Each
 * child edge then produces one "br>>>>>>>" line before the recursive descent
 * and one "bt<<<<<<<" line after it, both indented by {@code stacktrack} tabs.
 * {@code count} is incremented once per child edge that is descended into.
 *
 * @param c node whose subtree is dumped; a leaf produces no output
 */
public static void dfsd(Node c){
  if (c.isLeaf()) {
    return;
  }

  print(c.sons.keySet());

  for (Map.Entry<Character, Node> edge : c.sons.entrySet()) {
    Character label = edge.getKey();
    Node child = edge.getValue();

    // Line emitted when entering the child.
    for (int tab = stacktrack; tab > 0; tab--) {
      System.out.print("\t");
    }
    print(stacktrack + " br>>>>>>> =" + count + "= " + label + " = " + child.edgeStart + " : " + child.edgeEnd);

    stacktrack++;
    count++;
    dfsd(child);
    stacktrack--;

    // Line emitted after returning from the child.
    for (int tab = stacktrack; tab > 0; tab--) {
      System.out.print("\t");
    }
    print(stacktrack + " bt<<<<<<< =" + count + "= " + label + " = " + child.edgeStart + " : " + child.edgeEnd);
  }
}
        
// Demo block (presumably the JavaX main entry point): builds suffix trees for a
// few sample inputs, prints each tree's node count, then dumps the last tree
// with dfsd() and prints the edge counter.
p-exp {
  /*Scanner sc = new Scanner(System.in);
  int n = sc.nextInt();
  sc.nextLine();
  */
  
  // Single-string tree. The result of print(s) is handed to the constructor,
  // so print() is assumed to return its argument.
  S s = "abbab";
  SuffixTree t1 = new SuffixTree(print(s));
  print("Number of nodes: " + t1.nofnodes());
  // Tree over two strings passed as an array (printStruct() is likewise
  // assumed to return its argument).
  t1 = new SuffixTree(printStruct(new String[]{"aab","aac"}));
  print("Number of nodes: " + t1.nofnodes());

  // Same two strings again, this time added one at a time via addString().
  t1 = new SuffixTree;
  t1.addString(print("aab"));
  t1.addString(print("aac"));
  print("Number of nodes: " + t1.nofnodes());
  // Trace the last tree; count afterwards holds the number of edges dfsd() descended into.
  dfsd(t1.root);
  print(+count);
}

/**
 * One node of the suffix tree together with the edge leading into it.
 *
 * The incoming edge is labelled with the half-open range
 * {@code s[edgeStart, edgeEnd)} of the indexed text, so its length is
 * {@code edgeEnd - edgeStart}.
 */
sclass Node {
  Node parent, suffixLink;
  // edgeStart/edgeEnd delimit the incoming edge label; parentDepth is the
  // depth() of the parent at the time this node was linked.
  int edgeStart, edgeEnd, parentDepth;
  // Children, keyed by the first character of each child's edge label.
  TreeMap<Character, Node> sons = new TreeMap<Character, Node>();

  /** Creates an unlinked node: no parent, no suffix link, empty edge, no children. */
  public Node(){
  }

  /**
   * Tells whether a root path of length {@code position + 1} ends on the edge
   * that leads into this node.
   *
   * @param position zero-based offset along the path from the root
   * @return true if {@code parentDepth <= position < depth()}
   */
  public boolean inEdge(int position){
    if (position < parentDepth) {
      return false;
    }
    return position < depth();
  }

  /** @return number of characters on the incoming edge */
  public int edgeLength(){
    int span = edgeEnd - edgeStart;
    return span;
  }

  /** @return length of the path from the root to this node */
  public int depth(){
    return edgeLength() + parentDepth;
  }

  /**
   * Attaches {@code son} below this node with edge label {@code s[start, end)}.
   * Any existing child registered under the same first character is replaced.
   *
   * @param son   node to attach
   * @param start inclusive start offset of the edge label in {@code s}
   * @param end   exclusive end offset of the edge label in {@code s}
   * @param s     the indexed text
   */
  void link(Node son, int start, int end, String s){
    son.parent = this;
    son.parentDepth = depth();
    son.edgeStart = start;
    son.edgeEnd = end;
    // The child is looked up later by the first character of its label.
    Character firstChar = s.charAt(start);
    sons.put(firstChar, son);
  }

  /** @return true if this node has no children */
  public boolean isLeaf(){
    return sons.isEmpty();
  }
}

/**
 * Suffix tree over one or more strings, built with Ukkonen's algorithm.
 *
 * All indexed strings are concatenated into a single text ({@code generalized}),
 * each followed by its own terminator character
 * ({@code TERMINATORS_RANGE + k} for the k-th string). Edge labels are offsets
 * into that one text, so the tree is always built over the whole text:
 * building each string separately into the same root would mix offsets that
 * belong to different strings and overwrite existing children of the root.
 */
sclass SuffixTree {
  // Every node of the current tree, in creation order (index 0 is the root).
  ArrayList<Node> nodes;
  // root: the tree's root node (same object for the lifetime of the tree).
  // needSuffix: internal node created by the latest split that still lacks a
  // suffix link, or null.
  Node root, needSuffix;
  // Number of nodes in the current tree; also the next free index in nodes.
  int currentNode;
  // Length of the text being indexed; every leaf edge ends at this offset.
  int length;
  // First terminator character; the k-th string is followed by TERMINATORS_RANGE + k.
  char TERMINATORS_RANGE = '\ud800';
  // Number of terminators handed out so far.
  int termi=0;
  // Concatenation of all indexed strings, each followed by its terminator.
  String generalized = "";

  /**
   * Builds the suffix tree of a single string.
   *
   * @param str the string to index
   */
  public SuffixTree(String str) {
    this();
    generalized = str + nextTerminator();
    rebuild();
  }

  /**
   * Builds a generalized suffix tree over all strings in {@code stra}.
   * An empty array yields a tree that consists of the root only.
   *
   * @param stra the strings to index
   */
  public SuffixTree(String[] stra) {
    this();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < stra.length; i++) {
      sb.append(stra[i]);
      sb.append(nextTerminator());
    }
    generalized = sb.toString();
    rebuild();
  }

  /** Creates an empty tree (root only); strings are added with addString(). */
  public SuffixTree() {
    nodes = new ArrayList<Node>();
    currentNode = 0;
    root = newNode();
  }

  /**
   * Adds another string to the tree.
   *
   * The string plus a fresh terminator is appended to the indexed text and
   * the tree is rebuilt over the whole text, so each call costs time
   * proportional to the total length indexed so far.
   *
   * @param str the string to add
   */
  void addString(String str){
    generalized = generalized + str + nextTerminator();
    rebuild();
  }

  /** @return number of nodes in the tree, including the root */
  int nofnodes() {
    return currentNode;
  }

  /** Creates a node, registers it in nodes and returns it. */
  Node newNode(){
    new Node node;
    nodes.add(currentNode, node);
    currentNode++;
    ret node;
  }

  /** Returns the next unused terminator character and advances the counter. */
  private char nextTerminator() {
    char t = (char)(TERMINATORS_RANGE + termi);
    termi++;
    return t;
  }

  /** Discards everything below the root and builds the tree for generalized. */
  private void rebuild() {
    root.sons.clear();
    nodes.clear();
    nodes.add(root);
    currentNode = 1;
    length = generalized.length();
    build(root, generalized);
  }

  /**
   * Descends from {@code c} along the path spelling {@code str[j..i]} until
   * reaching the node whose incoming edge contains path position
   * {@code i - j}. Only the first character of each edge is inspected; the
   * rest of the edge is skipped by adding its length.
   *
   * @param c   node to start from
   * @param j   start offset of the current suffix in {@code str}
   * @param i   offset of the last character already in the tree
   * @param str the indexed text
   * @return the node reached (c itself when str[j..i] is empty)
   */
  Node walkDown(Node c, int j, int i, String str) {
    int k = j + c.depth();
    if (i - j + 1 > 0){
      while (!c.inEdge(i - j)){
        c = c.sons.get(str.charAt(k));
        k += c.edgeLength();
      }
    }
    return c;
  }

  /**
   * Points the pending suffix link (if any) at {@code current} and clears
   * the pending marker.
   */
  void addSuffixLink(Node current){
    if (needSuffix != null){
      needSuffix.suffixLink = current;
    }
    needSuffix = null;
  }

  /**
   * Builds the suffix tree of {@code s} below {@code root}. Expects
   * {@code root} to have no children and the field {@code length} to hold
   * the length of {@code s}. Does nothing when {@code length} is 0.
   *
   * @param root root node to build under
   * @param s    the text to index
   */
  void build(Node root, String s) {
    // Nothing to index; without this guard s.charAt(0) below would throw.
    if (length == 0) return;

    // First leaf: the whole text s[0, length).
    Node c = newNode();
    needSuffix = null;
    root.link(c, 0, length, s);

    // Indicates if at the beginning of the phase we need to follow the suffix link of the current node 
    //and then walk down the tree using the skip and count trick.
    boolean needWalk = true;

    for (int i=0, j=1; i<length-1; ++i){
      char nc = s.charAt(i+1);
      while (j <= i + 1){
        if (needWalk){
          if (c.suffixLink == null && c.parent != null) c = c.parent;
          c = (c.suffixLink == null ? root : c.suffixLink);
          c = walkDown(c, j, i, s);
        }

        needWalk = true;
        // Here c == the highest node below s[j...i] and we will add char s[i+1]
        int m = i - j + 1; // Length of the string s[j..i].
        if (m == c.depth()){
          // String s[j...i] ends exactly at node c (explicit node).
          addSuffixLink(c);
          if (c.sons.containsKey(nc)){
            // The extended string is already in the tree: end this phase
            // and continue from the child in the next one.
            c = c.sons.get(nc);
            needWalk = false;
            break;
          }else{
            Node leaf = newNode();
            c.link(leaf, i+1, length, s);
          }
        }else{
          // String s[j...i] ends at some place in the edge that reaches node c.
          int where = c.edgeStart + m - c.parentDepth;
          // The next character in the path after string s[j...i] is s[where]
          if (s.charAt(where) == nc){ //Either rule 3 or rule 1
            addSuffixLink(c);
            if (!c.isLeaf() || j != c.edgeStart - c.parentDepth){
              // Rule 3
              needWalk = false;
              break;
            }
          }else{
            // Mismatch inside the edge: split it at 'where' and hang a new
            // leaf for s[i+1, length) off the split node.
            Node split = newNode();
            c.parent.link(split, c.edgeStart, where, s);
            split.link(c, where, c.edgeEnd, s);
            split.link(newNode(), i+1, length, s);
      
            addSuffixLink(split);
      
            if (split.depth() == 1){
              //The suffix link is the root because we remove the only character and end with an empty string.
              split.suffixLink = root;
            }else{
              needSuffix = split;
            }
            c = split;
          }
        }
        j++;
      }
    }
  }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

Snippet ID:	#1029272
Snippet name:	Ukkonen's algorithm Spike [dev.]
Eternal ID of this version:	#1029272/6
Text MD5:	3c18b2dee87246b626c4737a8994c5f2
Transpilation MD5:	3cb32035074f86647e5e25299e1a79a6
Author:	stefan
Category:	javax / suffix trees
Type:	JavaX source code (desktop)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-07-28 13:26:35
Source code size:	6265 bytes / 235 lines
Pitched / IR pitched:	No / No
Views / Downloads:	269 / 918
Version history:	5 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1029272 // Ukkonen's algorithm Spike [dev.]

JavaX source code (desktop) [tags: use-pretranspiled] - run with: x30.jar

1	// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2
3	/*
4	Ukkonen's algorithm for linear time construction of suffix trees.
5	*/
6
7	public static int stacktrack;
8	public char TERMINATORS_RANGE = '\ud800';
9	public static int count=0;
10	public static void dfsd(Node c){
11	if (c.isLeaf()){
12	//print("\nbasecase");
13	//count++;
14	return;
15	}
16	Node a;
17	print(c.sons.keySet());
18
19	Iterator it = c.sons.entrySet().iterator();
20	while (it.hasNext()) {
21	Map.Entry pairs = (Map.Entry)it.next();
22	a = (Node)pairs.getValue();
23	for(int i=0;i<stacktrack;i++)System.out.print("\t");
24	print(stacktrack+" br>>>>>>> ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd );
25	stacktrack++;
26	count++;
27	dfsd(a);
28
29	stacktrack--;
30	for(int i=0;i<stacktrack;i++)System.out.print("\t");
31	print(stacktrack+" bt<<<<<<< ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd );
32	}
33	}
34
35	p-exp {
36	/*Scanner sc = new Scanner(System.in);
37	int n = sc.nextInt();
38	sc.nextLine();
39	*/
40
41	S s = "abbab";
42	SuffixTree t1 = new SuffixTree(print(s));
43	print("Number of nodes: " + t1.nofnodes());
44	t1 = new SuffixTree(printStruct(new String[]{"aab","aac"}));
45	print("Number of nodes: " + t1.nofnodes());
46
47	t1 = new SuffixTree;
48	t1.addString(print("aab"));
49	t1.addString(print("aac"));
50	print("Number of nodes: " + t1.nofnodes());
51	dfsd(t1.root);
52	print(+count);
53	}
54
55	sclass Node {
56	Node parent, suffixLink;
57	int edgeStart, edgeEnd, parentDepth;
58	// The edge that reaches this node contains the substring s[edgeStart, edgeEnd]
59	TreeMap<Character, Node> sons;
60
61	public Node(){
62	parent = suffixLink = null;
63	edgeStart = edgeEnd = parentDepth = 0;
64	sons = new TreeMap<Character, Node>();
65	}
66
67	// Returns true if there is a path starting at root having length position + 1 that ends
68	// in the edge that reaches this node.
69	public boolean inEdge(int position){
70	return parentDepth <= position && position < depth();
71	}
72
73	public int edgeLength(){
74	return edgeEnd - edgeStart;
75	}
76
77	public int depth(){
78	return parentDepth + edgeLength();
79	}
80
81	void link(Node son, int start, int end, String s){
82	// Links the current node with the son. The edge will have substring s[start, end)
83	son.parent = this;
84	son.parentDepth = this.depth();
85	son.edgeStart = start;
86	son.edgeEnd = end;
87	sons.put(s.charAt(start),son);
88	}
89
90	public boolean isLeaf(){
91	return sons.size() == 0;
92	}
93	};
94
95	sclass SuffixTree {
96	ArrayList<Node> nodes;
97	Node root, needSuffix;
98	int currentNode;
99	int length;
100	char TERMINATORS_RANGE = '\ud800';
101	int termi=0;
102	String generalized;
103
104	public SuffixTree(String str) {
105	nodes = new ArrayList<Node>();
106	currentNode = 0;
107	str = str + (char)TERMINATORS_RANGE;
108	length = str.length();
109	root = newNode();
110	build(root, str);
111	}
112
113	public SuffixTree(String[] stra) {
114	nodes = new ArrayList<Node>();
115	currentNode = 0;
116	root = newNode();
117
118	StringBuilder sb = new StringBuilder();
119	for (int i = 0; i < stra.length; i++) {
120	sb.append(stra[i]);
121	sb.append((char)(TERMINATORS_RANGE + i));
122	}
123	generalized = sb.toString();
124	length = generalized.length();
125	build(root, generalized);
126	}
127
128	public SuffixTree() {
129	nodes = new ArrayList<Node>();
130	currentNode = 0;
131	root = newNode();
132	}
133
134	void addString(String str){
135	str = str+ (char)(TERMINATORS_RANGE + termi);
136	termi++;
137	length = str.length();
138	build(root, str);
139	}
140
141	int nofnodes() {
142	return currentNode;
143	}
144
145	Node newNode(){
146	new Node node;
147	nodes.add(currentNode, node);
148	currentNode++;
149	ret node;
150	}
151
152	Node walkDown(Node c, int j, int i, String str) {
153	int k = j + c.depth();
154	if (i - j + 1 > 0){
155	while (!c.inEdge(i - j)){
156	c = c.sons.get(str.charAt(k));
157	k += c.edgeLength();
158	}
159	}
160	return c;
161	}
162
163	void addSuffixLink(Node current){
164	if (needSuffix != null){
165	needSuffix.suffixLink = current;
166	}
167	needSuffix = null;
168	}
169
170	void build(Node root, String s) {
171
172	Node c = newNode();
173	needSuffix = null;
174	root.link(c, 0, length, s);
175
176	// Indicates if at the beginning of the phase we need to follow the suffix link of the current node
177	//and then walk down the tree using the skip and count trick.
178	boolean needWalk = true;
179
180	for (int i=0, j=1; i<length-1; ++i){
181	char nc = s.charAt(i+1);
182	while (j <= i + 1){
183	if (needWalk){
184	if (c.suffixLink == null && c.parent != null) c = c.parent;
185	c = (c.suffixLink == null ? root : c.suffixLink);
186	c = walkDown(c, j, i, s);
187	}
188
189	needWalk = true;
190	// Here c == the highest node below s[j...i] and we will add char s[i+1]
191	int m = i - j + 1; // Length of the string s[j..i].
192	if (m == c.depth()){
193	// String s[j...i] ends exactly at node c (explicit node).
194	addSuffixLink(c);
195	if (c.sons.containsKey(nc)){
196	c = c.sons.get(nc);
197	needWalk = false;
198	break;
199	}else{
200	Node leaf = newNode();
201	c.link(leaf, i+1, length, s);
202	}
203	}else{
204	// String s[j...i] ends at some place in the edge that reaches node c.
205	int where = c.edgeStart + m - c.parentDepth;
206	// The next character in the path after string s[j...i] is s[where]
207	if (s.charAt(where) == nc){ //Either rule 3 or rule 1
208	addSuffixLink(c);
209	if (!c.isLeaf() || j != c.edgeStart - c.parentDepth){
210	// Rule 3
211	needWalk = false;
212	break;
213	}
214	}else{
215	Node split = newNode();
216	c.parent.link(split, c.edgeStart, where, s);
217	split.link(c, where, c.edgeEnd, s);
218	split.link(newNode(), i+1, length, s);
219
220	addSuffixLink(split);
221
222	if (split.depth() == 1){
223	//The suffix link is the root because we remove the only character and end with an empty string.
224	split.suffixLink = root;
225	}else{
226	needSuffix = split;
227	}
228	c = split;
229	}
230	}
231	j++;
232	}
233	}
234	}
235	}