Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

235
LINES

< > BotCompany Repo | #1029272 // Ukkonen's algorithm Spike [dev.]

JavaX source code (desktop) [tags: use-pretranspiled] - run with: x30.jar

Download Jar. Libraryless. Click here for Pure Java version (8870L/63K).

1  
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2  
3  
/*
4  
 Ukkonen's algorithm for linear time construction of suffix trees.
5  
*/
6  
7  
public static int stacktrack;
8  
public char TERMINATORS_RANGE = '\ud800';
9  
public static int count=0;
10  
public static void dfsd(Node c){
11  
  if (c.isLeaf()){
12  
    //print("\nbasecase");
13  
    //count++;
14  
    return;
15  
  }
16  
  Node a;
17  
  print(c.sons.keySet());
18  
  
19  
  Iterator it = c.sons.entrySet().iterator();
20  
  while (it.hasNext()) {
21  
    Map.Entry pairs = (Map.Entry)it.next();
22  
    a = (Node)pairs.getValue();
23  
    for(int i=0;i<stacktrack;i++)System.out.print("\t");
24  
    print(stacktrack+" br>>>>>>> ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd );
25  
    stacktrack++;
26  
    count++;
27  
    dfsd(a);
28  
  
29  
    stacktrack--;
30  
    for(int i=0;i<stacktrack;i++)System.out.print("\t");
31  
    print(stacktrack+" bt<<<<<<< ="+count+"= "+pairs.getKey() + " = " + a.edgeStart + " : " + a.edgeEnd );
32  
  }
33  
}
34  
        
35  
p-exp {
36  
  /*Scanner sc = new Scanner(System.in);
37  
  int n = sc.nextInt();
38  
  sc.nextLine();
39  
  */
40  
  
41  
  S s = "abbab";
42  
  SuffixTree t1 = new SuffixTree(print(s));
43  
  print("Number of nodes: " + t1.nofnodes());
44  
  t1 = new SuffixTree(printStruct(new String[]{"aab","aac"}));
45  
  print("Number of nodes: " + t1.nofnodes());
46  
47  
  t1 = new SuffixTree;
48  
  t1.addString(print("aab"));
49  
  t1.addString(print("aac"));
50  
  print("Number of nodes: " + t1.nofnodes());
51  
  dfsd(t1.root);
52  
  print(+count);
53  
}
54  
55  
sclass Node {
56  
  Node parent, suffixLink;
57  
  int edgeStart, edgeEnd, parentDepth;
58  
  // The edge that reaches this node contains the substring s[edgeStart, edgeEnd]
59  
  TreeMap<Character, Node> sons;
60  
61  
  public Node(){
62  
    parent = suffixLink = null;
63  
    edgeStart = edgeEnd = parentDepth = 0;
64  
    sons = new TreeMap<Character, Node>();
65  
  }
66  
67  
  // Returns true if there is a path starting at root having length position + 1 that ends
68  
  // in the edge that reaches this node.
69  
  public boolean inEdge(int position){
70  
    return parentDepth <= position && position < depth();
71  
  }
72  
73  
  public int edgeLength(){
74  
    return edgeEnd - edgeStart;
75  
  }
76  
77  
  public int depth(){
78  
    return parentDepth + edgeLength();
79  
  }
80  
81  
  void link(Node son, int start, int end, String s){
82  
    // Links the current node with the son. The edge will have substring s[start, end)
83  
    son.parent = this;
84  
    son.parentDepth = this.depth();
85  
    son.edgeStart = start;
86  
    son.edgeEnd = end;
87  
    sons.put(s.charAt(start),son);
88  
  }
89  
90  
  public boolean isLeaf(){
91  
    return sons.size() == 0;
92  
  }
93  
};
94  
95  
sclass SuffixTree {
96  
  ArrayList<Node> nodes;
97  
  Node root, needSuffix;
98  
  int currentNode;
99  
  int length;
100  
  char TERMINATORS_RANGE = '\ud800';
101  
  int termi=0;
102  
  String generalized;
103  
104  
  public SuffixTree(String str) {
105  
    nodes = new ArrayList<Node>();
106  
    currentNode = 0;
107  
    str = str + (char)TERMINATORS_RANGE;
108  
    length = str.length();
109  
    root = newNode();
110  
    build(root, str);
111  
  }
112  
113  
  public SuffixTree(String[] stra) {
114  
    nodes = new ArrayList<Node>();
115  
    currentNode = 0;
116  
    root = newNode();
117  
    
118  
    StringBuilder sb = new StringBuilder();
119  
    for (int i = 0; i < stra.length; i++) {
120  
        sb.append(stra[i]);
121  
        sb.append((char)(TERMINATORS_RANGE + i));
122  
    }
123  
    generalized = sb.toString();
124  
    length = generalized.length();
125  
    build(root, generalized); 
126  
  }
127  
128  
  public SuffixTree() {
129  
    nodes = new ArrayList<Node>();
130  
    currentNode = 0;
131  
    root = newNode();
132  
  }
133  
  
134  
  void addString(String str){
135  
    str = str+ (char)(TERMINATORS_RANGE + termi);
136  
    termi++;
137  
    length = str.length();
138  
    build(root, str);
139  
  } 
140  
  
141  
  int nofnodes() {
142  
    return currentNode;
143  
  }
144  
  
145  
  Node newNode(){
146  
    new Node node;
147  
    nodes.add(currentNode, node);
148  
    currentNode++;
149  
    ret node;
150  
  }
151  
152  
  Node walkDown(Node c, int j, int i, String str) {
153  
    int k = j + c.depth();
154  
    if (i - j + 1 > 0){
155  
      while (!c.inEdge(i - j)){
156  
        c = c.sons.get(str.charAt(k));
157  
        k += c.edgeLength();
158  
      }
159  
    }
160  
    return c;
161  
  }
162  
163  
  void addSuffixLink(Node current){
164  
    if (needSuffix != null){
165  
      needSuffix.suffixLink = current;
166  
    }
167  
    needSuffix = null;
168  
  }
169  
170  
  void build(Node root, String s) {
171  
    
172  
    Node c = newNode();
173  
    needSuffix = null;
174  
    root.link(c, 0, length, s);
175  
176  
    // Indicates if at the beginning of the phase we need to follow the suffix link of the current node 
177  
    //and then walk down the tree using the skip and count trick.
178  
    boolean needWalk = true;
179  
180  
    for (int i=0, j=1; i<length-1; ++i){
181  
      char nc = s.charAt(i+1);
182  
      while (j <= i + 1){
183  
        if (needWalk){
184  
          if (c.suffixLink == null && c.parent != null) c = c.parent;
185  
          c = (c.suffixLink == null ? root : c.suffixLink);
186  
          c = walkDown(c, j, i, s);
187  
        }
188  
189  
        needWalk = true;
190  
        // Here c == the highest node below s[j...i] and we will add char s[i+1]
191  
        int m = i - j + 1; // Length of the string s[j..i].
192  
        if (m == c.depth()){
193  
          // String s[j...i] ends exactly at node c (explicit node).
194  
          addSuffixLink(c);
195  
          if (c.sons.containsKey(nc)){
196  
            c = c.sons.get(nc);
197  
            needWalk = false;
198  
            break;
199  
          }else{
200  
            Node leaf = newNode();
201  
            c.link(leaf, i+1, length, s);
202  
          }
203  
        }else{
204  
          // String s[j...i] ends at some place in the edge that reaches node c.
205  
          int where = c.edgeStart + m - c.parentDepth;
206  
          // The next character in the path after string s[j...i] is s[where]
207  
          if (s.charAt(where) == nc){ //Either rule 3 or rule 1
208  
            addSuffixLink(c);
209  
            if (!c.isLeaf() || j != c.edgeStart - c.parentDepth){
210  
              // Rule 3
211  
              needWalk = false;
212  
              break;
213  
            }
214  
          }else{
215  
            Node split = newNode();
216  
            c.parent.link(split, c.edgeStart, where, s);
217  
            split.link(c, where, c.edgeEnd, s);
218  
            split.link(newNode(), i+1, length, s);
219  
      
220  
            addSuffixLink(split);
221  
      
222  
            if (split.depth() == 1){
223  
              //The suffix link is the root because we remove the only character and end with an empty string.
224  
              split.suffixLink = root;
225  
            }else{
226  
              needSuffix = split;
227  
            }
228  
            c = split;
229  
          }
230  
        }
231  
        j++;
232  
      }
233  
    }
234  
  }
235  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029272
Snippet name: Ukkonen's algorithm Spike [dev.]
Eternal ID of this version: #1029272/6
Text MD5: 3c18b2dee87246b626c4737a8994c5f2
Transpilation MD5: 3cb32035074f86647e5e25299e1a79a6
Author: stefan
Category: javax / suffix trees
Type: JavaX source code (desktop)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-28 13:26:35
Source code size: 6265 bytes / 235 lines
Pitched / IR pitched: No / No
Views / Downloads: 269 / 918
Version history: 5 change(s)
Referenced in: [show references]