Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

205
LINES

< > BotCompany Repo | #1029274 // Ukkonen's algorithm Spike [version by Sergey Makagonov, dev.]

JavaX source code (desktop) [tags: use-pretranspiled] - run with: x30.jar

Download Jar. Libraryless. Click here for Pure Java version (2973L/20K).

1  
// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147
2  
3  
/*
4  
 * Copyright (c) 2016 Sergey Makagonov
5  
 * 
6  
 * Permission is hereby granted, free of charge, to any person obtaining
7  
 * a copy of this software and associated documentation files (the
8  
 * "Software"), to deal in the Software without restriction, including
9  
 * without limitation the rights to use, copy, modify, merge, publish,
10  
 * distribute, sublicense, and/or sell copies of the Software, and to
11  
 * permit persons to whom the Software is furnished to do so, subject to
12  
 * the following conditions:
13  
 * 
14  
 * The above copyright notice and this permission notice shall be
15  
 * included in all copies or substantial portions of the Software.
16  
 * 
17  
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  
 * 
25  
 */
26  
 
27  
do not include class SuffixTree.
28  
 
29  
p { new ST; }
30  
31  
sclass ST {
32  
    BufferedReader in;
33  
    PrintWriter out;
34  
35  
    class SuffixTree {
36  
        final int oo = Integer.MAX_VALUE/2;
37  
        Node [] nodes;
38  
        char [] text;
39  
        int root, position = -1,
40  
                currentNode,
41  
                needSuffixLink,
42  
                remainder;
43  
44  
        int active_node, active_length, active_edge;
45  
46  
        class Node {
47  
48  
            /*
49  
               There is no need to create an "Edge" class.
50  
               Information about the edge is stored right in the node.
51  
               [start; end) interval specifies the edge,
52  
               by which the node is connected to its parent node.
53  
            */
54  
55  
            int start, end = oo, link;
56  
            public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>();
57  
58  
            public Node(int start, int end) {
59  
                this.start = start;
60  
                this.end = end;
61  
            }
62  
63  
            public int edgeLength() {
64  
                return Math.min(end, position + 1) - start;
65  
            }
66  
        }
67  
68  
        public SuffixTree(int length) {
69  
            nodes = new Node[2* length + 2];
70  
            text = new char[length];
71  
            root = active_node = newNode(-1, -1);
72  
        }
73  
74  
        private void addSuffixLink(int node) {
75  
            if (needSuffixLink > 0)
76  
                nodes[needSuffixLink].link = node;
77  
            needSuffixLink = node;
78  
        }
79  
80  
        char active_edge() {
81  
            return text[active_edge];
82  
        }
83  
84  
        boolean walkDown(int next) {
85  
            if (active_length >= nodes[next].edgeLength()) {
86  
                active_edge += nodes[next].edgeLength();
87  
                active_length -= nodes[next].edgeLength();
88  
                active_node = next;
89  
                return true;
90  
            }
91  
            return false;
92  
        }
93  
94  
        int newNode(int start, int end) {
95  
            nodes[++currentNode] = new Node(start, end);
96  
            return currentNode;
97  
        }
98  
99  
        public void addChar(char c) throws Exception {
100  
            text[++position] = c;
101  
            needSuffixLink = -1;
102  
            remainder++;
103  
            while(remainder > 0) {
104  
                if (active_length == 0) active_edge = position;
105  
                if (!nodes[active_node].next.containsKey(active_edge())){
106  
                    int leaf = newNode(position, oo);
107  
                    nodes[active_node].next.put(active_edge(), leaf);
108  
                    addSuffixLink(active_node); //rule 2
109  
                } else {
110  
                    int next = nodes[active_node].next.get(active_edge());
111  
                    if (walkDown(next)) continue;   //observation 2
112  
                    if (text[nodes[next].start + active_length] == c) { //observation 1
113  
                        active_length++;
114  
                        addSuffixLink(active_node); // observation 3
115  
                        break;
116  
                    }
117  
                    int split = newNode(nodes[next].start, nodes[next].start + active_length);
118  
                    nodes[active_node].next.put(active_edge(), split);
119  
                    int leaf = newNode(position, oo);
120  
                    nodes[split].next.put(c, leaf);
121  
                    nodes[next].start += active_length;
122  
                    nodes[split].next.put(text[nodes[next].start], next);
123  
                    addSuffixLink(split); //rule 2
124  
                }
125  
                remainder--;
126  
127  
                if (active_node == root && active_length > 0) {  //rule 1
128  
                    active_length--;
129  
                    active_edge = position - remainder + 1;
130  
                } else
131  
                    active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
132  
            }
133  
        }
134  
135  
        /*
136  
            printing the Suffix Tree in a format understandable by graphviz. The output is written into
137  
            st.dot file. In order to see the suffix tree as a PNG image, run the following command:
138  
            dot -Tpng -O st.dot
139  
         */
140  
141  
        String edgeString(int node) {
142  
            return new String(Arrays.copyOfRange(text, nodes[node].start, Math.min(position + 1, nodes[node].end)));
143  
        }
144  
145  
        void printTree() {
146  
            out.println("digraph {");
147  
            out.println("\trankdir = LR;");
148  
            out.println("\tedge [arrowsize=0.4,fontsize=10]");
149  
            out.println("\tnode1 [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.1,height=.1];");
150  
            out.println("//------leaves------");
151  
            printLeaves(root);
152  
            out.println("//------internal nodes------");
153  
            printInternalNodes(root);
154  
            out.println("//------edges------");
155  
            printEdges(root);
156  
            out.println("//------suffix links------");
157  
            printSLinks(root);
158  
            out.println("}");
159  
        }
160  
161  
        void printLeaves(int x) {
162  
            if (nodes[x].next.size() == 0)
163  
                out.println("\tnode"+x+" [label=\"\",shape=point]");
164  
            else {
165  
                for (int child : nodes[x].next.values())
166  
                    printLeaves(child);
167  
            }
168  
        }
169  
170  
        void printInternalNodes(int x) {
171  
            if (x != root && nodes[x].next.size() > 0)
172  
                out.println("\tnode"+x+" [label=\"\",style=filled,fillcolor=lightgrey,shape=circle,width=.07,height=.07]");
173  
174  
            for (int child : nodes[x].next.values())
175  
                printInternalNodes(child);
176  
        }
177  
178  
        void printEdges(int x) {
179  
            for (int child : nodes[x].next.values()) {
180  
                out.println("\tnode"+x+" -> node"+child+" [label=\""+edgeString(child)+"\",weight=3]");
181  
                printEdges(child);
182  
            }
183  
        }
184  
185  
        void printSLinks(int x) {
186  
            if (nodes[x].link > 0)
187  
                out.println("\tnode"+x+" -> node"+nodes[x].link+" [label=\"\",weight=1,style=dotted]");
188  
            for (int child : nodes[x].next.values())
189  
                printSLinks(child);
190  
        }
191  
    }
192  
193  
    public ST() throws Exception {
194  
        in = new BufferedReader(new StringReader("abc hello abc"));
195  
        File f = prepareProgramFile("st.dot");
196  
        out = new PrintWriter(new FileWriter(f));
197  
        String line = in.readLine();
198  
        SuffixTree st = new SuffixTree(line.length());
199  
        for (int i = 0; i < line.length(); i++)
200  
            st.addChar(line.charAt(i));
201  
        st.printTree();
202  
        out.close();
203  
        printFileInfo(f);
204  
    }
205  
}

Author comment

Began life as a copy of #1029272

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029274
Snippet name: Ukkonen's algorithm Spike [version by Sergey Makagonov, dev.]
Eternal ID of this version: #1029274/4
Text MD5: 9c07947a326fe8fe3edca550b149be58
Transpilation MD5: fd34127d685764ccc3143e96d62b7d65
Author: stefan
Category: javax / suffix trees
Type: JavaX source code (desktop)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 19:44:05
Source code size: 7849 bytes / 205 lines
Pitched / IR pitched: No / No
Views / Downloads: 303 / 928
Version history: 3 change(s)
Referenced in: [show references]