Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

211
LINES

< > BotCompany Repo | #1028128 // Suffix Tree Spike v1 [seems OK]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 911K of libraries. Click here for Pure Java version (3756L/20K).

1  
!7
2  
3  
// adapted from https://github.com/eugenp/tutorials/tree/master/algorithms-searching/src/main/java/com/baeldung/algorithms/suffixtree
4  
5  
cmodule SuffixTreeSpike > DynPrintLog {
6  
  switchable S text = "hello world what is up in the world";
7  
  switchable S pattern = "world";
8  
  transient SuffixTree tree;
9  
  
10  
  sclass Node {
11  
    S text;
12  
    new L<Node> children;
13  
    int position;
14  
15  
    *(S *text, int *position) {}
16  
17  
    S getText() { ret text; }
18  
    void setText(S text) { this.text = text; }
19  
20  
    int getPosition() { ret position; }
21  
    void setPosition(int position) { this.position = position; }
22  
    L<Node> getChildren() { ret children; }
23  
    void setChildren(L<Node> children) { this.children = children; }
24  
25  
    S printTree(String depthIndicator) {
26  
        S str = "";
27  
        S positionStr = position > -1 ? "[" + position + "]" : "";
28  
        str += depthIndicator + text + positionStr + "\n";
29  
30  
        for (int i = 0; i < children.size(); i++)
31  
          str += children.get(i).printTree(depthIndicator + "\t");
32  
        return str;
33  
    }
34  
35  
    toString { ret printTree(""); }
36  
  }
37  
  
38  
  sclass SuffixTree {
39  
    private static final String WORD_TERMINATION = "$";
40  
    private static final int POSITION_UNDEFINED = -1;
41  
    Node root;
42  
    S fullText;
43  
  
44  
    *(S text) {
45  
        root = new Node("", POSITION_UNDEFINED);
46  
        for (int i = 0; i < text.length(); i++) {
47  
            addSuffix(text.substring(i) + WORD_TERMINATION, i);
48  
        }
49  
        fullText = text;
50  
    }
51  
52  
    public LS searchText(S pattern) {
53  
        //LOGGER.info("Searching for pattern \"{}\"", pattern);
54  
        List<String> result = new ArrayList<>();
55  
        List<Node> nodes = getAllNodesInTraversePath(pattern, root, false);
56  
57  
        if (nodes.size() > 0) {
58  
            Node lastNode = nodes.get(nodes.size() - 1);
59  
            if (lastNode != null) {
60  
                List<Integer> positions = getPositions(lastNode);
61  
                /*positions = positions.stream()
62  
                    .sorted()
63  
                    .collect(Collectors.toList());*/
64  
                sortInPlace(positions);
65  
                positions.forEach(m -> result.add((markPatternInText(m, pattern))));
66  
            }
67  
        }
68  
        return result;
69  
    }
70  
71  
    private void addSuffix(String suffix, int position) {
72  
        //LOGGER.info(">>>>>>>>>>>> Adding new suffix {}", suffix);
73  
        List<Node> nodes = getAllNodesInTraversePath(suffix, root, true);
74  
        if (nodes.size() == 0) {
75  
            addChildNode(root, suffix, position);
76  
            //LOGGER.info("{}", printTree());
77  
        } else {
78  
            Node lastNode = nodes.remove(nodes.size() - 1);
79  
            String newText = suffix;
80  
            if (nodes.size() > 0) {
81  
                String existingSuffixUptoLastNode = nodes.stream()
82  
                    .map(a -> a.getText())
83  
                    .reduce("", String::concat);
84  
85  
                // Remove prefix from newText already included in parent
86  
                newText = newText.substring(existingSuffixUptoLastNode.length());
87  
            }
88  
            extendNode(lastNode, newText, position);
89  
            //LOGGER.info("{}", printTree());
90  
        }
91  
    }
92  
93  
    private List<Integer> getPositions(Node node) {
94  
        List<Integer> positions = new ArrayList<>();
95  
        if (node.getText()
96  
            .endsWith(WORD_TERMINATION)) {
97  
            positions.add(node.getPosition());
98  
        }
99  
        for (int i = 0; i < node.getChildren()
100  
            .size(); i++) {
101  
            positions.addAll(getPositions(node.getChildren()
102  
                .get(i)));
103  
        }
104  
        return positions;
105  
    }
106  
107  
    private String markPatternInText(Integer startPosition, String pattern) {
108  
        String matchingTextLHS = fullText.substring(0, startPosition);
109  
        String matchingText = fullText.substring(startPosition, startPosition + pattern.length());
110  
        String matchingTextRHS = fullText.substring(startPosition + pattern.length());
111  
        return matchingTextLHS + "[" + matchingText + "]" + matchingTextRHS;
112  
    }
113  
114  
    private void addChildNode(Node parentNode, String text, int position) {
115  
        parentNode.getChildren()
116  
            .add(new Node(text, position));
117  
    }
118  
119  
    private void extendNode(Node node, String newText, int position) {
120  
        String currentText = node.getText();
121  
        String commonPrefix = getLongestCommonPrefix(currentText, newText);
122  
123  
        if (commonPrefix != currentText) {
124  
            String parentText = currentText.substring(0, commonPrefix.length());
125  
            String childText = currentText.substring(commonPrefix.length());
126  
            splitNodeToParentAndChild(node, parentText, childText);
127  
        }
128  
129  
        String remainingText = newText.substring(commonPrefix.length());
130  
        addChildNode(node, remainingText, position);
131  
    }
132  
133  
    private void splitNodeToParentAndChild(Node parentNode, String parentNewText, String childNewText) {
134  
        Node childNode = new Node(childNewText, parentNode.getPosition());
135  
136  
        if (parentNode.getChildren()
137  
            .size() > 0) {
138  
            while (parentNode.getChildren()
139  
                .size() > 0) {
140  
                childNode.getChildren()
141  
                    .add(parentNode.getChildren()
142  
                        .remove(0));
143  
            }
144  
        }
145  
146  
        parentNode.getChildren()
147  
            .add(childNode);
148  
        parentNode.setText(parentNewText);
149  
        parentNode.setPosition(POSITION_UNDEFINED);
150  
    }
151  
152  
    private String getLongestCommonPrefix(String str1, String str2) {
153  
        int compareLength = Math.min(str1.length(), str2.length());
154  
        for (int i = 0; i < compareLength; i++) {
155  
            if (str1.charAt(i) != str2.charAt(i)) {
156  
                return str1.substring(0, i);
157  
            }
158  
        }
159  
        return str1.substring(0, compareLength);
160  
    }
161  
162  
    private List<Node> getAllNodesInTraversePath(String pattern, Node startNode, boolean isAllowPartialMatch) {
163  
        List<Node> nodes = new ArrayList<>();
164  
        for (int i = 0; i < startNode.getChildren()
165  
            .size(); i++) {
166  
            Node currentNode = startNode.getChildren()
167  
                .get(i);
168  
            String nodeText = currentNode.getText();
169  
            if (pattern.charAt(0) == nodeText.charAt(0)) {
170  
                if (isAllowPartialMatch && pattern.length() <= nodeText.length()) {
171  
                    nodes.add(currentNode);
172  
                    return nodes;
173  
                }
174  
175  
                int compareLength = Math.min(nodeText.length(), pattern.length());
176  
                for (int j = 1; j < compareLength; j++) {
177  
                    if (pattern.charAt(j) != nodeText.charAt(j)) {
178  
                        if (isAllowPartialMatch) {
179  
                            nodes.add(currentNode);
180  
                        }
181  
                        return nodes;
182  
                    }
183  
                }
184  
185  
                nodes.add(currentNode);
186  
                if (pattern.length() > compareLength) {
187  
                    List<Node> nodes2 = getAllNodesInTraversePath(pattern.substring(compareLength), currentNode, isAllowPartialMatch);
188  
                    if (nodes2.size() > 0) {
189  
                        nodes.addAll(nodes2);
190  
                    } else if (!isAllowPartialMatch) {
191  
                        nodes.add(null);
192  
                    }
193  
                }
194  
                return nodes;
195  
            }
196  
        }
197  
        return nodes;
198  
    }
199  
200  
    S printTree() { ret root.printTree(""); }
201  
  }
202  
203  
  start-thread {
204  
    tree = new SuffixTree(text);
205  
    print(tree.printTree());
206  
    print("Estimated tree size: " + nBytes(guessObjectSize(tree)));
207  
    print(+text);
208  
    print(+pattern);
209  
    pnl(tree.searchText(pattern));
210  
  }
211  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028128
Snippet name: Suffix Tree Spike v1 [seems OK]
Eternal ID of this version: #1028128/7
Text MD5: 203020476937643ad2d171e987647b5e
Transpilation MD5: 3c6e660a3ace1f8d2035233f68cf00b8
Author: stefan
Category: javax / text searching
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-05-23 12:01:48
Source code size: 7805 bytes / 211 lines
Pitched / IR pitched: No / No
Views / Downloads: 234 / 416
Version history: 6 change(s)
Referenced in: [show references]