Uses 911K of libraries. Click here for Pure Java version (3756L/20K).
1 | !7 |
2 | |
3 | // adapted from https://github.com/eugenp/tutorials/tree/master/algorithms-searching/src/main/java/com/baeldung/algorithms/suffixtree |
4 | |
5 | cmodule SuffixTreeSpike > DynPrintLog { |
6 | switchable S text = "hello world what is up in the world"; |
7 | switchable S pattern = "world"; |
8 | transient SuffixTree tree; |
9 | |
10 | sclass Node { |
11 | S text; |
12 | new L<Node> children; |
13 | int position; |
14 | |
15 | *(S *text, int *position) {} |
16 | |
17 | S getText() { ret text; } |
18 | void setText(S text) { this.text = text; } |
19 | |
20 | int getPosition() { ret position; } |
21 | void setPosition(int position) { this.position = position; } |
22 | L<Node> getChildren() { ret children; } |
23 | void setChildren(L<Node> children) { this.children = children; } |
24 | |
25 | S printTree(String depthIndicator) { |
26 | S str = ""; |
27 | S positionStr = position > -1 ? "[" + position + "]" : ""; |
28 | str += depthIndicator + text + positionStr + "\n"; |
29 | |
30 | for (int i = 0; i < children.size(); i++) |
31 | str += children.get(i).printTree(depthIndicator + "\t"); |
32 | return str; |
33 | } |
34 | |
35 | toString { ret printTree(""); } |
36 | } |
37 | |
38 | sclass SuffixTree { |
39 | private static final String WORD_TERMINATION = "$"; |
40 | private static final int POSITION_UNDEFINED = -1; |
41 | Node root; |
42 | S fullText; |
43 | |
44 | *(S text) { |
45 | root = new Node("", POSITION_UNDEFINED); |
46 | for (int i = 0; i < text.length(); i++) { |
47 | addSuffix(text.substring(i) + WORD_TERMINATION, i); |
48 | } |
49 | fullText = text; |
50 | } |
51 | |
52 | public LS searchText(S pattern) { |
53 | //LOGGER.info("Searching for pattern \"{}\"", pattern); |
54 | List<String> result = new ArrayList<>(); |
55 | List<Node> nodes = getAllNodesInTraversePath(pattern, root, false); |
56 | |
57 | if (nodes.size() > 0) { |
58 | Node lastNode = nodes.get(nodes.size() - 1); |
59 | if (lastNode != null) { |
60 | List<Integer> positions = getPositions(lastNode); |
61 | /*positions = positions.stream() |
62 | .sorted() |
63 | .collect(Collectors.toList());*/ |
64 | sortInPlace(positions); |
65 | positions.forEach(m -> result.add((markPatternInText(m, pattern)))); |
66 | } |
67 | } |
68 | return result; |
69 | } |
70 | |
71 | private void addSuffix(String suffix, int position) { |
72 | //LOGGER.info(">>>>>>>>>>>> Adding new suffix {}", suffix); |
73 | List<Node> nodes = getAllNodesInTraversePath(suffix, root, true); |
74 | if (nodes.size() == 0) { |
75 | addChildNode(root, suffix, position); |
76 | //LOGGER.info("{}", printTree()); |
77 | } else { |
78 | Node lastNode = nodes.remove(nodes.size() - 1); |
79 | String newText = suffix; |
80 | if (nodes.size() > 0) { |
81 | String existingSuffixUptoLastNode = nodes.stream() |
82 | .map(a -> a.getText()) |
83 | .reduce("", String::concat); |
84 | |
85 | // Remove prefix from newText already included in parent |
86 | newText = newText.substring(existingSuffixUptoLastNode.length()); |
87 | } |
88 | extendNode(lastNode, newText, position); |
89 | //LOGGER.info("{}", printTree()); |
90 | } |
91 | } |
92 | |
93 | private List<Integer> getPositions(Node node) { |
94 | List<Integer> positions = new ArrayList<>(); |
95 | if (node.getText() |
96 | .endsWith(WORD_TERMINATION)) { |
97 | positions.add(node.getPosition()); |
98 | } |
99 | for (int i = 0; i < node.getChildren() |
100 | .size(); i++) { |
101 | positions.addAll(getPositions(node.getChildren() |
102 | .get(i))); |
103 | } |
104 | return positions; |
105 | } |
106 | |
107 | private String markPatternInText(Integer startPosition, String pattern) { |
108 | String matchingTextLHS = fullText.substring(0, startPosition); |
109 | String matchingText = fullText.substring(startPosition, startPosition + pattern.length()); |
110 | String matchingTextRHS = fullText.substring(startPosition + pattern.length()); |
111 | return matchingTextLHS + "[" + matchingText + "]" + matchingTextRHS; |
112 | } |
113 | |
114 | private void addChildNode(Node parentNode, String text, int position) { |
115 | parentNode.getChildren() |
116 | .add(new Node(text, position)); |
117 | } |
118 | |
119 | private void extendNode(Node node, String newText, int position) { |
120 | String currentText = node.getText(); |
121 | String commonPrefix = getLongestCommonPrefix(currentText, newText); |
122 | |
123 | if (commonPrefix != currentText) { |
124 | String parentText = currentText.substring(0, commonPrefix.length()); |
125 | String childText = currentText.substring(commonPrefix.length()); |
126 | splitNodeToParentAndChild(node, parentText, childText); |
127 | } |
128 | |
129 | String remainingText = newText.substring(commonPrefix.length()); |
130 | addChildNode(node, remainingText, position); |
131 | } |
132 | |
133 | private void splitNodeToParentAndChild(Node parentNode, String parentNewText, String childNewText) { |
134 | Node childNode = new Node(childNewText, parentNode.getPosition()); |
135 | |
136 | if (parentNode.getChildren() |
137 | .size() > 0) { |
138 | while (parentNode.getChildren() |
139 | .size() > 0) { |
140 | childNode.getChildren() |
141 | .add(parentNode.getChildren() |
142 | .remove(0)); |
143 | } |
144 | } |
145 | |
146 | parentNode.getChildren() |
147 | .add(childNode); |
148 | parentNode.setText(parentNewText); |
149 | parentNode.setPosition(POSITION_UNDEFINED); |
150 | } |
151 | |
152 | private String getLongestCommonPrefix(String str1, String str2) { |
153 | int compareLength = Math.min(str1.length(), str2.length()); |
154 | for (int i = 0; i < compareLength; i++) { |
155 | if (str1.charAt(i) != str2.charAt(i)) { |
156 | return str1.substring(0, i); |
157 | } |
158 | } |
159 | return str1.substring(0, compareLength); |
160 | } |
161 | |
162 | private List<Node> getAllNodesInTraversePath(String pattern, Node startNode, boolean isAllowPartialMatch) { |
163 | List<Node> nodes = new ArrayList<>(); |
164 | for (int i = 0; i < startNode.getChildren() |
165 | .size(); i++) { |
166 | Node currentNode = startNode.getChildren() |
167 | .get(i); |
168 | String nodeText = currentNode.getText(); |
169 | if (pattern.charAt(0) == nodeText.charAt(0)) { |
170 | if (isAllowPartialMatch && pattern.length() <= nodeText.length()) { |
171 | nodes.add(currentNode); |
172 | return nodes; |
173 | } |
174 | |
175 | int compareLength = Math.min(nodeText.length(), pattern.length()); |
176 | for (int j = 1; j < compareLength; j++) { |
177 | if (pattern.charAt(j) != nodeText.charAt(j)) { |
178 | if (isAllowPartialMatch) { |
179 | nodes.add(currentNode); |
180 | } |
181 | return nodes; |
182 | } |
183 | } |
184 | |
185 | nodes.add(currentNode); |
186 | if (pattern.length() > compareLength) { |
187 | List<Node> nodes2 = getAllNodesInTraversePath(pattern.substring(compareLength), currentNode, isAllowPartialMatch); |
188 | if (nodes2.size() > 0) { |
189 | nodes.addAll(nodes2); |
190 | } else if (!isAllowPartialMatch) { |
191 | nodes.add(null); |
192 | } |
193 | } |
194 | return nodes; |
195 | } |
196 | } |
197 | return nodes; |
198 | } |
199 | |
200 | S printTree() { ret root.printTree(""); } |
201 | } |
202 | |
203 | start-thread { |
204 | tree = new SuffixTree(text); |
205 | print(tree.printTree()); |
206 | print("Estimated tree size: " + nBytes(guessObjectSize(tree))); |
207 | print(+text); |
208 | print(+pattern); |
209 | pnl(tree.searchText(pattern)); |
210 | } |
211 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028128 |
Snippet name: | Suffix Tree Spike v1 [seems OK] |
Eternal ID of this version: | #1028128/7 |
Text MD5: | 203020476937643ad2d171e987647b5e |
Transpilation MD5: | 3c6e660a3ace1f8d2035233f68cf00b8 |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-23 12:01:48 |
Source code size: | 7805 bytes / 211 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 234 / 416 |
Version history: | 6 change(s) |
Referenced in: | [show references] |