Uses 911K of libraries. Click here for Pure Java version (3756L/20K).
1 | !7 |
2 | |
3 | // adapted from https://github.com/eugenp/tutorials/tree/master/algorithms-searching/src/main/java/com/baeldung/algorithms/suffixtree |
4 | |
5 | cmodule SuffixTreeSpike > DynPrintLog {
|
6 | switchable S text = "hello world what is up in the world"; |
7 | switchable S pattern = "world"; |
8 | transient SuffixTree tree; |
9 | |
10 | sclass Node {
|
11 | S text; |
12 | new L<Node> children; |
13 | int position; |
14 | |
15 | *(S *text, int *position) {}
|
16 | |
17 | S getText() { ret text; }
|
18 | void setText(S text) { this.text = text; }
|
19 | |
20 | int getPosition() { ret position; }
|
21 | void setPosition(int position) { this.position = position; }
|
22 | L<Node> getChildren() { ret children; }
|
23 | void setChildren(L<Node> children) { this.children = children; }
|
24 | |
25 | S printTree(String depthIndicator) {
|
26 | S str = ""; |
27 | S positionStr = position > -1 ? "[" + position + "]" : ""; |
28 | str += depthIndicator + text + positionStr + "\n"; |
29 | |
30 | for (int i = 0; i < children.size(); i++) |
31 | str += children.get(i).printTree(depthIndicator + "\t"); |
32 | return str; |
33 | } |
34 | |
35 | toString { ret printTree(""); }
|
36 | } |
37 | |
38 | sclass SuffixTree {
|
39 | private static final String WORD_TERMINATION = "$"; |
40 | private static final int POSITION_UNDEFINED = -1; |
41 | Node root; |
42 | S fullText; |
43 | |
44 | *(S text) {
|
45 | root = new Node("", POSITION_UNDEFINED);
|
46 | for (int i = 0; i < text.length(); i++) {
|
47 | addSuffix(text.substring(i) + WORD_TERMINATION, i); |
48 | } |
49 | fullText = text; |
50 | } |
51 | |
52 | public LS searchText(S pattern) {
|
53 | //LOGGER.info("Searching for pattern \"{}\"", pattern);
|
54 | List<String> result = new ArrayList<>(); |
55 | List<Node> nodes = getAllNodesInTraversePath(pattern, root, false); |
56 | |
57 | if (nodes.size() > 0) {
|
58 | Node lastNode = nodes.get(nodes.size() - 1); |
59 | if (lastNode != null) {
|
60 | List<Integer> positions = getPositions(lastNode); |
61 | /*positions = positions.stream() |
62 | .sorted() |
63 | .collect(Collectors.toList());*/ |
64 | sortInPlace(positions); |
65 | positions.forEach(m -> result.add((markPatternInText(m, pattern)))); |
66 | } |
67 | } |
68 | return result; |
69 | } |
70 | |
71 | private void addSuffix(String suffix, int position) {
|
72 | //LOGGER.info(">>>>>>>>>>>> Adding new suffix {}", suffix);
|
73 | List<Node> nodes = getAllNodesInTraversePath(suffix, root, true); |
74 | if (nodes.size() == 0) {
|
75 | addChildNode(root, suffix, position); |
76 | //LOGGER.info("{}", printTree());
|
77 | } else {
|
78 | Node lastNode = nodes.remove(nodes.size() - 1); |
79 | String newText = suffix; |
80 | if (nodes.size() > 0) {
|
81 | String existingSuffixUptoLastNode = nodes.stream() |
82 | .map(a -> a.getText()) |
83 | .reduce("", String::concat);
|
84 | |
85 | // Remove prefix from newText already included in parent |
86 | newText = newText.substring(existingSuffixUptoLastNode.length()); |
87 | } |
88 | extendNode(lastNode, newText, position); |
89 | //LOGGER.info("{}", printTree());
|
90 | } |
91 | } |
92 | |
93 | private List<Integer> getPositions(Node node) {
|
94 | List<Integer> positions = new ArrayList<>(); |
95 | if (node.getText() |
96 | .endsWith(WORD_TERMINATION)) {
|
97 | positions.add(node.getPosition()); |
98 | } |
99 | for (int i = 0; i < node.getChildren() |
100 | .size(); i++) {
|
101 | positions.addAll(getPositions(node.getChildren() |
102 | .get(i))); |
103 | } |
104 | return positions; |
105 | } |
106 | |
107 | private String markPatternInText(Integer startPosition, String pattern) {
|
108 | String matchingTextLHS = fullText.substring(0, startPosition); |
109 | String matchingText = fullText.substring(startPosition, startPosition + pattern.length()); |
110 | String matchingTextRHS = fullText.substring(startPosition + pattern.length()); |
111 | return matchingTextLHS + "[" + matchingText + "]" + matchingTextRHS; |
112 | } |
113 | |
114 | private void addChildNode(Node parentNode, String text, int position) {
|
115 | parentNode.getChildren() |
116 | .add(new Node(text, position)); |
117 | } |
118 | |
119 | private void extendNode(Node node, String newText, int position) {
|
120 | String currentText = node.getText(); |
121 | String commonPrefix = getLongestCommonPrefix(currentText, newText); |
122 | |
123 | if (commonPrefix != currentText) {
|
124 | String parentText = currentText.substring(0, commonPrefix.length()); |
125 | String childText = currentText.substring(commonPrefix.length()); |
126 | splitNodeToParentAndChild(node, parentText, childText); |
127 | } |
128 | |
129 | String remainingText = newText.substring(commonPrefix.length()); |
130 | addChildNode(node, remainingText, position); |
131 | } |
132 | |
133 | private void splitNodeToParentAndChild(Node parentNode, String parentNewText, String childNewText) {
|
134 | Node childNode = new Node(childNewText, parentNode.getPosition()); |
135 | |
136 | if (parentNode.getChildren() |
137 | .size() > 0) {
|
138 | while (parentNode.getChildren() |
139 | .size() > 0) {
|
140 | childNode.getChildren() |
141 | .add(parentNode.getChildren() |
142 | .remove(0)); |
143 | } |
144 | } |
145 | |
146 | parentNode.getChildren() |
147 | .add(childNode); |
148 | parentNode.setText(parentNewText); |
149 | parentNode.setPosition(POSITION_UNDEFINED); |
150 | } |
151 | |
152 | private String getLongestCommonPrefix(String str1, String str2) {
|
153 | int compareLength = Math.min(str1.length(), str2.length()); |
154 | for (int i = 0; i < compareLength; i++) {
|
155 | if (str1.charAt(i) != str2.charAt(i)) {
|
156 | return str1.substring(0, i); |
157 | } |
158 | } |
159 | return str1.substring(0, compareLength); |
160 | } |
161 | |
162 | private List<Node> getAllNodesInTraversePath(String pattern, Node startNode, boolean isAllowPartialMatch) {
|
163 | List<Node> nodes = new ArrayList<>(); |
164 | for (int i = 0; i < startNode.getChildren() |
165 | .size(); i++) {
|
166 | Node currentNode = startNode.getChildren() |
167 | .get(i); |
168 | String nodeText = currentNode.getText(); |
169 | if (pattern.charAt(0) == nodeText.charAt(0)) {
|
170 | if (isAllowPartialMatch && pattern.length() <= nodeText.length()) {
|
171 | nodes.add(currentNode); |
172 | return nodes; |
173 | } |
174 | |
175 | int compareLength = Math.min(nodeText.length(), pattern.length()); |
176 | for (int j = 1; j < compareLength; j++) {
|
177 | if (pattern.charAt(j) != nodeText.charAt(j)) {
|
178 | if (isAllowPartialMatch) {
|
179 | nodes.add(currentNode); |
180 | } |
181 | return nodes; |
182 | } |
183 | } |
184 | |
185 | nodes.add(currentNode); |
186 | if (pattern.length() > compareLength) {
|
187 | List<Node> nodes2 = getAllNodesInTraversePath(pattern.substring(compareLength), currentNode, isAllowPartialMatch); |
188 | if (nodes2.size() > 0) {
|
189 | nodes.addAll(nodes2); |
190 | } else if (!isAllowPartialMatch) {
|
191 | nodes.add(null); |
192 | } |
193 | } |
194 | return nodes; |
195 | } |
196 | } |
197 | return nodes; |
198 | } |
199 | |
200 | S printTree() { ret root.printTree(""); }
|
201 | } |
202 | |
203 | start-thread {
|
204 | tree = new SuffixTree(text); |
205 | print(tree.printTree()); |
206 | print("Estimated tree size: " + nBytes(guessObjectSize(tree)));
|
207 | print(+text); |
208 | print(+pattern); |
209 | pnl(tree.searchText(pattern)); |
210 | } |
211 | } |
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1028128 |
| Snippet name: | Suffix Tree Spike v1 [seems OK] |
| Eternal ID of this version: | #1028128/7 |
| Text MD5: | 203020476937643ad2d171e987647b5e |
| Transpilation MD5: | 3c6e660a3ace1f8d2035233f68cf00b8 |
| Author: | stefan |
| Category: | javax / text searching |
| Type: | JavaX source code (Dynamic Module) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-05-23 12:01:48 |
| Source code size: | 7805 bytes / 211 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 460 / 689 |
| Version history: | 6 change(s) |
| Referenced in: | [show references] |