Libraryless. Click here for Pure Java version (3322L/21K).
1 | sclass SuffixTree { |
2 | Node root; |
3 | S fullText; |
4 | |
5 | sclass Node { |
6 | Substring text; |
7 | // we either have children or positions |
8 | O childrenOrPositions; // Map or IntBuffer |
9 | |
10 | // key is first character or -1 for end of string |
11 | Map<Int, Node> children() { |
12 | if (childrenOrPositions cast Map) ret childrenOrPositions; null; |
13 | } |
14 | |
15 | IntBuffer positions() { |
16 | if (childrenOrPositions cast IntBuffer) ret childrenOrPositions; null; |
17 | } |
18 | |
19 | *() {} |
20 | *(Substring *text) {} |
21 | |
22 | void addPosition(int i) { |
23 | if (children() != null) fail("illegal node conversion to positions"); |
24 | if (positions() == null) childrenOrPositions = new IntBuffer; |
25 | positions().add(i); |
26 | } |
27 | |
28 | void addChild(Node n) { |
29 | if (positions() != null) fail("illegal node conversion to children"); |
30 | if (children() == null) childrenOrPositions = new Map; |
31 | children().put(charToIntOrMinus1(n.text), n); |
32 | } |
33 | |
34 | Node getChild(int c) { |
35 | ret mapGet(children(), c); |
36 | } |
37 | } |
38 | |
39 | *() {} |
40 | *(S *fullText) { |
41 | root = new Node(Substring(fullText, 0, 0)); |
42 | for i over fullText: |
43 | addSuffix(Substring(fullText, i), i); |
44 | } |
45 | |
46 | void addSuffix(Substring s, int position) { |
47 | Node node = root; |
48 | |
49 | while (!empty(s)) { |
50 | int _n = lCommonPrefix_CharSequence(node.text, s); |
51 | s = s.substring(_n); |
52 | if (_n >= l(node.text)) { // node text exhausted |
53 | Node n = node.getChild(charToIntOrMinus1(s)); |
54 | if (empty(s)) { |
55 | if (n != null) node = n; |
56 | node.addPosition(position); |
57 | ret; |
58 | } else { |
59 | if (n == null) { |
60 | n = new Node(s); |
61 | n.addPosition(position); |
62 | node.addChild(n); |
63 | ret; |
64 | } else |
65 | node = n; |
66 | } |
67 | } else { // node text not exhausted |
68 | // split node. first, move all the node's vitals to a new node nOld |
69 | Node nOld = new Node(node.text.substring(_n)); |
70 | nOld.childrenOrPositions = node.childrenOrPositions; |
71 | node.childrenOrPositions = null; |
72 | node.text = node.text.substring(0, _n); |
73 | node.addChild(nOld); |
74 | |
75 | // now add a new node |
76 | Node nNew = new Node(s); |
77 | nNew.addPosition(position); |
78 | node.addChild(nNew); |
79 | ret; |
80 | } |
81 | } |
82 | } |
83 | |
84 | public L<Int> indicesOf(S pattern) { |
85 | new IntBuffer out; |
86 | getIndicesOf(out, root, pattern, 0); |
87 | ret out.asVirtualList(); |
88 | } |
89 | |
90 | void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) { |
91 | int lPattern = l(pattern); |
92 | while true { |
93 | int n = lCommonPrefix_CharSequence(node.text, Substring(pattern, iPattern)); |
94 | iPattern += n; |
95 | if (iPattern >= lPattern) break; |
96 | if (n < l(node.text)) break; |
97 | Node child = node.getChild(charAtAsIntOrMinus1(pattern, iPattern)); |
98 | if (child != null) node = child; |
99 | } |
100 | |
101 | collectPositions(out, node); |
102 | } |
103 | |
104 | L<Int> getPositions(Node node) { |
105 | new IntBuffer out; |
106 | collectPositions(out, node); |
107 | ret out.asVirtualList(); |
108 | } |
109 | |
110 | void collectPositions(IntBuffer out, Node node) { |
111 | if (node == null) ret; |
112 | out.addAll(node.positions()); |
113 | fOr (Node n : values(node.children())) |
114 | collectPositions(out, n); |
115 | } |
116 | |
117 | void printMe() { |
118 | printNode("", "", root); |
119 | } |
120 | |
121 | void printNode(S indent, S pre, Node node) { |
122 | print(indent + pre + quote(shorten(20, node.text)) + (empty(node.positions()) ? "" : " " + node.positions())); |
123 | fOr (int c, Node n : node.children()) |
124 | printNode(indent + " ", "[" + (c < 0 ? "end" : quote((char) c)) + "] ", n); |
125 | } |
126 | } |
Began life as a copy of #1029202
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029211 |
Snippet name: | SuffixTree [optimization attempt, probably not correct] |
Eternal ID of this version: | #1029211/1 |
Text MD5: | 73a27d11df73b993b44c2d39309e308c |
Transpilation MD5: | c440e94d2d5d136562b1754e8b8171b0 |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-25 13:32:38 |
Source code size: | 3679 bytes / 126 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 164 / 239 |
Referenced in: | [show references] |