Libraryless. Click here for Pure Java version (3312L/21K).
sclass SuffixTree { Node root; S fullText; sclass Node { Substring text; Map<Int, Node> children; // key is first character or -1 for end of string IntBuffer positions; *() {} *(Substring *text) {} void addPosition(int i) { if (positions == null) positions = new IntBuffer; positions.add(i); } void addChild(Node n) { if (children == null) children = new Map; children.put(charToIntOrMinus1(n.text), n); } Node getChild(int c) { ret mapGet(children, c); } } *() {} *(S *fullText) { root = new Node(Substring(fullText, 0, 0)); for i over fullText: addSuffix(Substring(fullText, i), i); } void addSuffix(Substring s, int position) { Node node = root; while (!empty(s)) { int _n = lCommonPrefix_CharSequence(node.text, s); s = s.substring(_n); if (_n >= l(node.text)) { // node text exhausted if (empty(s)) { node.addPosition(position); ret; } else { Node n = node.getChild(charToIntOrMinus1(s)); if (n == null) { n = new Node(s); n.addPosition(position); node.addChild(n); ret; } else node = n; } } else { // node text not exhausted // split node. first, move all the node's vitals to a new node nOld Node nOld = new Node(node.text.substring(_n)); nOld.positions = node.positions; node.positions = null; nOld.children = node.children; node.children = null; node.text = node.text.substring(0, _n); node.addChild(nOld); // now add a new node Node nNew = new Node(s); nNew.addPosition(position); node.addChild(nNew); ret; } } } public L<Int> indicesOf(S pattern) { new IntBuffer out; getIndicesOf(out, root, pattern, 0); ret out.asVirtualList(); } void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) { int lPattern = l(pattern); while true { int n = lCommonPrefix_CharSequence(node.text, Substring(pattern, iPattern)); iPattern += n; if (iPattern >= lPattern) break; if (n < l(node.text)) break; Node child = node.getChild(charAtAsIntOrMinus1(pattern, iPattern)); if (child != null) node = child; } collectPositions(out, node); } L<Int> getPositions(Node node) { new IntBuffer out; collectPositions(out, node); ret out.asVirtualList(); } void collectPositions(IntBuffer out, Node node) { if (node == null) ret; out.addAll(node.positions); fOr (Node n : values(node.children)) collectPositions(out, n); } void printMe() { printNode("", "", root); } void printNode(S indent, S pre, Node node) { print(indent + pre + quote(shorten(20, node.text)) + (empty(node.positions) ? "" : " " + node.positions)); fOr (int c, Node n : node.children) printNode(indent + " ", "[" + (c < 0 ? "end" : quote((char) c)) + "] ", n); } }
Began life as a copy of #1029202
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029210 |
Snippet name: | SuffixTree [OK, backup before optimization] |
Eternal ID of this version: | #1029210/1 |
Text MD5: | ddcf61d36ee3e6788d7ed71f2e696e39 |
Transpilation MD5: | 060b156144ed667832707870a23f1888 |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-25 13:23:27 |
Source code size: | 3211 bytes / 116 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 186 / 251 |
Referenced in: | [show references] |