Libraryless. Click here for Pure Java version (3322L/21K).
sclass SuffixTree { Node root; S fullText; sclass Node { Substring text; // we either have children or positions O childrenOrPositions; // Map or IntBuffer // key is first character or -1 for end of string Map<Int, Node> children() { if (childrenOrPositions cast Map) ret childrenOrPositions; null; } IntBuffer positions() { if (childrenOrPositions cast IntBuffer) ret childrenOrPositions; null; } *() {} *(Substring *text) {} void addPosition(int i) { if (children() != null) fail("illegal node conversion to positions"); if (positions() == null) childrenOrPositions = new IntBuffer; positions().add(i); } void addChild(Node n) { if (positions() != null) fail("illegal node conversion to children"); if (children() == null) childrenOrPositions = new Map; children().put(charToIntOrMinus1(n.text), n); } Node getChild(int c) { ret mapGet(children(), c); } } *() {} *(S *fullText) { root = new Node(Substring(fullText, 0, 0)); for i over fullText: addSuffix(Substring(fullText, i), i); } void addSuffix(Substring s, int position) { Node node = root; while (!empty(s)) { int _n = lCommonPrefix_CharSequence(node.text, s); s = s.substring(_n); if (_n >= l(node.text)) { // node text exhausted Node n = node.getChild(charToIntOrMinus1(s)); if (empty(s)) { if (n != null) node = n; node.addPosition(position); ret; } else { if (n == null) { n = new Node(s); n.addPosition(position); node.addChild(n); ret; } else node = n; } } else { // node text not exhausted // split node. first, move all the node's vitals to a new node nOld Node nOld = new Node(node.text.substring(_n)); nOld.childrenOrPositions = node.childrenOrPositions; node.childrenOrPositions = null; node.text = node.text.substring(0, _n); node.addChild(nOld); // now add a new node Node nNew = new Node(s); nNew.addPosition(position); node.addChild(nNew); ret; } } } public L<Int> indicesOf(S pattern) { new IntBuffer out; getIndicesOf(out, root, pattern, 0); ret out.asVirtualList(); } void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) { int lPattern = l(pattern); while true { int n = lCommonPrefix_CharSequence(node.text, Substring(pattern, iPattern)); iPattern += n; if (iPattern >= lPattern) break; if (n < l(node.text)) break; Node child = node.getChild(charAtAsIntOrMinus1(pattern, iPattern)); if (child != null) node = child; } collectPositions(out, node); } L<Int> getPositions(Node node) { new IntBuffer out; collectPositions(out, node); ret out.asVirtualList(); } void collectPositions(IntBuffer out, Node node) { if (node == null) ret; out.addAll(node.positions()); fOr (Node n : values(node.children())) collectPositions(out, n); } void printMe() { printNode("", "", root); } void printNode(S indent, S pre, Node node) { print(indent + pre + quote(shorten(20, node.text)) + (empty(node.positions()) ? "" : " " + node.positions())); fOr (int c, Node n : node.children()) printNode(indent + " ", "[" + (c < 0 ? "end" : quote((char) c)) + "] ", n); } }
Began life as a copy of #1029202
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029211 |
Snippet name: | SuffixTree [optimization attempt, probably not correct] |
Eternal ID of this version: | #1029211/1 |
Text MD5: | 73a27d11df73b993b44c2d39309e308c |
Transpilation MD5: | c440e94d2d5d136562b1754e8b8171b0 |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-25 13:32:38 |
Source code size: | 3679 bytes / 126 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 162 / 235 |
Referenced in: | [show references] |