Libraryless. Click here for Pure Java version (14689L/100K).
// A naive suffix tree implementation sclass SuffixTree { Node root; S fullText; int nodeCount; Comparator<IFirstChar> childComparator = (a, b) -> a.firstCharOrMinus1(this)-b.firstCharOrMinus1(this); new DummyNode dummyNode; sinterface IFirstChar { int firstCharOrMinus1(SuffixTree tree); } sclass DummyNode implements IFirstChar { int firstChar; public int firstCharOrMinus1(SuffixTree tree) { ret firstChar; } } sclass Node implements IFirstChar { int from, to; // our range in fullText CompactTreeSet<IFirstChar> children; // actually, <Node> int position = -1; *() {} *(int *from, int *to) {} *(Substring s) { from = s.startIndex(); to = s.endIndex(); } Substring text(SuffixTree tree) { ret Substring(tree.fullText, from, to); } int lText() { ret to-from; } void addPosition(int i) { if (position >= 0) fail("Already have position"); position = i; } void addChild(SuffixTree tree, Node n) { if (children == null) children = makeEmptyChildrenSet(tree); children.add(n); } CompactTreeSet<IFirstChar> makeEmptyChildrenSet(SuffixTree tree) { ret new CompactTreeSet<>(tree.childComparator); } public int firstCharOrMinus1(SuffixTree tree) { ret charToIntOrMinus1(text(tree)); } Node getChild(SuffixTree tree, int c) { if (children == null) null; tree.dummyNode.firstChar = c; ret (Node) children.find(tree.dummyNode); } } *() {} *(S *fullText) { root = new Node(0, 0); ++nodeCount; for i over fullText: { addSuffix(Substring(fullText, i), i); if (((i+1) % 1000000) == 0) print((i+1) + " suffixes added (" + nNodes(nodeCount) + ")"); } } void addSuffix(Substring s, int position) { Node node = root; while (!empty(s)) { int _n = lCommonPrefix_CharSequence(node.text(SuffixTree.this), s); s = s.substring(_n); if (_n >= node.lText()) { // node text exhausted if (empty(s)) { // pattern also exhausted - done node.addPosition(position); ret; } else { Node n = node.getChild(SuffixTree.this, charToIntOrMinus1(s)); if (n == null) { n = new Node(s); ++nodeCount; n.addPosition(position); node.addChild(SuffixTree.this, n); ret; } else node = n; } } else { // node text not exhausted // split node. first, move all the node's vitals to a new node nOld Node nOld = new Node(node.from+_n, node.to); ++nodeCount; nOld.position = node.position; node.position = -1; nOld.children = node.children; node.children = null; node.to = node.from+_n; node.addChild(SuffixTree.this, nOld); // now add a new node Node nNew = new Node(s); ++nodeCount; nNew.addPosition(position); node.addChild(SuffixTree.this, nNew); ret; } } } public L<Int> indicesOf(S pattern) { new IntBuffer out; getIndicesOf(out, root, pattern, 0); ret out.asVirtualList(); } void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) { int lPattern = l(pattern); while true { int n = lCommonPrefix_CharSequence(node.text(this), Substring(pattern, iPattern)); iPattern += n; if (iPattern >= lPattern) break; // pattern exhausted - done if (n < node.lText()) ret; // mismatch, exit Node child = node.getChild(SuffixTree.this, charAtAsIntOrMinus1(pattern, iPattern)); if (child != null) continue with node = child; ret; } collectPositions(out, node); } L<Int> getPositions(Node node) { new IntBuffer out; collectPositions(out, node); ret out.asVirtualList(); } void collectPositions(IntBuffer out, Node node) { if (node == null) ret; if (node.position >= 0) out.add(node.position); fOr (IFirstChar n : node.children) collectPositions(out, (Node) n); } void printMe() { printNode("", "", root); } void printNode(S indent, S pre, Node node) { print(indent + pre + quote(shorten(20, node.text(this))) + (node.position < 0 ? "" : " [" + node.position + "]")); fOr (IFirstChar _n : node.children) { Node n = cast _n; printNode(indent + " ", "[" + (n.lText() == 0 ? "end" : quote(n.text(this).charAt(0))) + "] ", n); } } ItIt<Node> allNodes() { new L<Iterator<Node>> stack; stack.add(iteratorLL(root)); ret iteratorFromFunction_if0(() -> { while (nempty(stack)) { if (!last(stack).hasNext()) popLast(stack); else { Node n = last(stack).next(); stack.add((Iterator) iterator(n.children)); ret n; } } null; }); } }
Began life as a copy of #1029202
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029215 |
Snippet name: | SuffixTree [another optimized backup] |
Eternal ID of this version: | #1029215/1 |
Text MD5: | ceb6bc478b58ff5ea43505d73031fa24 |
Transpilation MD5: | b6b9de0cb0b06c587acbf5284c34efbd |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-07-25 14:39:56 |
Source code size: | 5077 bytes / 171 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 166 / 230 |
Referenced in: | [show references] |