// A naive suffix tree implementation sclass IntSuffixTree { Node root; L fullText; int nodeCount; Comparator childComparator = (a, b) -> a.firstCharOrMinus1(this)-b.firstCharOrMinus1(this); new DummyNode dummyNode; int printEvery = 1000000; sinterface IFirstChar { int firstCharOrMinus1(IntSuffixTree tree); } sclass DummyNode implements IFirstChar { int firstChar; public int firstCharOrMinus1(IntSuffixTree tree) { ret firstChar; } } sclass Node implements IFirstChar { int from, to; // our range in fullText CompactTreeSet children; *() {} *(int *from, int *to) {} *(IntRange s) { from = s.start; to = s.end; } L text(IntSuffixTree tree) { ret subList(tree.fullText, from, to); } int lText() { ret to-from; } bool isTerminal(IntSuffixTree tree) { ret to == l(tree.fullText); } void addChild(IntSuffixTree tree, Node n) { if (children == null) children = makeEmptyChildrenSet(tree); children.add(n); } CompactTreeSet makeEmptyChildrenSet(IntSuffixTree tree) { ret new CompactTreeSet<>(tree.childComparator); } Cl children() { ret children; } public int firstCharOrMinus1(IntSuffixTree tree) { ret getOrMinus1(tree.fullText, from); } Node getChild(IntSuffixTree tree, int c) { if (children == null) null; tree.dummyNode.firstChar = c; ret (Node) children.find(tree.dummyNode); } } *() {} *(L fullText) { process(fullText); } void process(L fullText) { root = new Node(0, 0); ++nodeCount; for i over fullText: { ping(); addSuffix(IntRange(i, l(fullText))); if (((i+1) % printEvery) == 0) print((i+1) + "/" + l(fullText ) + " suffixes added (" + nNodes(nodeCount) + ")"); } } void addSuffix(IntRange s) { Node node = root; while (!empty(s)) { int _n = lCommonPrefix_lists_ext(fullText, node.from, fullText, s.start); s.start += _n; if (_n >= node.lText()) { // node text exhausted if (empty(s)) { // pattern also exhausted - done //print("Exhausted: " + node.from + "/" + node.to); // add a new termination node Node nNew = new Node(s); ++nodeCount; node.addChild(IntSuffixTree.this, nNew); ret; } else { Node n = node.getChild(IntSuffixTree.this, getOrMinus1(fullText, s.start)); if (n == null) { n = new Node(s); ++nodeCount; node.addChild(IntSuffixTree.this, n); ret; } else node = n; } } else { // node text not exhausted // split node. first, move all the node's vitals to a new node nOld Node nOld = new Node(node.from+_n, node.to); ++nodeCount; nOld.children = node.children; node.children = null; node.to = node.from+_n; node.addChild(IntSuffixTree.this, nOld); // now add a new node Node nNew = new Node(s); ++nodeCount; node.addChild(IntSuffixTree.this, nNew); ret; } } } public L indicesOf(L pattern) { ret asList(indicesOf_iterator(pattern)); } public ItIt indicesOf_iterator(L pattern) { ret mapI_notNull(allNodesUnder(scanDown(root, pattern)), nad -> nad.node.isTerminal(this) ? nad.position() : null); } srecord NodeAndDepth(Node node, int depth) { int position() { int position = node.from-depth; //print("from=" + node.from + ", to=" + node.to + ", depth=" + depth + ", position=" + position); ret position; } Cl children() { ret lmap wrapChild((Cl) (Cl) node.children()); } NodeAndDepth wrapChild(Node n) { ret n == null ? null : new NodeAndDepth(n, depth+node.lText()); } NodeAndDepth getChild(IntSuffixTree tree, int c) { ret wrapChild(node.getChild(tree, c)); } } NodeAndDepth scanDown(Node node, L pattern) { int lPattern = l(pattern), iPattern = 0; NodeAndDepth nad = new(node, 0); while true { int n = lCommonPrefix_lists(nad.node.text(this), subList(pattern, iPattern)); iPattern += n; if (iPattern >= lPattern) break; // pattern exhausted - done if (n < nad.node.lText()) null; // mismatch, exit NodeAndDepth child = nad.getChild(IntSuffixTree.this, or(get(pattern, iPattern), -1)); if (child != null) continue with nad = child; null; } ret nad; } void printMe() { printNode("", "", new NodeAndDepth(root, 0)); } void printNode(S indent, S pre, NodeAndDepth nad) { print(indent + pre + takeFirst(20, nad.node.text(this)) + (!nad.node.isTerminal(this) ? "" : " [" + nad.position() + "]")); fOr (NodeAndDepth n : nad.children()) { printNode(indent + " ", takeFirst(1, n.node.text(this)) + " ", n); } } ItIt allNodes() { ret allNodesUnder(new NodeAndDepth(root, 0)); } // includes the node itself ItIt allNodesUnder(NodeAndDepth nad) { new L> stack; if (nad != null) stack.add(iteratorLL(nad)); ret iteratorFromFunction_if0(() -> { while (nempty(stack)) { if (!last(stack).hasNext()) popLast(stack); else { NodeAndDepth n = last(stack).next(); stack.add((Iterator) iterator(n.children())); ret n; } } null; }); } }