Uses 11335K of libraries. Click here for Pure Java version (18648L/123K).
sclass IntSuffixTree_managed implements IManagedObject, AutoCloseable { replace Addr with int. new ManagedIntObjects_v1 mem; Node root; L<Int> fullText; int nodeCount; Cl<AutoCloseable> dependentCloseables; int printEvery = 1000000; Comparator<Node> childComparator = (a, b) -> cmp(a.firstCharOrMinus1(this), b.firstCharOrMinus1(this)); class Node extends AbstractManagedObject { // children is a pointer to an int "array" (length field + ints) final static int ofs_from = 0, ofs_to = 1, ofs_children = 2; final static int objectSize = ofs_children+1; // initialize as wrapper for existing object *(Addr addr) { super(addr); } // initialize as new object *() { addr = mem.alloc(objectSize); } // create object from arguments *(int from, int to) { this(); from(from); to(to); } *(IntRange r) { this(r.start, r.end); } // getters + setters int from() { ret mem.get(addr+ofs_from); } void from(int from) { mem.set(addr+ofs_from, from); } int to() { ret mem.get(addr+ofs_to); } void to(int to) { mem.set(addr+ofs_to, to); } Addr children_ptr() { ret mem.get(addr+ofs_children); } L<Node> children() { ret mem.objectArray(children_ptr(), addr -> new Node(addr)); } void children(Addr addr) { mem.set(this.addr+ofs_children, addr); } // GC handling public void scanForCollection(IManagedObjectCollector gc) { gc.noteObject(addr, objectSize, this == root ? newAddr -> { addr = newAddr; } : null); gc.notePointer(addr+ofs_children); gc.notePointerArray(children_ptr()); for (Node n : children()) n.scanForCollection(gc); } // other methods L<Int> text(IntSuffixTree_managed tree) { ret subList(tree.fullText, from(), to()); } int lText() { ret to()-from(); } bool isTerminal(IntSuffixTree_managed tree) { ret to() == l(tree.fullText); } void setChildren(IntSuffixTree_managed tree, L<Node> nodes) { mem.freePointerArray(children_ptr()); children(mem.newPointerArray(l(nodes)); for i over nodes: children().set(i, nodes.get(i)); sortInPlace(children(), tree.childComparator); } void addChild(IntSuffixTree_managed tree, Node n) { Addr oldChildren = children_ptr(); int i = l(children()); //print(children_ptr := children_ptr() + ", l=" + l(children())); children(mem.resizePointerArray(oldChildren, i+1); //print(children_ptr := children_ptr() + ", l=" + l(children())); mem.freePointerArray(oldChildren); children().set(i, n); sortInPlace(children(), tree.childComparator); } public int firstCharOrMinus1(IntSuffixTree_managed tree) { ret firstOrMinus1(text(tree)); } Node getChild(IntSuffixTree_managed tree, int c) { L<Node> children = children(); int i = generalizedBinarySearch2(children, n -> cmp(n.firstCharOrMinus1(tree), c)); ret i >= 0 ? children.get(i) : null; } toString { ret "Node@" + addr + ", " + from() + "/" + lText() + ". " + nChildren(children()); } } *() {} *(L<Int> fullText) { process(fullText); } // load *(IIntMemory mem, int root) { this.mem = new ManagedIntObjects_v1_virtual(mem); this.root = new Node(root); } void process(L<Int> fullText) { this.fullText = fullText; root = new Node(0, 0); ++nodeCount; for i over fullText: { ping(); addSuffix(IntRange(i, l(fullText))); if (((i+1) % printEvery) == 0) { print((i+1) + " suffixes added (" + nNodes(nodeCount) + ")"); mem.printStats(); } } } void addSuffix(IntRange s) { Node node = root; while (!empty(s)) { int _n = lCommonPrefix_lists(node.text(IntSuffixTree_managed.this), subList(fullText, s)); s.start += _n; if (_n >= node.lText()) { // node text exhausted if (empty(s)) { // pattern also exhausted - done //print("Exhausted: " + node.from() + "/" + node.to()); // add a new termination node Node nNew = new Node(s); ++nodeCount; node.addChild(IntSuffixTree_managed.this, nNew); ret; } else { Node n = node.getChild(IntSuffixTree_managed.this, firstOrMinus1(subList(fullText, s))); if (n == null) { n = new Node(s); ++nodeCount; node.addChild(IntSuffixTree_managed.this, n); ret; } else node = n; } } else { // node text not exhausted // split node. first, move all the node's vitals to a new node nOld Node nOld = new Node(node.from()+_n, node.to()); ++nodeCount; nOld.children(node.children_ptr()); node.children(mem.nullPtr()); node.to(node.from()+_n); node.addChild(IntSuffixTree_managed.this, nOld); // now add a new node Node nNew = new Node(s); ++nodeCount; node.addChild(IntSuffixTree_managed.this, nNew); ret; } } } public L<Int> indicesOf(L<Int> pattern) { ret asList(indicesOf_iterator(pattern)); } public ItIt<Int> indicesOf_iterator(L<Int> pattern) { ret mapI_notNull(allNodesUnder(scanDown(root, pattern)), nad -> nad.node.isTerminal(this) ? nad.position() : null); } srecord NodeAndDepth(Node node, int depth) { int position() { int position = node.from()-depth; //print("from=" + node.from() + ", to=" + node.to() + ", depth=" + depth + ", position=" + position); ret position; } Cl<NodeAndDepth> children() { ret lmap wrapChild(node.children()); } NodeAndDepth wrapChild(Node n) { ret n == null ? null : new NodeAndDepth(n, depth+node.lText()); } NodeAndDepth getChild(IntSuffixTree_managed tree, int c) { ret wrapChild(node.getChild(tree, c)); } } NodeAndDepth scanDown(Node node, L<Int> pattern) { int lPattern = l(pattern), iPattern = 0; NodeAndDepth nad = new(node, 0); while true { int n = lCommonPrefix_lists(nad.node.text(this), subList(pattern, iPattern)); iPattern += n; if (iPattern >= lPattern) break; // pattern exhausted - done if (n < nad.node.lText()) null; // mismatch, exit NodeAndDepth child = nad.getChild(IntSuffixTree_managed.this, or(get(pattern, iPattern), -1)); if (child != null) continue with nad = child; null; } ret nad; } void printMe() { printNode("", "", new NodeAndDepth(root, 0)); } void printNode(S indent, S pre, NodeAndDepth nad) { print(indent + pre + takeFirst(20, nad.node.text(this)) + (!nad.node.isTerminal(this) ? "" : " [" + nad.position() + "]")); fOr (NodeAndDepth n : nad.children()) { printNode(indent + " ", "", n); } } ItIt<NodeAndDepth> allNodes() { ret allNodesUnder(new NodeAndDepth(root, 0)); } // includes the node itself ItIt<NodeAndDepth> allNodesUnder(NodeAndDepth nad) { new L<Iterator<NodeAndDepth>> stack; if (nad != null) stack.add(iteratorLL(nad)); ret iteratorFromFunction_if0(() -> { while (nempty(stack)) { if (!last(stack).hasNext()) popLast(stack); else { NodeAndDepth n = last(stack).next(); stack.add((Iterator) iterator(n.children())); ret n; } } null; }); } public void scanForCollection(IManagedObjectCollector gc) { if (root == null) ret; root.scanForCollection(gc); } public void close() { closeAll(dependentCloseables); } void saveToFile(File f) { mem.set(0, root.addr); mem.saveToFile(f); } Node newNode(int from, int to) { ret new Node(from, to); } }
Began life as a copy of #1029230
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029263 |
Snippet name: | IntSuffixTree_managed [use ints instead of chars] |
Eternal ID of this version: | #1029263/24 |
Text MD5: | eca6c7df63523f3e8af8cd6efd75319d |
Transpilation MD5: | 1b25dcc4276b1188cab6bbc2525bcbcd |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-06-23 22:18:03 |
Source code size: | 8096 bytes / 256 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 373 / 730 |
Version history: | 23 change(s) |
Referenced in: | #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674) |