Uses 11335K of libraries. Click here for Pure Java version (18648L/123K).
1 | sclass IntSuffixTree_managed implements IManagedObject, AutoCloseable { |
2 | replace Addr with int. |
3 | |
4 | new ManagedIntObjects_v1 mem; |
5 | Node root; |
6 | L<Int> fullText; |
7 | int nodeCount; |
8 | Cl<AutoCloseable> dependentCloseables; |
9 | int printEvery = 1000000; |
10 | |
11 | Comparator<Node> childComparator = (a, b) -> cmp(a.firstCharOrMinus1(this), b.firstCharOrMinus1(this)); |
12 | |
13 | class Node extends AbstractManagedObject { |
14 | // children is a pointer to an int "array" (length field + ints) |
15 | final static int ofs_from = 0, ofs_to = 1, ofs_children = 2; |
16 | final static int objectSize = ofs_children+1; |
17 | |
18 | // initialize as wrapper for existing object |
19 | *(Addr addr) { super(addr); } |
20 | |
21 | // initialize as new object |
22 | *() { addr = mem.alloc(objectSize); } |
23 | |
24 | // create object from arguments |
25 | *(int from, int to) { |
26 | this(); |
27 | from(from); |
28 | to(to); |
29 | } |
30 | |
31 | *(IntRange r) { this(r.start, r.end); } |
32 | |
33 | // getters + setters |
34 | int from() { ret mem.get(addr+ofs_from); } |
35 | void from(int from) { mem.set(addr+ofs_from, from); } |
36 | int to() { ret mem.get(addr+ofs_to); } |
37 | void to(int to) { mem.set(addr+ofs_to, to); } |
38 | Addr children_ptr() { ret mem.get(addr+ofs_children); } |
39 | L<Node> children() { |
40 | ret mem.objectArray(children_ptr(), addr -> new Node(addr)); |
41 | } |
42 | void children(Addr addr) { mem.set(this.addr+ofs_children, addr); } |
43 | |
44 | // GC handling |
45 | public void scanForCollection(IManagedObjectCollector gc) { |
46 | gc.noteObject(addr, objectSize, this == root ? newAddr -> { addr = newAddr; } : null); |
47 | gc.notePointer(addr+ofs_children); |
48 | gc.notePointerArray(children_ptr()); |
49 | for (Node n : children()) |
50 | n.scanForCollection(gc); |
51 | } |
52 | |
53 | // other methods |
54 | |
55 | L<Int> text(IntSuffixTree_managed tree) { ret subList(tree.fullText, from(), to()); } |
56 | |
57 | int lText() { ret to()-from(); } |
58 | |
59 | bool isTerminal(IntSuffixTree_managed tree) { ret to() == l(tree.fullText); } |
60 | |
61 | void setChildren(IntSuffixTree_managed tree, L<Node> nodes) { |
62 | mem.freePointerArray(children_ptr()); |
63 | children(mem.newPointerArray(l(nodes)); |
64 | for i over nodes: |
65 | children().set(i, nodes.get(i)); |
66 | sortInPlace(children(), tree.childComparator); |
67 | } |
68 | |
69 | void addChild(IntSuffixTree_managed tree, Node n) { |
70 | Addr oldChildren = children_ptr(); |
71 | int i = l(children()); |
72 | //print(children_ptr := children_ptr() + ", l=" + l(children())); |
73 | children(mem.resizePointerArray(oldChildren, i+1); |
74 | //print(children_ptr := children_ptr() + ", l=" + l(children())); |
75 | mem.freePointerArray(oldChildren); |
76 | children().set(i, n); |
77 | sortInPlace(children(), tree.childComparator); |
78 | } |
79 | |
80 | public int firstCharOrMinus1(IntSuffixTree_managed tree) { |
81 | ret firstOrMinus1(text(tree)); |
82 | } |
83 | |
84 | Node getChild(IntSuffixTree_managed tree, int c) { |
85 | L<Node> children = children(); |
86 | int i = generalizedBinarySearch2(children, n -> cmp(n.firstCharOrMinus1(tree), c)); |
87 | ret i >= 0 ? children.get(i) : null; |
88 | } |
89 | |
90 | toString { ret "Node@" + addr + ", " + from() + "/" + lText() + ". " + nChildren(children()); } |
91 | } |
92 | |
93 | *() {} |
94 | *(L<Int> fullText) { |
95 | process(fullText); |
96 | } |
97 | |
98 | // load |
99 | *(IIntMemory mem, int root) { |
100 | this.mem = new ManagedIntObjects_v1_virtual(mem); |
101 | this.root = new Node(root); |
102 | } |
103 | |
104 | void process(L<Int> fullText) { |
105 | this.fullText = fullText; |
106 | root = new Node(0, 0); |
107 | ++nodeCount; |
108 | for i over fullText: { |
109 | ping(); |
110 | addSuffix(IntRange(i, l(fullText))); |
111 | if (((i+1) % printEvery) == 0) { |
112 | print((i+1) + " suffixes added (" + nNodes(nodeCount) + ")"); |
113 | mem.printStats(); |
114 | } |
115 | } |
116 | } |
117 | |
118 | void addSuffix(IntRange s) { |
119 | Node node = root; |
120 | |
121 | while (!empty(s)) { |
122 | int _n = lCommonPrefix_lists(node.text(IntSuffixTree_managed.this), subList(fullText, s)); |
123 | s.start += _n; |
124 | if (_n >= node.lText()) { // node text exhausted |
125 | if (empty(s)) { // pattern also exhausted - done |
126 | //print("Exhausted: " + node.from() + "/" + node.to()); |
127 | // add a new termination node |
128 | Node nNew = new Node(s); |
129 | ++nodeCount; |
130 | node.addChild(IntSuffixTree_managed.this, nNew); |
131 | ret; |
132 | } else { |
133 | Node n = node.getChild(IntSuffixTree_managed.this, firstOrMinus1(subList(fullText, s))); |
134 | if (n == null) { |
135 | n = new Node(s); |
136 | ++nodeCount; |
137 | node.addChild(IntSuffixTree_managed.this, n); |
138 | ret; |
139 | } else |
140 | node = n; |
141 | } |
142 | } else { // node text not exhausted |
143 | // split node. first, move all the node's vitals to a new node nOld |
144 | Node nOld = new Node(node.from()+_n, node.to()); |
145 | ++nodeCount; |
146 | nOld.children(node.children_ptr()); |
147 | node.children(mem.nullPtr()); |
148 | node.to(node.from()+_n); |
149 | node.addChild(IntSuffixTree_managed.this, nOld); |
150 | |
151 | // now add a new node |
152 | Node nNew = new Node(s); |
153 | ++nodeCount; |
154 | node.addChild(IntSuffixTree_managed.this, nNew); |
155 | ret; |
156 | } |
157 | } |
158 | } |
159 | |
160 | public L<Int> indicesOf(L<Int> pattern) { |
161 | ret asList(indicesOf_iterator(pattern)); |
162 | } |
163 | |
164 | public ItIt<Int> indicesOf_iterator(L<Int> pattern) { |
165 | ret mapI_notNull(allNodesUnder(scanDown(root, pattern)), |
166 | nad -> nad.node.isTerminal(this) ? nad.position() : null); |
167 | } |
168 | |
169 | srecord NodeAndDepth(Node node, int depth) { |
170 | int position() { |
171 | int position = node.from()-depth; |
172 | //print("from=" + node.from() + ", to=" + node.to() + ", depth=" + depth + ", position=" + position); |
173 | ret position; |
174 | } |
175 | |
176 | Cl<NodeAndDepth> children() { |
177 | ret lmap wrapChild(node.children()); |
178 | } |
179 | |
180 | NodeAndDepth wrapChild(Node n) { |
181 | ret n == null ? null : new NodeAndDepth(n, depth+node.lText()); |
182 | } |
183 | |
184 | NodeAndDepth getChild(IntSuffixTree_managed tree, int c) { |
185 | ret wrapChild(node.getChild(tree, c)); |
186 | } |
187 | } |
188 | |
189 | NodeAndDepth scanDown(Node node, L<Int> pattern) { |
190 | int lPattern = l(pattern), iPattern = 0; |
191 | NodeAndDepth nad = new(node, 0); |
192 | while true { |
193 | int n = lCommonPrefix_lists(nad.node.text(this), subList(pattern, iPattern)); |
194 | iPattern += n; |
195 | if (iPattern >= lPattern) break; // pattern exhausted - done |
196 | if (n < nad.node.lText()) null; // mismatch, exit |
197 | NodeAndDepth child = nad.getChild(IntSuffixTree_managed.this, or(get(pattern, iPattern), -1)); |
198 | if (child != null) continue with nad = child; |
199 | null; |
200 | } |
201 | |
202 | ret nad; |
203 | } |
204 | |
205 | void printMe() { |
206 | printNode("", "", new NodeAndDepth(root, 0)); |
207 | } |
208 | |
209 | void printNode(S indent, S pre, NodeAndDepth nad) { |
210 | print(indent + pre + takeFirst(20, nad.node.text(this)) + (!nad.node.isTerminal(this) ? "" : " [" + nad.position() + "]")); |
211 | fOr (NodeAndDepth n : nad.children()) { |
212 | printNode(indent + " ", "", n); |
213 | } |
214 | } |
215 | |
216 | ItIt<NodeAndDepth> allNodes() { |
217 | ret allNodesUnder(new NodeAndDepth(root, 0)); |
218 | } |
219 | |
220 | // includes the node itself |
221 | ItIt<NodeAndDepth> allNodesUnder(NodeAndDepth nad) { |
222 | new L<Iterator<NodeAndDepth>> stack; |
223 | if (nad != null) |
224 | stack.add(iteratorLL(nad)); |
225 | ret iteratorFromFunction_if0(() -> { |
226 | while (nempty(stack)) { |
227 | if (!last(stack).hasNext()) |
228 | popLast(stack); |
229 | else { |
230 | NodeAndDepth n = last(stack).next(); |
231 | stack.add((Iterator) iterator(n.children())); |
232 | ret n; |
233 | } |
234 | } |
235 | null; |
236 | }); |
237 | } |
238 | |
239 | public void scanForCollection(IManagedObjectCollector gc) { |
240 | if (root == null) ret; |
241 | root.scanForCollection(gc); |
242 | } |
243 | |
244 | public void close() { |
245 | closeAll(dependentCloseables); |
246 | } |
247 | |
248 | void saveToFile(File f) { |
249 | mem.set(0, root.addr); |
250 | mem.saveToFile(f); |
251 | } |
252 | |
253 | Node newNode(int from, int to) { |
254 | ret new Node(from, to); |
255 | } |
256 | } |
Began life as a copy of #1029230
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1029263 |
Snippet name: | IntSuffixTree_managed [use ints instead of chars] |
Eternal ID of this version: | #1029263/24 |
Text MD5: | eca6c7df63523f3e8af8cd6efd75319d |
Transpilation MD5: | 1b25dcc4276b1188cab6bbc2525bcbcd |
Author: | stefan |
Category: | javax / text searching |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-06-23 22:18:03 |
Source code size: | 8096 bytes / 256 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 374 / 731 |
Version history: | 23 change(s) |
Referenced in: | [show references] |