Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

116
LINES

< > BotCompany Repo | #1029210 // SuffixTree [OK, backup before optimization]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (3312L/21K).

1  
sclass SuffixTree {
2  
  Node root;
3  
  S fullText;
4  
5  
  sclass Node {
6  
    Substring text;
7  
    Map<Int, Node> children; // key is first character or -1 for end of string
8  
    IntBuffer positions;
9  
10  
    *() {}
11  
    *(Substring *text) {}
12  
    
13  
    void addPosition(int i) {
14  
      if (positions == null) positions = new IntBuffer;
15  
      positions.add(i);
16  
    }
17  
    
18  
    void addChild(Node n) {
19  
      if (children == null) children = new Map;
20  
      children.put(charToIntOrMinus1(n.text), n);
21  
    }
22  
    
23  
    Node getChild(int c) {
24  
      ret mapGet(children, c);
25  
    }
26  
  }
27  
28  
  *() {}
29  
  *(S *fullText) {
30  
    root = new Node(Substring(fullText, 0, 0));
31  
    for i over fullText:
32  
      addSuffix(Substring(fullText, i), i);
33  
  }
34  
35  
  void addSuffix(Substring s, int position) {
36  
    Node node = root;
37  
    
38  
    while (!empty(s)) {
39  
      int _n = lCommonPrefix_CharSequence(node.text, s);
40  
      s = s.substring(_n);
41  
      if (_n >= l(node.text)) { // node text exhausted
42  
        if (empty(s)) {
43  
          node.addPosition(position);
44  
          ret;
45  
        } else {
46  
          Node n = node.getChild(charToIntOrMinus1(s));
47  
          if (n == null) {
48  
            n = new Node(s);
49  
            n.addPosition(position);
50  
            node.addChild(n);
51  
            ret;
52  
          } else
53  
            node = n;
54  
        }
55  
      } else { // node text not exhausted
56  
        // split node. first, move all the node's vitals to a new node nOld
57  
        Node nOld = new Node(node.text.substring(_n));
58  
        nOld.positions = node.positions;
59  
        node.positions = null;
60  
        nOld.children = node.children;
61  
        node.children = null;
62  
        node.text = node.text.substring(0, _n);
63  
        node.addChild(nOld);
64  
65  
        // now add a new node
66  
        Node nNew = new Node(s);
67  
        nNew.addPosition(position);
68  
        node.addChild(nNew);
69  
        ret;
70  
      }
71  
    }
72  
  }
73  
74  
  public L<Int> indicesOf(S pattern) {
75  
    new IntBuffer out;
76  
    getIndicesOf(out, root, pattern, 0);
77  
    ret out.asVirtualList();
78  
  }
79  
  
80  
  void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) {
81  
    int lPattern = l(pattern);
82  
    while true {
83  
      int n = lCommonPrefix_CharSequence(node.text, Substring(pattern, iPattern));
84  
      iPattern += n;
85  
      if (iPattern >= lPattern) break;
86  
      if (n < l(node.text)) break;
87  
      Node child = node.getChild(charAtAsIntOrMinus1(pattern, iPattern));
88  
      if (child != null) node = child;
89  
    }
90  
    
91  
    collectPositions(out, node);
92  
  }
93  
  
94  
  L<Int> getPositions(Node node) {
95  
    new IntBuffer out;
96  
    collectPositions(out, node);
97  
    ret out.asVirtualList();
98  
  }
99  
  
100  
  void collectPositions(IntBuffer out, Node node) {
101  
    if (node == null) ret;
102  
    out.addAll(node.positions);
103  
    fOr (Node n : values(node.children))
104  
      collectPositions(out, n);
105  
  }
106  
  
107  
  void printMe() {
108  
    printNode("", "", root);
109  
  }
110  
  
111  
  void printNode(S indent, S pre, Node node) {
112  
    print(indent + pre + quote(shorten(20, node.text)) + (empty(node.positions) ? "" : " " + node.positions));
113  
    fOr (int c, Node n : node.children)
114  
      printNode(indent + "  ", "[" + (c < 0 ? "end" : quote((char) c)) + "] ", n);
115  
  }
116  
}

Author comment

Began life as a copy of #1029202

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029210
Snippet name: SuffixTree [OK, backup before optimization]
Eternal ID of this version: #1029210/1
Text MD5: ddcf61d36ee3e6788d7ed71f2e696e39
Transpilation MD5: 060b156144ed667832707870a23f1888
Author: stefan
Category: javax / text searching
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-25 13:23:27
Source code size: 3211 bytes / 116 lines
Pitched / IR pitched: No / No
Views / Downloads: 188 / 253
Referenced in: [show references]