Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

126
LINES

< > BotCompany Repo | #1029211 // SuffixTree [optimization attempt, probably not correct]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (3322L/21K).

1  
sclass SuffixTree {
2  
  Node root;
3  
  S fullText;
4  
5  
  sclass Node {
6  
    Substring text;
7  
    // we either have children or positions
8  
    O childrenOrPositions; // Map or IntBuffer
9  
    
10  
    // key is first character or -1 for end of string
11  
    Map<Int, Node> children() {
12  
      if (childrenOrPositions cast Map) ret childrenOrPositions; null;
13  
    }
14  
    
15  
    IntBuffer positions() {
16  
      if (childrenOrPositions cast IntBuffer) ret childrenOrPositions; null;
17  
    }
18  
19  
    *() {}
20  
    *(Substring *text) {}
21  
    
22  
    void addPosition(int i) {
23  
      if (children() != null) fail("illegal node conversion to positions");
24  
      if (positions() == null) childrenOrPositions = new IntBuffer;
25  
      positions().add(i);
26  
    }
27  
    
28  
    void addChild(Node n) {
29  
      if (positions() != null) fail("illegal node conversion to children");
30  
      if (children() == null) childrenOrPositions = new Map;
31  
      children().put(charToIntOrMinus1(n.text), n);
32  
    }
33  
    
34  
    Node getChild(int c) {
35  
      ret mapGet(children(), c);
36  
    }
37  
  }
38  
39  
  *() {}
40  
  *(S *fullText) {
41  
    root = new Node(Substring(fullText, 0, 0));
42  
    for i over fullText:
43  
      addSuffix(Substring(fullText, i), i);
44  
  }
45  
46  
  void addSuffix(Substring s, int position) {
47  
    Node node = root;
48  
    
49  
    while (!empty(s)) {
50  
      int _n = lCommonPrefix_CharSequence(node.text, s);
51  
      s = s.substring(_n);
52  
      if (_n >= l(node.text)) { // node text exhausted
53  
        Node n = node.getChild(charToIntOrMinus1(s));
54  
        if (empty(s)) {
55  
          if (n != null) node = n;
56  
          node.addPosition(position);
57  
          ret;
58  
        } else {
59  
          if (n == null) {
60  
            n = new Node(s);
61  
            n.addPosition(position);
62  
            node.addChild(n);
63  
            ret;
64  
          } else
65  
            node = n;
66  
        }
67  
      } else { // node text not exhausted
68  
        // split node. first, move all the node's vitals to a new node nOld
69  
        Node nOld = new Node(node.text.substring(_n));
70  
        nOld.childrenOrPositions = node.childrenOrPositions;
71  
        node.childrenOrPositions = null;
72  
        node.text = node.text.substring(0, _n);
73  
        node.addChild(nOld);
74  
75  
        // now add a new node
76  
        Node nNew = new Node(s);
77  
        nNew.addPosition(position);
78  
        node.addChild(nNew);
79  
        ret;
80  
      }
81  
    }
82  
  }
83  
84  
  public L<Int> indicesOf(S pattern) {
85  
    new IntBuffer out;
86  
    getIndicesOf(out, root, pattern, 0);
87  
    ret out.asVirtualList();
88  
  }
89  
  
90  
  void getIndicesOf(IntBuffer out, Node node, S pattern, int iPattern) {
91  
    int lPattern = l(pattern);
92  
    while true {
93  
      int n = lCommonPrefix_CharSequence(node.text, Substring(pattern, iPattern));
94  
      iPattern += n;
95  
      if (iPattern >= lPattern) break;
96  
      if (n < l(node.text)) break;
97  
      Node child = node.getChild(charAtAsIntOrMinus1(pattern, iPattern));
98  
      if (child != null) node = child;
99  
    }
100  
    
101  
    collectPositions(out, node);
102  
  }
103  
  
104  
  L<Int> getPositions(Node node) {
105  
    new IntBuffer out;
106  
    collectPositions(out, node);
107  
    ret out.asVirtualList();
108  
  }
109  
  
110  
  void collectPositions(IntBuffer out, Node node) {
111  
    if (node == null) ret;
112  
    out.addAll(node.positions());
113  
    fOr (Node n : values(node.children()))
114  
      collectPositions(out, n);
115  
  }
116  
  
117  
  void printMe() {
118  
    printNode("", "", root);
119  
  }
120  
  
121  
  void printNode(S indent, S pre, Node node) {
122  
    print(indent + pre + quote(shorten(20, node.text)) + (empty(node.positions()) ? "" : " " + node.positions()));
123  
    fOr (int c, Node n : node.children())
124  
      printNode(indent + "  ", "[" + (c < 0 ? "end" : quote((char) c)) + "] ", n);
125  
  }
126  
}

Author comment

Began life as a copy of #1029202

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029211
Snippet name: SuffixTree [optimization attempt, probably not correct]
Eternal ID of this version: #1029211/1
Text MD5: 73a27d11df73b993b44c2d39309e308c
Transpilation MD5: c440e94d2d5d136562b1754e8b8171b0
Author: stefan
Category: javax / text searching
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-25 13:32:38
Source code size: 3679 bytes / 126 lines
Pitched / IR pitched: No / No
Views / Downloads: 164 / 239
Referenced in: [show references]