Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

112
LINES

< > BotCompany Repo | #1029275 // UkkonenSuffixTree

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (139L/2K).

// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147

/*
 * Copyright (c) 2016 Sergey Makagonov
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 */
 
sclass UkkonenSuffixTree {
  static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2?
  Node [] nodes;
  char [] text;
  int root, position = -1, currentNode, needSuffixLink, remainder;

  int active_node, active_length, active_edge;

  sclass Node {
    int start, end = oo, link;
    public TreeMap<Character, Integer> next = new TreeMap<Character, Integer>();

    *(int *start, int *end) {}

    public int edgeLength(UkkonenSuffixTree tree) {
      ret Math.min(end, tree.position + 1) - start;
    }
  }

  *(int length) {
    nodes = new Node[2*length+2];
    text = new char[length];
    root = active_node = newNode(-1, -1);
  }

  private void addSuffixLink(int node) {
      if (needSuffixLink > 0)
          nodes[needSuffixLink].link = node;
      needSuffixLink = node;
  }

  char active_edge() {
      return text[active_edge];
  }

  boolean walkDown(int next) {
      if (active_length >= nodes[next].edgeLength(this)) {
          active_edge += nodes[next].edgeLength(this);
          active_length -= nodes[next].edgeLength(this);
          active_node = next;
          return true;
      }
      return false;
  }

  int newNode(int start, int end) {
      nodes[++currentNode] = new Node(start, end);
      return currentNode;
  }

  public void addChar(char c) {
      text[++position] = c;
      needSuffixLink = -1;
      remainder++;
      while(remainder > 0) {
          if (active_length == 0) active_edge = position;
          if (!nodes[active_node].next.containsKey(active_edge())){
              int leaf = newNode(position, oo);
              nodes[active_node].next.put(active_edge(), leaf);
              addSuffixLink(active_node); //rule 2
          } else {
              int next = nodes[active_node].next.get(active_edge());
              if (walkDown(next)) continue;   //observation 2
              if (text[nodes[next].start + active_length] == c) { //observation 1
                  active_length++;
                  addSuffixLink(active_node); // observation 3
                  break;
              }
              int split = newNode(nodes[next].start, nodes[next].start + active_length);
              nodes[active_node].next.put(active_edge(), split);
              int leaf = newNode(position, oo);
              nodes[split].next.put(c, leaf);
              nodes[next].start += active_length;
              nodes[split].next.put(text[nodes[next].start], next);
              addSuffixLink(split); //rule 2
          }
          remainder--;

          if (active_node == root && active_length > 0) {  //rule 1
              active_length--;
              active_edge = position - remainder + 1;
          } else
              active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
      }
  }
}

Author comment

Began life as a copy of #1029274

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029275
Snippet name: UkkonenSuffixTree
Eternal ID of this version: #1029275/3
Text MD5: 67c01b3997a871bb5d85c32ab5f042c4
Transpilation MD5: 345d8fd26c4166c2858c09fb0ff2ad3c
Author: stefan
Category: javax / suffix trees
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 19:51:04
Source code size: 4101 bytes / 112 lines
Pitched / IR pitched: No / No
Views / Downloads: 219 / 320
Version history: 2 change(s)
Referenced in: #1029276 - UkkonenIntSuffixTree [ints instead of chars, compact version, dev.]
#1029277 - UkkonenIntSuffixTree [ints instead of chars]