Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

123
LINES

< > BotCompany Repo | #1029276 - UkkonenIntSuffixTree [ints instead of chars, compact version, dev.]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Compilation Failed (15002L/102K).

// from https://gist.github.com/bicepjai/3355993#file-gistfile1-java-L147

/*
 * Copyright (c) 2016 Sergey Makagonov
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 */
 
sclass UkkonenIntSuffixTree {
  Node[] nodes;
  int[] text;
  int root, position = -1, currentNode, needSuffixLink, remainder;

  int active_node, active_length, active_edge;
  
  static final int oo = Integer.MAX_VALUE/2; // TODO: Why /2?
  Comparator<Int> childComparator = (a, b) -> nodes[a].firstCharOrMinus1(this)-nodes[b].firstCharOrMinus1(this);
 

  sclass Node {
    int start, end = oo, link;
    public CompactTreeSet<Int> next = new(childComparator());

    *(int *start, int *end) {}

    public int edgeLength(UkkonenSuffixTree tree) {
      ret Math.min(end, tree.position + 1) - start;
    }
    
    CompactTreeSet<Int> makeEmptyChildrenSet(UkkonenIntSuffixTree tree) {
      ret new CompactTreeSet<>(tree.childComparator);
    }
    
    public int firstCharOrMinus1(IntSuffixTree tree) {
      ret tree.fullText[from);
    }
  }

  *(int length) {
    nodes = new Node[2*length+2];
    text = new char[length];
    root = active_node = newNode(-1, -1);
  }

  private void addSuffixLink(int node) {
      if (needSuffixLink > 0)
          nodes[needSuffixLink].link = node;
      needSuffixLink = node;
  }

  char active_edge() {
      return text[active_edge];
  }

  boolean walkDown(int next) {
      if (active_length >= nodes[next].edgeLength(this)) {
          active_edge += nodes[next].edgeLength(this);
          active_length -= nodes[next].edgeLength(this);
          active_node = next;
          return true;
      }
      return false;
  }

  int newNode(int start, int end) {
      nodes[++currentNode] = new Node(start, end);
      return currentNode;
  }

  public void addChar(char c) {
      text[++position] = c;
      needSuffixLink = -1;
      remainder++;
      while(remainder > 0) {
          if (active_length == 0) active_edge = position;
          if (!nodes[active_node].next.containsKey(active_edge())){
              int leaf = newNode(position, oo);
              nodes[active_node].next.put(active_edge(), leaf);
              addSuffixLink(active_node); //rule 2
          } else {
              int next = nodes[active_node].next.get(active_edge());
              if (walkDown(next)) continue;   //observation 2
              if (text[nodes[next].start + active_length] == c) { //observation 1
                  active_length++;
                  addSuffixLink(active_node); // observation 3
                  break;
              }
              int split = newNode(nodes[next].start, nodes[next].start + active_length);
              nodes[active_node].next.put(active_edge(), split);
              int leaf = newNode(position, oo);
              nodes[split].next.put(c, leaf);
              nodes[next].start += active_length;
              nodes[split].next.put(text[nodes[next].start], next);
              addSuffixLink(split); //rule 2
          }
          remainder--;

          if (active_node == root && active_length > 0) {  //rule 1
              active_length--;
              active_edge = position - remainder + 1;
          } else
              active_node = nodes[active_node].link > 0 ? nodes[active_node].link : root; //rule 3
      }
  }
}

Author comment

Began life as a copy of #1029275

download  show line numbers  debug dex   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029276
Snippet name: UkkonenIntSuffixTree [ints instead of chars, compact version, dev.]
Eternal ID of this version: #1029276/1
Text MD5: 3feab062968fc13a6086cda8815af3b8
Transpilation MD5: fd2ad803444444b4b0824b0a436a7e46
Author: stefan
Category: javax / suffix trees
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-29 19:59:47
Source code size: 4447 bytes / 123 lines
Pitched / IR pitched: No / No
Views / Downloads: 30 / 47
Referenced in: [show references]

Formerly at http://tinybrain.de/1029276 & http://1029276.tinybrain.de