Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

375
LINES

< > BotCompany Repo | #2000400 // class JSONTokenizer

New Tinybrain snippet

/*
 * Copyright 2008-2010 the T2 Project and the Others.
*/

/**
 * Lenient, single-pass tokenizer for JSON text.
 *
 * <p>
 * The tokenizer scans the source string for the structural characters
 * '{', '}', '[', ']', ':' and ','. Each call to {@link #nextToken()} advances
 * to the next structural character, reports which one was found and makes the
 * text that preceded it available through {@link #getToken()}.
 * </p>
 *
 * <p>
 * Structural characters that occur inside a quoted string literal (single or
 * double quoted, with backslash escapes) are not treated as delimiters.
 * </p>
 *
 * <p>
 * Instances carry mutable scanning state and perform no synchronization.
 * </p>
 */
class JSONTokenizer {

  public static final char COMMA = ',';

  public static final char COLON = ':';

  public static final char SINGLE_QUOTE = '\'';

  public static final char DOUBLE_QUOTE = '\"';

  public static final char START_BRACKET = '[';

  public static final char END_BRACKET = ']';

  public static final char START_BRACE = '{';

  public static final char END_BRACE = '}';

  // public static final char UTF8_BOM = 0xFEFF;

  /** Marker meaning "the scanner is currently outside any string literal". */
  private static final char NO_QUOTE = '\0';

  /** The text being tokenized; the literal "null" when constructed with null. */
  protected final String orgString;

  /** Cached length of {@link #orgString}. */
  protected final int orgLength;

  /** Text of the most recently produced token; initially the whole source. */
  protected String tokenString;

  /** Index most recently accepted by {@link #prepare(int)}. */
  protected int pos;

  /** Index at which the next call to {@link #nextToken()} starts scanning. */
  protected int nextPos;

  /** Type recorded for the current position; see {@link #currentTokenType()}. */
  protected TokenType type = TokenType.EOF;

  /**
   * Creates a tokenizer positioned on the first character of the given text.
   *
   * @param s
   *          the JSON text; {@code null} is treated as the four-character
   *          string "null"
   */
  public JSONTokenizer(String s) {
    // TODO : check grammar first.
    this.orgString = (s != null) ? s : "null";
    this.orgLength = this.orgString.length();
    this.tokenString = this.orgString;
    prepare(0);
  }

  /**
   * Records index {@code i} as the current position and, when the character
   * there is a brace or bracket, updates the current type accordingly. Any
   * other character leaves the type untouched. An index at or beyond the end
   * of the text sets the type to {@link TokenType#EOF}.
   *
   * @param i
   *          index into the source text
   */
  protected void prepare(int i) {
    if (i < orgLength) {
      char ch = orgString.charAt(i);
      if (ch == START_BRACE) {
        type = TokenType.START_BRACE;
      } else if (ch == END_BRACE) {
        type = TokenType.END_BRACE;
      } else if (ch == START_BRACKET) {
        type = TokenType.START_BRACKET;
      } else if (ch == END_BRACKET) {
        type = TokenType.END_BRACKET;
      }
      this.pos = i;
      this.nextPos = i + 1;
    } else {
      type = TokenType.EOF;
    }
  }

  /**
   * Advances to the next structural character and returns its type.
   *
   * <p>
   * After the call, {@link #getToken()} returns the text associated with the
   * step: for ':' , ',' , ']' and '}' it is the trimmed text between the
   * previous position and the delimiter; for '{' and '[' it is the trimmed
   * remainder of the source starting at the previous position. Tokens found
   * before ':' , '{' or '[' (and the token re-processed when the end of the
   * text is reached) are passed through {@link #unquote(String)}; tokens found
   * before ',' , ']' or '}' keep their quotes.
   * </p>
   *
   * @return the type of the structural character found, or
   *         {@link TokenType#EOF} when none remains
   * @throws IllegalStateException
   *           if ':' is the last character of the text
   * @throws IllegalArgumentException
   *           if ',' is the last character of the text
   */
  public TokenType nextToken() {
    if (type == TokenType.EOF) {
      return TokenType.EOF;
    }
    String s = this.tokenString;
    TokenType retType = TokenType.EOF;
    boolean key = true;
    final int lastIndex = this.orgLength - 1;
    char openQuote = NO_QUOTE;
    boolean escaped = false;
    boolean elementStarted = false;
    for (int i = this.nextPos; i < this.orgLength; i++) {
      char ch = this.orgString.charAt(i);
      if (openQuote != NO_QUOTE) {
        // Inside a string literal: nothing is structural until the matching,
        // unescaped closing quote is seen.
        if (escaped) {
          escaped = false;
        } else if (ch == '\\') {
          escaped = true;
        } else if (ch == openQuote) {
          openQuote = NO_QUOTE;
        }
        continue;
      }
      if (isIgnorable(ch)) {
        continue;
      }
      // A string literal is only recognised when the quote is the first
      // significant character of the element, so a stray quote in the middle
      // of an unquoted token is still scanned as an ordinary character.
      if (!elementStarted && (ch == DOUBLE_QUOTE || ch == SINGLE_QUOTE)) {
        openQuote = ch;
        elementStarted = true;
        continue;
      }
      elementStarted = true;
      if (ch == START_BRACE) {
        s = getElement(nextPos, lastIndex);
        retType = TokenType.START_BRACE;
        prepare(i);
        key = true;
        break;
      } else if (ch == START_BRACKET) {
        s = getElement(nextPos, lastIndex);
        retType = TokenType.START_BRACKET;
        prepare(i);
        key = true;
        break;
      } else if (ch == COLON) {
        if (i == lastIndex) {
          throw new IllegalStateException(
            "JSON text must not end with ':' (index " + i + ")");
        }
        s = getElement(nextPos, i - 1);
        this.type = retType = TokenType.COLON;
        prepare(i);
        key = true;
        break;
      } else if (ch == COMMA) {
        if (i == lastIndex) {
          throw new IllegalArgumentException(
            "JSON text must not end with ',' (index " + i + ")");
        }
        s = getElement(nextPos, i - 1);
        this.type = retType = (isObjectOrArrayEnd(i - 1)) ? TokenType.END_COMMA
          : TokenType.COMMA;
        prepare(i);
        key = false;
        break;
      } else if (ch == END_BRACKET) {
        // NOTE(review): prepare(i) below sets type back to END_BRACKET, so the
        // EOF assigned here does not survive. Kept as-is because callers may
        // rely on the resulting sequence of types - confirm before changing.
        this.type = (i == lastIndex) ? TokenType.EOF : TokenType.END_BRACKET;
        retType = TokenType.END_BRACKET;
        s = getElement(nextPos, i - 1);
        prepare(i);
        key = false;
        break;
      } else if (ch == END_BRACE) {
        // NOTE(review): same overwrite by prepare(i) as in the ']' branch.
        this.type = (i == lastIndex) ? TokenType.EOF : TokenType.END_BRACE;
        retType = TokenType.END_BRACE;
        s = getElement(this.nextPos, i - 1);
        prepare(i);
        key = false;
        break;
      }
    }
    s = removeIgnorable(s);
    this.tokenString = (key) ? unquote(s) : s;
    return retType;
  }

  /**
   * Tells whether the nearest non-ignorable character at or before
   * {@code pos} closes an object or an array.
   *
   * @param pos
   *          index from which to scan backwards (inclusive)
   * @return {@code true} if that character is '}' or ']'; {@code false} if it
   *         is anything else or if only ignorable characters were found
   */
  protected boolean isObjectOrArrayEnd(int pos) {
    // Scan down to and including index 0 so the first character is examined.
    for (int i = pos; 0 <= i; i--) {
      char c = this.orgString.charAt(i);
      if (isIgnorable(c)) {
        continue;
      }
      return c == END_BRACE || c == END_BRACKET;
    }
    return false;
  }

  /**
   * Removes leading ignorable characters from {@code s}. Trailing characters
   * are left untouched.
   *
   * <p>
   * TODO refactor.
   * </p>
   *
   * @param s
   *          the text to strip; may be {@code null}
   * @return {@code s} itself when it is {@code null} or empty, the empty
   *         string when it holds only ignorable characters, otherwise the
   *         substring starting at the first non-ignorable character
   */
  protected String removeIgnorable(String s) {
    if (isEmpty(s)) {
      return s;
    }
    for (int pos1 = 0; pos1 < s.length(); pos1++) {
      if (!isIgnorable(s.charAt(pos1))) {
        return s.substring(pos1);
      }
    }
    return "";
  }

  /**
   * @param text
   *          the text to test
   * @return {@code true} if {@code text} is {@code null} or has length zero
   */
  public static boolean isEmpty(String text) {
    return text == null || text.length() == 0;
  }

  /**
   * Extracts the source text between two indices with ignorable characters
   * trimmed from both ends.
   *
   * @param orgStartPos
   *          first index of the range (inclusive)
   * @param orgEndPos
   *          last index of the range (inclusive); when it is one less than
   *          {@code orgStartPos} the result is the empty string
   * @return the trimmed text of the range
   */
  protected String getElement(final int orgStartPos, final int orgEndPos) {
    int startPos = orgStartPos;
    for (; startPos < orgEndPos; startPos++) {
      char ch = this.orgString.charAt(startPos);
      if (!isIgnorable(ch)) {
        break;
      }
    }
    int endPos = orgEndPos;
    for (; startPos < endPos; endPos--) {
      char ch = this.orgString.charAt(endPos);
      if (!isIgnorable(ch)) {
        break;
      }
    }
    return this.orgString.substring(startPos, endPos + 1);
  }

  /**
   * @param ch
   *          the character to classify
   * @return {@code true} for backspace, form feed, line feed, carriage
   *         return, tab and space; {@code false} for every other character
   */
  protected static boolean isIgnorable(char ch) {
    switch (ch) {
      case '\b':
      case '\f':
      case '\n':
      case '\r':
      case '\t':
      case ' ':
        return true;
      default:
        return false;
    }
  }

  /**
   * Strips one pair of surrounding quotes from {@code str}.
   *
   * @param str
   *          the text to unquote; may be {@code null}
   * @return {@code str} without its first and last character when both are
   *         the same quote character (double or single); otherwise
   *         {@code str} unchanged
   */
  public static String unquote(String str) {
    if (str == null || str.length() == 0) {
      return str;
    }
    if (isQuoted(str, DOUBLE_QUOTE) || isQuoted(str, SINGLE_QUOTE)) {
      return chopQuote(str);
    }
    return str;
  }

  /**
   * Drops the first and the last character of {@code str} without checking
   * what they are.
   *
   * @param str
   *          text of at least two characters
   * @return {@code str} without its first and last character
   */
  public static String chopQuote(String str) {
    return str.substring(1, str.length() - 1);
  }

  /**
   * @param str
   *          the text to test; must not be {@code null}
   * @param quote
   *          the quote character to look for
   * @return {@code true} if {@code str} has at least two characters and both
   *         its first and its last character equal {@code quote}
   */
  protected static boolean isQuoted(String str, char quote) {
    final int length = str.length();
    // Both ends must be the quote; a quote merely somewhere after index 0
    // would make chopQuote remove an unrelated trailing character.
    return length >= 2 && str.charAt(0) == quote
      && str.charAt(length - 1) == quote;
  }

  /**
   * @return the text of the most recently produced token; before the first
   *         call to {@link #nextToken()} this is the whole source text
   */
  public String getToken() {
    return this.tokenString;
  }

  /**
   * Kinds of token reported by the tokenizer. Except for {@link #EOF}, each
   * constant's {@code toString()} returns the structural character it
   * stands for.
   */
  public enum TokenType {
    /** "{" */
    START_BRACE {
      @Override
      public String toString() {
        return "{";
      }
    },

    /** "}" */
    END_BRACE {
      @Override
      public String toString() {
        return "}";
      }
    },

    /** "[" */
    START_BRACKET {
      @Override
      public String toString() {
        return "[";
      }
    },

    /** "]" */
    END_BRACKET {
      @Override
      public String toString() {
        return "]";
      }
    },

    /** "," */
    COMMA {
      @Override
      public String toString() {
        return ",";
      }
    },

    /** ":" */
    COLON {
      @Override
      public String toString() {
        return ":";
      }
    },

    /** "," that directly follows the end of an object or an array. */
    END_COMMA {
      @Override
      public String toString() {
        return ",";
      }
    },

    /** End of the text. */
    EOF;

  }

  /**
   * @return the type currently recorded for the tokenizer's position
   */
  public TokenType currentTokenType() {
    return type;
  }
}

download  show line numbers   

Snippet is not live.

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #2000400
Snippet name: class JSONTokenizer
Eternal ID of this version: #2000400/1
Text MD5: 0ad8de1e884015368a85b40a0e50fde6
Author: stefan
Category: javax
Type: New Tinybrain snippet
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-28 18:30:22
Source code size: 7862 bytes / 375 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 600 / 401
Referenced in: [show references]