Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

72
LINES

< > BotCompany Repo | #1027377 // javaTokWithUnifiedNumbersAndIdentifiers

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (107L/1K).

// TODO: extended multi-line strings

static LS javaTokWithUnifiedNumbersAndIdentifiers(String s) {
  ArrayList<String> tok = new ArrayList();
  int l = s == null ? 0 : s.length();
  
  int i = 0, n = 0;
  while (i < l) {
    int j = i;
    char c, d;
    
    // scan for whitespace
    while (j < l) {
      c = s.charAt(j);
      d = j+1 >= l ? '\0' : s.charAt(j+1);
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
        ++j;
      else if (c == '/' && d == '*') {
        do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/"));
        j = Math.min(j+2, l);
      } else if (c == '/' && d == '/') {
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
      } else
        break;
    }
    
    tok.add(javaTok_substringN(s, i, j));
    ++n;
    i = j;
    if (i >= l) break;
    c = s.charAt(i);
    d = i+1 >= l ? '\0' : s.charAt(i+1);

    // scan for non-whitespace
    
    // Special JavaX syntax: 'identifier
    if (c == '\'' && Character.isJavaIdentifierStart(d) && i+2 < l && "'\\".indexOf(s.charAt(i+2)) < 0) {
      j += 2;
      while (j < l && Character.isJavaIdentifierPart(s.charAt(j)))
        ++j;
    } else if (c == '\'' || c == '"') {
      char opener = c;
      ++j;
      while (j < l) {
        int c2 = s.charAt(j);
        if (c2 == opener || c2 == '\n' && opener == '\'') { // allow multi-line strings, but not for '
          ++j;
          break;
        } else if (c2 == '\\' && j+1 < l)
          j += 2;
        else
          ++j;
      }
    } else if (Character.isJavaIdentifierStart(c) || Character.isDigit(c)) {
      do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for stuff like "don't"
    } else if (c == '[' && d == '[') {
      do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
      j = Math.min(j+2, l);
    } else if (c == '[' && d == '=' && i+2 < l && s.charAt(i+2) == '[') {
      do ++j; while (j+2 < l && !s.substring(j, j+3).equals("]=]"));
      j = Math.min(j+3, l);
    } else
      ++j;
      
    tok.add(javaTok_substringC(s, i, j));
    ++n;
    i = j;
  }
  
  if ((tok.size() % 2) == 0) tok.add("");
  ret tok;
}

Author comment

Began life as a copy of #1000688

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1027377
Snippet name: javaTokWithUnifiedNumbersAndIdentifiers
Eternal ID of this version: #1027377/1
Text MD5: bacfd33bbb1accc32b6448fdc3aec223
Transpilation MD5: ec1aa8019d613083dd39a6c0b9ff638c
Author: stefan
Category: javax / parsing
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-03-15 16:35:03
Source code size: 2253 bytes / 72 lines
Pitched / IR pitched: No / No
Views / Downloads: 189 / 272
Referenced in: [show references]