// This is made for NL parsing. // It's javaTok extended with "..." token, "$n" and "#n" and // special quotes (which are converted to normal ones). static List javaTokPlusPeriod(String s) { List tok = new ArrayList(); if (s == null) ret tok; int l = s.length(); int i = 0; while (i < l) { int j = i; char c; String cc; // scan for whitespace while (j < l) { c = s.charAt(j); cc = s.substring(j, Math.min(j+2, l)); if (c == ' ' || c == '\t' || c == '\r' || c == '\n') ++j; else if (cc.equals("/*")) { do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/")); j = Math.min(j+2, l); } else if (cc.equals("//")) { do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0); } else break; } tok.add(s.substring(i, j)); i = j; if (i >= l) break; c = s.charAt(i); cc = s.substring(i, Math.min(i+2, l)); // scan for non-whitespace if (c == (char) 0x201C || c == (char) 0x201D) c = '"'; // normalize quotes if (c == '\'' || c == '"') { char opener = c; ++j; while (j < l) { char _c = s.charAt(j); if (_c == (char) 0x201C || _c == (char) 0x201D) _c = '"'; // normalize quotes if (_c == opener) { ++j; break; } else if (s.charAt(j) == '\\' && j+1 < l) j += 2; else ++j; } if (j-1 >= i+1) { tok.add(opener + s.substring(i+1, j-1) + opener); i = j; continue; } } else if (Character.isJavaIdentifierStart(c)) do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for things like "this one's" else if (Character.isDigit(c)) do ++j; while (j < l && Character.isDigit(s.charAt(j))); else if (cc.equals("[[")) { do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]")); j = Math.min(j+2, l); } else if (cc.equals("[=") && i+2 < l && s.charAt(i+2) == '[') { do ++j; while (j+2 < l && !s.substring(j, j+3).equals("]=]")); j = Math.min(j+3, l); } else if (s.substring(j, Math.min(j+3, l)).equals("...")) j += 3; else if (c == '$' || c == '#') do ++j; while (j < l && Character.isDigit(s.charAt(j))); else ++j; tok.add(s.substring(i, j)); i = j; } if ((tok.size() % 2) == 0) tok.add(""); return tok; }