static L htmlFineTok(S s) {
new L tok;
int l = s.length();
int i = 0, n = 0;
while (i < l) {
int j = i;
char c, d;
// scan for whitespace
while (j < l) {
c = s.charAt(j);
d = j+1 >= l ? '\0' : s.charAt(j+1);
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
++j;
else
break;
}
tok.add(quickSubstring(s, i, j));
++n;
i = j;
if (i >= l) break;
c = s.charAt(i);
d = i+1 >= l ? '\0' : s.charAt(i+1);
// scan for non-whitespace
if (c == '\'' || c == '"') {
char opener = c;
++j;
while (j < l) {
if (s.charAt(j) == opener /*|| s.charAt(j) == '\n'*/) { // allow multi-line strings
++j;
break;
} else if (s.charAt(j) == '\\' && j+1 < l)
j += 2;
else
++j;
}
} else if (Character.isJavaIdentifierStart(c))
do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || "'".indexOf(s.charAt(j)) >= 0)); // for stuff like "don't"
else if (Character.isDigit(c)) {
do ++j; while (j < l && Character.isDigit(s.charAt(j)));
if (j < l && s.charAt(j) == 'L') ++j; // Long constants like 1L
} else
++j;
tok.add(quickSubstring(s, i, j));
++n;
i = j;
}
if ((tok.size() % 2) == 0) tok.add("");
ret tok;
}