Libraryless. Click here for Pure Java version (2703L/17K).
1 | // TODO: extended multi-line strings |
2 | |
// Global statistics updated by javaTok(String): javaTok_n is incremented once per call,
// javaTok_elements accumulates the size of every token list that call returns.
3 | static int javaTok_n, javaTok_elements; |
// Neither read nor written by the code visible in this snippet - presumably an option flag used elsewhere; TODO confirm.
4 | static bool javaTok_opt; |
5 | |
6 | static List<String> javaTok(String s) { |
7 | ++javaTok_n; |
8 | ArrayList<String> tok = new ArrayList(); |
9 | int l = s == null ? 0 : s.length(); |
10 | |
11 | int i = 0; |
12 | while (i < l) { |
13 | int j = i; |
14 | char c, d; |
15 | |
16 | // scan for whitespace |
17 | while (j < l) { |
18 | c = s.charAt(j); |
19 | d = j+1 >= l ? '\0' : s.charAt(j+1); |
20 | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
21 | ++j; |
22 | else if (c == '/' && d == '*') { |
23 | do ++j; while (j < l && !regionMatches(s, j, "*/")); |
24 | j = Math.min(j+2, l); |
25 | } else if (c == '/' && d == '/') { |
26 | do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0); |
27 | } else |
28 | break; |
29 | } |
30 | |
31 | tok.add(javaTok_substringN(s, i, j)); |
32 | i = j; |
33 | if (i >= l) break; |
34 | c = s.charAt(i); |
35 | d = i+1 >= l ? '\0' : s.charAt(i+1); |
36 | |
37 | // scan for non-whitespace |
38 | |
39 | // Special JavaX syntax: 'identifier |
40 | if (c == '\'' && Character.isJavaIdentifierStart(d) && i+2 < l && "'\\".indexOf(s.charAt(i+2)) < 0) { |
41 | j += 2; |
42 | while (j < l && Character.isJavaIdentifierPart(s.charAt(j))) |
43 | ++j; |
44 | } else if (c == '\'' || c == '"') { |
45 | char opener = c; |
46 | ++j; |
47 | while (j < l) { |
48 | int c2 = s.charAt(j); |
49 | if (c2 == opener || c2 == '\n' && opener == '\'') { // allow multi-line strings, but not for ' |
50 | ++j; |
51 | break; |
52 | } else if (c2 == '\\' && j+1 < l) |
53 | j += 2; |
54 | else |
55 | ++j; |
56 | } |
57 | } else if (Character.isJavaIdentifierStart(c)) |
58 | do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for stuff like "don't" |
59 | else if (Character.isDigit(c)) { |
60 | do ++j; while (j < l && Character.isDigit(s.charAt(j))); |
61 | if (j < l && s.charAt(j) == 'L') ++j; // Long constants like 1L |
62 | } else if (c == '[' && d == '[') { |
63 | do ++j; while (j < l && !regionMatches(s, j, "]]")); |
64 | j = Math.min(j+2, l); |
65 | } else if (c == '[' && d == '=' && i+2 < l && s.charAt(i+2) == '[') { |
66 | do ++j; while (j+2 < l && !regionMatches(s, j, "]=]")); |
67 | j = Math.min(j+3, l); |
68 | } else |
69 | ++j; |
70 | |
71 | tok.add(javaTok_substringC(s, i, j)); |
72 | i = j; |
73 | } |
74 | |
75 | if ((tok.size() % 2) == 0) tok.add(""); |
76 | javaTok_elements += tok.size(); |
77 | return tok; |
78 | } |
79 | |
80 | static List<String> javaTok(List<String> tok) { |
81 | return javaTokWithExisting(join(tok), tok); |
82 | } |
Began life as a copy of #1000647
download show line numbers debug dex old transpilations
Travelled to 18 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mowyntqkapby, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1000688 |
Snippet name: | javaTok function - Java + JavaX tokenizer, but parses just about anything |
Eternal ID of this version: | #1000688/18 |
Text MD5: | 2c9ef109164cefc9352ebf79f6cb1b2f |
Transpilation MD5: | 00cf2d77835e7f88dbdf7b44fe6c33c1 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-12-11 01:36:23 |
Source code size: | 2755 bytes / 82 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1206 / 8593 |
Version history: | 17 change(s) |
Referenced in: | [show references] |