1 | // This is made for NL parsing. |
2 | // It's javaTok extended with "..." token, "$n" and "#n" and |
3 | // special quotes (which are converted to normal ones). |
4 | |
5 | static List<String> javaTokPlusPeriod(String s) {
|
6 | List<String> tok = new ArrayList<String>(); |
7 | if (s == null) ret tok; |
8 | int l = s.length(); |
9 | |
10 | int i = 0; |
11 | while (i < l) {
|
12 | int j = i; |
13 | char c; String cc; |
14 | |
15 | // scan for whitespace |
16 | while (j < l) {
|
17 | c = s.charAt(j); |
18 | cc = s.substring(j, Math.min(j+2, l)); |
19 | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
20 | ++j; |
21 | else if (cc.equals("/*")) {
|
22 | do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/"));
|
23 | j = Math.min(j+2, l); |
24 | } else if (cc.equals("//")) {
|
25 | do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0); |
26 | } else |
27 | break; |
28 | } |
29 | |
30 | tok.add(s.substring(i, j)); |
31 | i = j; |
32 | if (i >= l) break; |
33 | c = s.charAt(i); |
34 | cc = s.substring(i, Math.min(i+2, l)); |
35 | |
36 | // scan for non-whitespace |
37 | if (c == (char) 0x201C || c == (char) 0x201D) c = '"'; // normalize quotes |
38 | if (c == '\'' || c == '"') {
|
39 | char opener = c; |
40 | ++j; |
41 | while (j < l) {
|
42 | char _c = s.charAt(j); |
43 | if (_c == (char) 0x201C || _c == (char) 0x201D) _c = '"'; // normalize quotes |
44 | if (_c == opener) {
|
45 | ++j; |
46 | break; |
47 | } else if (s.charAt(j) == '\\' && j+1 < l) |
48 | j += 2; |
49 | else |
50 | ++j; |
51 | } |
52 | if (j-1 >= i+1) {
|
53 | tok.add(opener + s.substring(i+1, j-1) + opener); |
54 | i = j; |
55 | continue; |
56 | } |
57 | } else if (Character.isJavaIdentifierStart(c)) |
58 | do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for things like "this one's" |
59 | else if (Character.isDigit(c)) |
60 | do ++j; while (j < l && Character.isDigit(s.charAt(j))); |
61 | else if (cc.equals("[[")) {
|
62 | do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
|
63 | j = Math.min(j+2, l); |
64 | } else if (cc.equals("[=") && i+2 < l && s.charAt(i+2) == '[') {
|
65 | do ++j; while (j+2 < l && !s.substring(j, j+3).equals("]=]"));
|
66 | j = Math.min(j+3, l); |
67 | } else if (s.substring(j, Math.min(j+3, l)).equals("..."))
|
68 | j += 3; |
69 | else if (c == '$' || c == '#') |
70 | do ++j; while (j < l && Character.isDigit(s.charAt(j))); |
71 | else |
72 | ++j; |
73 | |
74 | tok.add(s.substring(i, j)); |
75 | i = j; |
76 | } |
77 | |
78 | if ((tok.size() % 2) == 0) tok.add("");
|
79 | return tok; |
80 | } |
Began life as a copy of #1000688
download show line numbers debug dex old transpilations
Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney
No comments. add comment
| Snippet ID: | #1000769 |
| Snippet name: | javaTokPlusPeriod |
| Eternal ID of this version: | #1000769/3 |
| Text MD5: | cfa816f859e87e692ba747917d2bdec1 |
| Author: | stefan |
| Category: | |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2019-08-24 17:07:15 |
| Source code size: | 2539 bytes / 80 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 1050 / 9273 |
| Version history: | 2 change(s) |
| Referenced in: | [show references] |