1 | // This is made for SNL parsing. |
2 | // It does NOT recognize multiline strings as these conflict |
3 | // with syntax like [[a] [b]]. |
4 | |
5 | static List<String> snlTok(String s) { |
6 | List<String> tok = new ArrayList<String>(); |
7 | int l = s.length(); |
8 | |
9 | int i = 0; |
10 | while (i < l) { |
11 | int j = i; |
12 | char c; String cc; |
13 | |
14 | // scan for whitespace |
15 | while (j < l) { |
16 | c = s.charAt(j); |
17 | cc = s.substring(j, Math.min(j+2, l)); |
18 | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
19 | ++j; |
20 | else if (cc.equals("/*")) { |
21 | do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/")); |
22 | j = Math.min(j+2, l); |
23 | } else if (cc.equals("//")) { |
24 | do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0); |
25 | } else |
26 | break; |
27 | } |
28 | |
29 | tok.add(s.substring(i, j)); |
30 | i = j; |
31 | if (i >= l) break; |
32 | c = s.charAt(i); |
33 | cc = s.substring(i, Math.min(i+2, l)); |
34 | |
35 | // scan for non-whitespace |
36 | if (c == '\u201C' || c == '\u201D') c = '"'; // normalize quotes |
37 | if (c == '\'' || c == '"') { |
38 | char opener = c; |
39 | ++j; |
40 | while (j < l) { |
41 | char _c = s.charAt(j); |
42 | if (_c == '\u201C' || _c == '\u201D') _c = '"'; // normalize quotes |
43 | if (_c == opener) { |
44 | ++j; |
45 | break; |
46 | } else if (s.charAt(j) == '\\' && j+1 < l) |
47 | j += 2; |
48 | else |
49 | ++j; |
50 | } |
51 | if (j-1 >= i+1) { |
52 | tok.add(opener + s.substring(i+1, j-1) + opener); |
53 | i = j; |
54 | continue; |
55 | } |
56 | } else if (Character.isJavaIdentifierStart(c)) |
57 | do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for things like "this one's" |
58 | else if (Character.isDigit(c)) |
59 | do ++j; while (j < l && Character.isDigit(s.charAt(j))); |
60 | /*else if (cc.equals("[[")) { |
61 | do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]")); |
62 | j = Math.min(j+2, l); |
63 | }*/ else if (s.substring(j, Math.min(j+3, l)).equals("...")) |
64 | j += 3; |
65 | else if (c == '$' || c == '#') |
66 | do ++j; while (j < l && Character.isLetterOrDigit(s.charAt(j))); |
67 | else |
68 | ++j; |
69 | |
70 | tok.add(s.substring(i, j)); |
71 | i = j; |
72 | } |
73 | |
74 | if ((tok.size() % 2) == 0) tok.add(""); |
75 | return tok; |
76 | } |
Began life as a copy of #1000769
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1002709 |
Snippet name: | snlTok |
Eternal ID of this version: | #1002709/1 |
Text MD5: | 0cb5f5e5aded500b78e10471a8a46853 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-03-02 17:37:36 |
Source code size: | 2300 bytes / 76 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 617 / 1259 |
Referenced in: | [show references] |