// Limitation: multi-line (triple-quoted) strings and docstrings are not handled yet.

3 | static List<String> pythonToJavaTok(String s) {
|
4 | List<String> tok = new ArrayList<String>(); |
5 | int l = s.length(); |
6 | |
7 | int i = 0; |
8 | while (i < l) {
|
9 | int j = i; |
10 | char c; String cc; |
11 | |
12 | // scan whitespace+comments into buf |
13 | new StringBuilder buf; |
14 | while (j < l) {
|
15 | c = s.charAt(j); |
16 | cc = s.substring(j, Math.min(j+2, l)); |
17 | int k = j; |
18 | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
|
19 | ++j; |
20 | buf.append(s.substring(k, j)); |
21 | } else if (c == '#') {
|
22 | do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0); |
23 | buf.append("//").append(s.substring(k+1, j));
|
24 | } else |
25 | break; |
26 | } |
27 | |
28 | tok.add(buf.toString()); |
29 | i = j; |
30 | if (i >= l) break; |
31 | c = s.charAt(i); |
32 | cc = s.substring(i, Math.min(i+2, l)); |
33 | |
34 | // scan for non-whitespace |
35 | if (c == '\'' || c == '"') {
|
36 | char opener = c; |
37 | ++j; |
38 | while (j < l) {
|
39 | if (s.charAt(j) == opener) {
|
40 | ++j; |
41 | break; |
42 | } else if (s.charAt(j) == '\\' && j+1 < l) |
43 | j += 2; |
44 | else |
45 | ++j; |
46 | } |
47 | } else if (Character.isJavaIdentifierStart(c)) |
48 | do ++j; while (j < l && Character.isJavaIdentifierPart(s.charAt(j))); |
49 | else if (Character.isDigit(c)) {
|
50 | do ++j; while (j < l && Character.isDigit(s.charAt(j))); |
51 | if (j < l && s.charAt(j) == 'L') ++j; // Long constants like 1L |
52 | } else if (cc.equals("[[")) {
|
53 | do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
|
54 | j = Math.min(j+2, l); |
55 | } else |
56 | ++j; |
57 | |
58 | tok.add(s.substring(i, j)); |
59 | i = j; |
60 | } |
61 | |
62 | if ((tok.size() % 2) == 0) tok.add("");
|
63 | return tok; |
64 | } |
Began life as a copy of #758
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1001998 |
| Snippet name: | pythonToJavaTok function - Python tokenizer that converts comments to Java style |
| Eternal ID of this version: | #1001998/1 |
| Text MD5: | 0d6484ab9d28c667286de5c84707ee5a |
| Author: | stefan |
| Category: | |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-12-11 17:27:37 |
| Source code size: | 1752 bytes / 64 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 911 / 1168 |
| Referenced in: | [show references] |