Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

64
LINES

< > BotCompany Repo | #1001998 // pythonToJavaTok function - Python tokenizer that converts comments to Java style

JavaX fragment (include)

1  
// no multi-line-strings/docstrings yet
2  
3  
static List<String> pythonToJavaTok(String s) {
4  
  List<String> tok = new ArrayList<String>();
5  
  int l = s.length();
6  
  
7  
  int i = 0;
8  
  while (i < l) {
9  
    int j = i;
10  
    char c; String cc;
11  
    
12  
    // scan whitespace+comments into buf
13  
    new StringBuilder buf;
14  
    while (j < l) {
15  
      c = s.charAt(j);
16  
      cc = s.substring(j, Math.min(j+2, l));
17  
      int k = j;
18  
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
19  
        ++j;
20  
        buf.append(s.substring(k, j));
21  
      } else if (c == '#') {
22  
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
23  
        buf.append("//").append(s.substring(k+1, j));
24  
      } else
25  
        break;
26  
    }
27  
    
28  
    tok.add(buf.toString());
29  
    i = j;
30  
    if (i >= l) break;
31  
    c = s.charAt(i);
32  
    cc = s.substring(i, Math.min(i+2, l));
33  
34  
    // scan for non-whitespace
35  
    if (c == '\'' || c == '"') {
36  
      char opener = c;
37  
      ++j;
38  
      while (j < l) {
39  
        if (s.charAt(j) == opener) {
40  
          ++j;
41  
          break;
42  
        } else if (s.charAt(j) == '\\' && j+1 < l)
43  
          j += 2;
44  
        else
45  
          ++j;
46  
      }
47  
    } else if (Character.isJavaIdentifierStart(c))
48  
      do ++j; while (j < l && Character.isJavaIdentifierPart(s.charAt(j)));
49  
    else if (Character.isDigit(c)) {
50  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
51  
      if (j < l && s.charAt(j) == 'L') ++j; // Long constants like 1L
52  
    } else if (cc.equals("[[")) {
53  
      do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
54  
      j = Math.min(j+2, l);
55  
    } else
56  
      ++j;
57  
58  
    tok.add(s.substring(i, j));
59  
    i = j;
60  
  }
61  
  
62  
  if ((tok.size() % 2) == 0) tok.add("");
63  
  return tok;
64  
}

Author comment

Began life as a copy of #758

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1001998
Snippet name: pythonToJavaTok function - Python tokenizer that converts comments to Java style
Eternal ID of this version: #1001998/1
Text MD5: 0d6484ab9d28c667286de5c84707ee5a
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-12-11 17:27:37
Source code size: 1752 bytes / 64 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 547 / 794
Referenced in: [show references]