Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

75
LINES

< > BotCompany Repo | #1004163 // nlTok3 - with quoted strings, without ' in identifiers

JavaX fragment (include)

1  
static List<String> nlTok3(String s) {
2  
  List<String> tok = new ArrayList<String>();
3  
  int l = s.length();
4  
  
5  
  int i = 0;
6  
  while (i < l) {
7  
    int j = i;
8  
    char c; String cc;
9  
    
10  
    // scan for whitespace
11  
    while (j < l) {
12  
      c = s.charAt(j);
13  
      cc = s.substring(j, Math.min(j+2, l));
14  
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
15  
        ++j;
16  
      else if (cc.equals("/*")) {
17  
        do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/"));
18  
        j = Math.min(j+2, l);
19  
      } else if (cc.equals("//")) {
20  
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
21  
      } else
22  
        break;
23  
    }
24  
    
25  
    tok.add(s.substring(i, j));
26  
    i = j;
27  
    if (i >= l) break;
28  
    c = s.charAt(i);
29  
    cc = s.substring(i, Math.min(i+2, l));
30  
31  
    // scan for non-whitespace
32  
    if (c == '\u201C' || c == '\u201D') c = '"'; // normalize quotes
33  
    if (c == '\'' || c == '"') {
34  
      char opener = c;
35  
      ++j;
36  
      while (j < l) {
37  
        char _c = s.charAt(j);
38  
        if (_c == '\u201C' || _c == '\u201D') _c = '"'; // normalize quotes
39  
        if (_c == opener) {
40  
          ++j;
41  
          break;
42  
        } else if (s.charAt(j) == '\\' && j+1 < l)
43  
          j += 2;
44  
        else
45  
          ++j;
46  
      }
47  
      if (j-1 >= i+1) {
48  
        tok.add(opener + s.substring(i+1, j-1) + opener);
49  
        i = j;
50  
        continue;
51  
      }
52  
    } else if (Character.isJavaIdentifierStart(c))
53  
      do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) /*|| s.charAt(j) == '\''*/ ));
54  
    else if (Character.isDigit(c))
55  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
56  
    else if (cc.equals("[[")) {
57  
      do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
58  
      j = Math.min(j+2, l);
59  
    } else if (cc.equals("[=") && i+2 < l && s.charAt(i+2) == '[') {
60  
      do ++j; while (j+2 < l && !s.substring(j, j+3).equals("]=]"));
61  
      j = Math.min(j+3, l);
62  
    } else if (s.substring(j, Math.min(j+3, l)).equals("..."))
63  
      j += 3;
64  
    else if (c == '$' || c == '#')
65  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
66  
    else
67  
      ++j;
68  
69  
    tok.add(s.substring(i, j));
70  
    i = j;
71  
  }
72  
  
73  
  if ((tok.size() % 2) == 0) tok.add("");
74  
  return tok;
75  
}

Author comment

Began life as a copy of #1000769

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1004163
Snippet name: nlTok3 - with quoted strings, without ' in identifiers
Eternal ID of this version: #1004163/1
Text MD5: ad55cef92456fa3948357bea6380b799
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-08-08 17:41:31
Source code size: 2300 bytes / 75 lines
Pitched / IR pitched: No / No
Views / Downloads: 587 / 597
Referenced in: [show references]