Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

76
LINES

< > BotCompany Repo | #1002709 // snlTok

JavaX fragment (include)

1  
// This is made for SNL parsing.
2  
// It does NOT recognize multiline strings as these conflict
3  
// with syntax like [[a] [b]].
4  
5  
static List<String> snlTok(String s) {
6  
  List<String> tok = new ArrayList<String>();
7  
  int l = s.length();
8  
  
9  
  int i = 0;
10  
  while (i < l) {
11  
    int j = i;
12  
    char c; String cc;
13  
    
14  
    // scan for whitespace
15  
    while (j < l) {
16  
      c = s.charAt(j);
17  
      cc = s.substring(j, Math.min(j+2, l));
18  
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
19  
        ++j;
20  
      else if (cc.equals("/*")) {
21  
        do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/"));
22  
        j = Math.min(j+2, l);
23  
      } else if (cc.equals("//")) {
24  
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
25  
      } else
26  
        break;
27  
    }
28  
    
29  
    tok.add(s.substring(i, j));
30  
    i = j;
31  
    if (i >= l) break;
32  
    c = s.charAt(i);
33  
    cc = s.substring(i, Math.min(i+2, l));
34  
35  
    // scan for non-whitespace
36  
    if (c == '\u201C' || c == '\u201D') c = '"'; // normalize quotes
37  
    if (c == '\'' || c == '"') {
38  
      char opener = c;
39  
      ++j;
40  
      while (j < l) {
41  
        char _c = s.charAt(j);
42  
        if (_c == '\u201C' || _c == '\u201D') _c = '"'; // normalize quotes
43  
        if (_c == opener) {
44  
          ++j;
45  
          break;
46  
        } else if (s.charAt(j) == '\\' && j+1 < l)
47  
          j += 2;
48  
        else
49  
          ++j;
50  
      }
51  
      if (j-1 >= i+1) {
52  
        tok.add(opener + s.substring(i+1, j-1) + opener);
53  
        i = j;
54  
        continue;
55  
      }
56  
    } else if (Character.isJavaIdentifierStart(c))
57  
      do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for things like "this one's"
58  
    else if (Character.isDigit(c))
59  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
60  
    /*else if (cc.equals("[[")) {
61  
      do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
62  
      j = Math.min(j+2, l);
63  
    }*/ else if (s.substring(j, Math.min(j+3, l)).equals("..."))
64  
      j += 3;
65  
    else if (c == '$' || c == '#')
66  
      do ++j; while (j < l && Character.isLetterOrDigit(s.charAt(j)));
67  
    else
68  
      ++j;
69  
70  
    tok.add(s.substring(i, j));
71  
    i = j;
72  
  }
73  
  
74  
  if ((tok.size() % 2) == 0) tok.add("");
75  
  return tok;
76  
}

Author comment

Began life as a copy of #1000769

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1002709
Snippet name: snlTok
Eternal ID of this version: #1002709/1
Text MD5: 0cb5f5e5aded500b78e10471a8a46853
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2016-03-02 17:37:36
Source code size: 2300 bytes / 76 lines
Pitched / IR pitched: No / No
Views / Downloads: 617 / 1259
Referenced in: [show references]