Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

80
LINES

< > BotCompany Repo | #1000769 // javaTokPlusPeriod

JavaX fragment (include)

1  
// This is made for NL parsing.
2  
// It's javaTok extended with "..." token, "$n" and "#n" and
3  
// special quotes (which are converted to normal ones).
4  
5  
static List<String> javaTokPlusPeriod(String s) {
6  
  List<String> tok = new ArrayList<String>();
7  
  if (s == null) ret tok;
8  
  int l = s.length();
9  
  
10  
  int i = 0;
11  
  while (i < l) {
12  
    int j = i;
13  
    char c; String cc;
14  
    
15  
    // scan for whitespace
16  
    while (j < l) {
17  
      c = s.charAt(j);
18  
      cc = s.substring(j, Math.min(j+2, l));
19  
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
20  
        ++j;
21  
      else if (cc.equals("/*")) {
22  
        do ++j; while (j < l && !s.substring(j, Math.min(j+2, l)).equals("*/"));
23  
        j = Math.min(j+2, l);
24  
      } else if (cc.equals("//")) {
25  
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
26  
      } else
27  
        break;
28  
    }
29  
    
30  
    tok.add(s.substring(i, j));
31  
    i = j;
32  
    if (i >= l) break;
33  
    c = s.charAt(i);
34  
    cc = s.substring(i, Math.min(i+2, l));
35  
36  
    // scan for non-whitespace
37  
    if (c == (char) 0x201C || c == (char) 0x201D) c = '"'; // normalize quotes
38  
    if (c == '\'' || c == '"') {
39  
      char opener = c;
40  
      ++j;
41  
      while (j < l) {
42  
        char _c = s.charAt(j);
43  
        if (_c == (char) 0x201C || _c == (char) 0x201D) _c = '"'; // normalize quotes
44  
        if (_c == opener) {
45  
          ++j;
46  
          break;
47  
        } else if (s.charAt(j) == '\\' && j+1 < l)
48  
          j += 2;
49  
        else
50  
          ++j;
51  
      }
52  
      if (j-1 >= i+1) {
53  
        tok.add(opener + s.substring(i+1, j-1) + opener);
54  
        i = j;
55  
        continue;
56  
      }
57  
    } else if (Character.isJavaIdentifierStart(c))
58  
      do ++j; while (j < l && (Character.isJavaIdentifierPart(s.charAt(j)) || s.charAt(j) == '\'')); // for things like "this one's"
59  
    else if (Character.isDigit(c))
60  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
61  
    else if (cc.equals("[[")) {
62  
      do ++j; while (j+1 < l && !s.substring(j, j+2).equals("]]"));
63  
      j = Math.min(j+2, l);
64  
    } else if (cc.equals("[=") && i+2 < l && s.charAt(i+2) == '[') {
65  
      do ++j; while (j+2 < l && !s.substring(j, j+3).equals("]=]"));
66  
      j = Math.min(j+3, l);
67  
    } else if (s.substring(j, Math.min(j+3, l)).equals("..."))
68  
      j += 3;
69  
    else if (c == '$' || c == '#')
70  
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
71  
    else
72  
      ++j;
73  
74  
    tok.add(s.substring(i, j));
75  
    i = j;
76  
  }
77  
  
78  
  if ((tok.size() % 2) == 0) tok.add("");
79  
  return tok;
80  
}

Author comment

Began life as a copy of #1000688

download  show line numbers  debug dex  old transpilations   

Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney

No comments. add comment

Snippet ID: #1000769
Snippet name: javaTokPlusPeriod
Eternal ID of this version: #1000769/3
Text MD5: cfa816f859e87e692ba747917d2bdec1
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2019-08-24 17:07:15
Source code size: 2539 bytes / 80 lines
Pitched / IR pitched: No / No
Views / Downloads: 749 / 8977
Version history: 2 change(s)
Referenced in: [show references]