1 | // TODO: process CDATA, scripts |
2 | |
3 | static List<String> htmlcoarsetok(String s) { |
4 | List<String> tok = new ArrayList<String>(); |
5 | int l = s == null ? 0 : s.length(); |
6 | |
7 | int i = 0; |
8 | while (i < l) { |
9 | int j = i; |
10 | char c; |
11 | |
12 | // scan for non-tags |
13 | while (j < l) { |
14 | if (s.charAt(j) != '<') |
15 | // regular character |
16 | ++j; |
17 | else if (s.substring(j, Math.min(j+4, l)).equals("<!--")) { |
18 | // HTML comment |
19 | j = j+4; |
20 | do ++j; while (j < l && !s.substring(j, Math.min(j+3, l)).equals("-->")); |
21 | j = Math.min(j+3, l); |
22 | } else |
23 | // it's a tag |
24 | break; |
25 | } |
26 | tok.add(s.substring(i, j)); |
27 | i = j; |
28 | if (i >= l) break; |
29 | c = s.charAt(i); |
30 | |
31 | // scan for tags |
32 | if (c == '<') { |
33 | ++j; |
34 | |
35 | while (j < l && s.charAt(j) != '>') ++j; // TODO: strings? |
36 | if (j < l) ++j; |
37 | } |
38 | |
39 | tok.add(s.substring(i, j)); |
40 | i = j; |
41 | } |
42 | |
43 | if ((tok.size() % 2) == 0) tok.add(""); |
44 | return tok; |
45 | } |
Began life as a copy of #1000670
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt
No comments. add comment
Snippet ID: | #1030282 |
Snippet name: | htmlcoarsetok [backup] |
Eternal ID of this version: | #1030282/1 |
Text MD5: | 48de0706a35ab5a5353b803994f8d124 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-11-25 14:43:59 |
Source code size: | 1013 bytes / 45 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 209 / 229 |
Referenced in: | [show references] |