// TODO: process CDATA sections and <script> contents
2 | |
3 | static List<String> htmlcoarsetok(String s) {
|
4 | List<String> tok = new ArrayList<String>(); |
5 | int l = s == null ? 0 : s.length(); |
6 | |
7 | int i = 0; |
8 | while (i < l) {
|
9 | int j = i; |
10 | char c; |
11 | |
12 | // scan for non-tags |
13 | while (j < l) {
|
14 | if (s.charAt(j) != '<') |
15 | // regular character |
16 | ++j; |
17 | else if (s.substring(j, Math.min(j+4, l)).equals("<!--")) {
|
18 | // HTML comment |
19 | j = j+4; |
20 | do ++j; while (j < l && !s.substring(j, Math.min(j+3, l)).equals("-->"));
|
21 | j = Math.min(j+3, l); |
22 | } else |
23 | // it's a tag |
24 | break; |
25 | } |
26 | tok.add(s.substring(i, j)); |
27 | i = j; |
28 | if (i >= l) break; |
29 | c = s.charAt(i); |
30 | |
31 | // scan for tags |
32 | if (c == '<') {
|
33 | ++j; |
34 | |
35 | while (j < l && s.charAt(j) != '>') ++j; // TODO: strings? |
36 | if (j < l) ++j; |
37 | } |
38 | |
39 | tok.add(s.substring(i, j)); |
40 | i = j; |
41 | } |
42 | |
43 | if ((tok.size() % 2) == 0) tok.add("");
|
44 | return tok; |
45 | } |
Began life as a copy of #1000670
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1030282 |
| Snippet name: | htmlcoarsetok [backup] |
| Eternal ID of this version: | #1030282/1 |
| Text MD5: | 48de0706a35ab5a5353b803994f8d124 |
| Author: | stefan |
| Category: | |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-11-25 14:43:59 |
| Source code size: | 1013 bytes / 45 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 442 / 466 |
| Referenced in: | [show references] |