Libraryless. Click here for Pure Java version (2317L/15K).
1 | static class TableFinder { |
2 | LS tok; // list of tokens in HTML document |
3 | LS table; // list of tokens in table |
4 | LLS rows; // for every row, list of tokens in row |
5 | LLS data; // for every row, for every cell, inner data |
6 | int i; |
7 | boolean debug; |
8 | |
9 | *() {} |
10 | *(S html) { go(html); } |
11 | *(S html, bool findFirstTable) { go(html, findFirstTable); } |
12 | |
13 | void go(S html) { go(html, true); } |
14 | void go(S html, bool findFirstTable) { |
15 | tok = htmlcoarsetok(html); |
16 | i = 1; |
17 | if (findFirstTable) findTable(); |
18 | } |
19 | |
20 | boolean findTable() { |
21 | if (debug) print("Finding table."); |
22 | for (; i < tok.size(); i += 2) |
23 | if (isTag(tok.get(i), "table")) |
24 | for (int j = i+2; j < tok.size(); j += 2) |
25 | if (isTag(tok.get(j), "/table")) { |
26 | if (debug) print("Table found!"); |
27 | table = tok.subList(i-1, j+2); |
28 | findRows(); |
29 | i = j; |
30 | return true; |
31 | } |
32 | ret false; |
33 | } |
34 | |
35 | void findRows() { |
36 | L<S> tok = table; |
37 | rows = new ArrayList<List<S>>(); |
38 | data = new ArrayList<List<S>>(); |
39 | int rowStart = 0; |
40 | |
41 | for (int i = 1; i < table.size(); i += 2) { |
42 | //print(tok.get(i)); |
43 | if (isTag(tok.get(i), "tr")) |
44 | rowStart = i; |
45 | else if (isTag(tok.get(i), "/tr") && rowStart != 0) { |
46 | L<S> row = table.subList(rowStart-1, i+2); |
47 | rows.add(row); |
48 | data.add(getData(row)); |
49 | } |
50 | } |
51 | |
52 | if (debug) print(rows.size() + " row(s)"); |
53 | if (debug) print("Top left cell: " + data.get(0).get(0)); |
54 | } |
55 | |
56 | boolean isTag(S token, S tag) { |
57 | return token.regionMatches(true, 0, "<" + tag + " ", 0, tag.length()+2) |
58 | || token.regionMatches(true, 0, "<" + tag + ">", 0, tag.length()+2); |
59 | } |
60 | |
61 | // called internally |
62 | L<S> getData(L<S> row) { |
63 | int colStart = 0; |
64 | new L<S> cols; |
65 | |
66 | for (int i = 1; i < row.size(); i += 2) { |
67 | S t = row.get(i); |
68 | if (isTag(t, "td") || isTag(t, "th")) |
69 | colStart = i; |
70 | else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) |
71 | cols.add(join(row.subList(colStart+1, i))); |
72 | } |
73 | return cols; |
74 | } |
75 | |
76 | // for clients |
77 | L<S> getRow(int row) { |
78 | return data.get(row); |
79 | } |
80 | |
81 | LL<S> rows() { ret data; } |
82 | } |
download show line numbers debug dex old transpilations
Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1000850 |
Snippet name: | TableFinder (find tables in HTML) |
Eternal ID of this version: | #1000850/7 |
Text MD5: | 829cb943594f764d8ab50569b11d4434 |
Transpilation MD5: | ad76960d33322c3eecaabbdc984d97b6 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-03-17 13:36:15 |
Source code size: | 2276 bytes / 82 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 765 / 2983 |
Version history: | 6 change(s) |
Referenced in: | [show references] |