Libraryless. Click here for Pure Java version (568L/5K/14K).
1 | !747 |
2 | |
3 | m {
|
4 | static L<S> tok; // tokens of the loaded HTML document (result of htmlcoarsetok); the code looks for tags at odd indices |
5 | static L<S> table; // sub-list of tok covering the table located by findTable; stays null if none is found |
6 | static L<L<S>> rows; // for every row, the sub-list of table tokens spanning that row (filled by findRows) |
7 | static L<L<S>> data; // for every row, for every cell, the joined inner tokens of that cell (filled via getData) |
8 | |
9 | p {
|
10 | S htmlID = "#3000025"; |
11 | if (args.length != 0) htmlID = args[0]; |
12 | |
13 | S script = [[ |
14 | find table |
15 | ]]; |
16 | |
17 | S html = loadSnippet(htmlID); |
18 | tok = htmlcoarsetok(html); |
19 | |
20 | for (S cmd : splitScript(script)) {
|
21 | L<S> c = javaTok(cmd); |
22 | if (c.size() == 1) continue; |
23 | |
24 | print("cmd: " + structure(c));
|
25 | |
26 | if (cmdMatch("find table", c) != null) {
|
27 | findTable(); |
28 | if (table == null) fail("No table");
|
29 | //print(fromLines(table)); |
30 | } |
31 | } |
32 | } |
33 | |
34 | // split at newline, but also take into account multi-line strings |
35 | static L<S> splitScript(S script) {
|
36 | L<S> tok = javaTok(script); |
37 | new L<S> result; |
38 | result.add("");
|
39 | for (int i = 0; i < tok.size(); i++) {
|
40 | boolean nl = tok.get(i).indexOf("\n") >= 0;
|
41 | if (nl) |
42 | result.add("");
|
43 | else |
44 | result.set(result.size()-1, result.get(result.size()-1) + tok.get(i)); |
45 | } |
46 | return result; |
47 | } |
48 | |
49 | static S[] cmdMatch(S pat, L<S> cmd) {
|
50 | return match2(javaTok(pat), cmd); |
51 | } |
52 | |
53 | static void findTable() {
|
54 | print("Finding table.");
|
55 | for (int i = 1; i < tok.size(); i += 2) |
56 | if (isTag(tok.get(i), "table")) |
57 | for (int j = i+2; j < tok.size(); j += 2) |
58 | if (isTag(tok.get(j), "/table")) {
|
59 | print("Table found!");
|
60 | table = tok.subList(i-1, j+2); |
61 | findRows(); |
62 | return; |
63 | } |
64 | } |
65 | |
66 | static void findRows() {
|
67 | L<S> tok = table; |
68 | rows = new ArrayList<List<S>>(); |
69 | data = new ArrayList<List<S>>(); |
70 | int rowStart = 0; |
71 | |
72 | for (int i = 1; i < table.size(); i += 2) {
|
73 | //print(tok.get(i)); |
74 | if (isTag(tok.get(i), "tr")) |
75 | rowStart = i; |
76 | else if (isTag(tok.get(i), "/tr") && rowStart != 0) {
|
77 | L<S> row = table.subList(rowStart-1, i+2); |
78 | rows.add(row); |
79 | data.add(getData(row)); |
80 | } |
81 | } |
82 | |
83 | print(rows.size() + " row(s)"); |
84 | print("Top left cell: " + data.get(0).get(0));
|
85 | } |
86 | |
87 | static boolean isTag(S token, S tag) {
|
88 | return token.regionMatches(true, 0, "<" + tag + " ", 0, tag.length()+2) |
89 | || token.regionMatches(true, 0, "<" + tag + ">", 0, tag.length()+2); |
90 | } |
91 | |
92 | static L<S> getData(L<S> row) {
|
93 | int colStart = 0; |
94 | new L<S> cols; |
95 | |
96 | for (int i = 1; i < row.size(); i += 2) {
|
97 | S t = row.get(i); |
98 | if (isTag(t, "td") || isTag(t, "th")) |
99 | colStart = i; |
100 | else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) |
101 | cols.add(join(row.subList(colStart+1, i))); |
102 | } |
103 | return cols; |
104 | } |
105 | } |
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1000846 |
| Snippet name: | HTML parsing |
| Eternal ID of this version: | #1000846/1 |
| Text MD5: | 08f81fe49d16e7511c8ffd9608af0452 |
| Transpilation MD5: | 8bee9b6fe52304604253ae97de7d5535 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX source code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-08-30 16:11:16 |
| Source code size: | 2892 bytes / 105 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 853 / 960 |
| Referenced in: | [show references] |