Libraryless. Click here for Pure Java version (2317L/15K).
static class TableFinder { LS tok; // list of tokens in HTML document LS table; // list of tokens in table LLS rows; // for every row, list of tokens in row LLS data; // for every row, for every cell, inner data int i; boolean debug; *() {} *(S html) { go(html); } *(S html, bool findFirstTable) { go(html, findFirstTable); } void go(S html) { go(html, true); } void go(S html, bool findFirstTable) { tok = htmlcoarsetok(html); i = 1; if (findFirstTable) findTable(); } boolean findTable() { if (debug) print("Finding table."); for (; i < tok.size(); i += 2) if (isTag(tok.get(i), "table")) for (int j = i+2; j < tok.size(); j += 2) if (isTag(tok.get(j), "/table")) { if (debug) print("Table found!"); table = tok.subList(i-1, j+2); findRows(); i = j; return true; } ret false; } void findRows() { L<S> tok = table; rows = new ArrayList<List<S>>(); data = new ArrayList<List<S>>(); int rowStart = 0; for (int i = 1; i < table.size(); i += 2) { //print(tok.get(i)); if (isTag(tok.get(i), "tr")) rowStart = i; else if (isTag(tok.get(i), "/tr") && rowStart != 0) { L<S> row = table.subList(rowStart-1, i+2); rows.add(row); data.add(getData(row)); } } if (debug) print(rows.size() + " row(s)"); if (debug) print("Top left cell: " + data.get(0).get(0)); } boolean isTag(S token, S tag) { return token.regionMatches(true, 0, "<" + tag + " ", 0, tag.length()+2) || token.regionMatches(true, 0, "<" + tag + ">", 0, tag.length()+2); } // called internally L<S> getData(L<S> row) { int colStart = 0; new L<S> cols; for (int i = 1; i < row.size(); i += 2) { S t = row.get(i); if (isTag(t, "td") || isTag(t, "th")) colStart = i; else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) cols.add(join(row.subList(colStart+1, i))); } return cols; } // for clients L<S> getRow(int row) { return data.get(row); } LL<S> rows() { ret data; } }
download show line numbers debug dex old transpilations
Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment