static class TableFinder { LS tok; // list of tokens in HTML document LS table; // list of tokens in table LLS rows; // for every row, list of tokens in row LLS data; // for every row, for every cell, inner data int i; boolean debug; *() {} *(S html) { go(html); } *(S html, bool findFirstTable) { go(html, findFirstTable); } void go(S html) { go(html, true); } void go(S html, bool findFirstTable) { tok = htmlcoarsetok(html); i = 1; if (findFirstTable) findTable(); } boolean findTable() { if (debug) print("Finding table."); for (; i < tok.size(); i += 2) if (isTag(tok.get(i), "table")) for (int j = i+2; j < tok.size(); j += 2) if (isTag(tok.get(j), "/table")) { if (debug) print("Table found!"); table = tok.subList(i-1, j+2); findRows(); i = j; return true; } ret false; } void findRows() { L tok = table; rows = new ArrayList>(); data = new ArrayList>(); int rowStart = 0; for (int i = 1; i < table.size(); i += 2) { //print(tok.get(i)); if (isTag(tok.get(i), "tr")) rowStart = i; else if (isTag(tok.get(i), "/tr") && rowStart != 0) { L row = table.subList(rowStart-1, i+2); rows.add(row); data.add(getData(row)); } } if (debug) print(rows.size() + " row(s)"); if (debug) print("Top left cell: " + data.get(0).get(0)); } boolean isTag(S token, S tag) { return token.regionMatches(true, 0, "<" + tag + " ", 0, tag.length()+2) || token.regionMatches(true, 0, "<" + tag + ">", 0, tag.length()+2); } // called internally L getData(L row) { int colStart = 0; new L cols; for (int i = 1; i < row.size(); i += 2) { S t = row.get(i); if (isTag(t, "td") || isTag(t, "th")) colStart = i; else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) cols.add(join(row.subList(colStart+1, i))); } return cols; } // for clients L getRow(int row) { return data.get(row); } LL rows() { ret data; } }