!747 m { static L tok; // list of tokens in HTML document static L table; // list of tokens in table static L> rows; // for every row, list of tokens in row static L> data; // for every row, for every cell, inner data p { S htmlID = "#3000025"; if (args.length != 0) htmlID = args[0]; S script = [[ find table ]]; S html = loadSnippet(htmlID); tok = htmlcoarsetok(html); for (S cmd : splitScript(script)) { L c = javaTok(cmd); if (c.size() == 1) continue; print("cmd: " + structure(c)); if (cmdMatch("find table", c) != null) { findTable(); if (table == null) fail("No table"); //print(fromLines(table)); } } } // split at newline, but also take into account multi-line strings static L splitScript(S script) { L tok = javaTok(script); new L result; result.add(""); for (int i = 0; i < tok.size(); i++) { boolean nl = tok.get(i).indexOf("\n") >= 0; if (nl) result.add(""); else result.set(result.size()-1, result.get(result.size()-1) + tok.get(i)); } return result; } static S[] cmdMatch(S pat, L cmd) { return match2(javaTok(pat), cmd); } static void findTable() { print("Finding table."); for (int i = 1; i < tok.size(); i += 2) if (isTag(tok.get(i), "table")) for (int j = i+2; j < tok.size(); j += 2) if (isTag(tok.get(j), "/table")) { print("Table found!"); table = tok.subList(i-1, j+2); findRows(); return; } } static void findRows() { L tok = table; rows = new ArrayList>(); data = new ArrayList>(); int rowStart = 0; for (int i = 1; i < table.size(); i += 2) { //print(tok.get(i)); if (isTag(tok.get(i), "tr")) rowStart = i; else if (isTag(tok.get(i), "/tr") && rowStart != 0) { L row = table.subList(rowStart-1, i+2); rows.add(row); data.add(getData(row)); } } print(rows.size() + " row(s)"); print("Top left cell: " + data.get(0).get(0)); } static boolean isTag(S token, S tag) { return token.regionMatches(true, 0, "<" + tag + " ", 0, tag.length()+2) || token.regionMatches(true, 0, "<" + tag + ">", 0, tag.length()+2); } static L getData(L row) { int colStart = 0; new L cols; for (int i = 1; i < row.size(); i += 2) { S t = row.get(i); if (isTag(t, "td") || isTag(t, "th")) colStart = i; else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) cols.add(join(row.subList(colStart+1, i))); } return cols; } }