Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

82
LINES

< > BotCompany Repo | #1000850 // TableFinder (find tables in HTML)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (2317L/15K).

1  
// Finds <table> elements in an HTML document and extracts their rows
// and the inner content of every cell.
//
// Typical use, as far as this class shows: construct with the HTML
// (or call go()), then read the result via rows() / getRow(). Call
// findTable() again to advance to the next table in the document.
//
// Limitation: nested tables are not handled - the first </table> seen
// after a <table> is taken as its end.
//
// NOTE(review): all loops start at index 1 and step by 2, i.e. they
// assume htmlcoarsetok() puts tags at the odd indices of its result
// (text in between at even indices) - confirm against htmlcoarsetok.
static class TableFinder {
  LS tok; // list of tokens in HTML document
  LS table; // list of tokens in table
  LLS rows; // for every row, list of tokens in row
  LLS data; // for every row, for every cell, inner data
  int i; // position in tok where the next findTable() call resumes
  boolean debug; // when true, progress messages are printed
  
  *() {}
  *(S html) { go(html); }
  *(S html, bool findFirstTable) { go(html, findFirstTable); }
  
  // Tokenizes html and immediately looks for the first table.
  void go(S html) { go(html, true); }
  
  // Tokenizes html; only searches for the first table if findFirstTable is set.
  void go(S html, bool findFirstTable) {
    tok = htmlcoarsetok(html);
    i = 1;
    if (findFirstTable) findTable();
  }

  // Scans forward from the current position for the next <table>...</table>.
  // On success fills table, rows and data, moves the scan position to the
  // closing tag and returns true. Returns false if no (further) complete
  // table exists; table/rows/data then keep their previous values.
  boolean findTable() {
    if (debug) print("Finding table.");
    if (tok == null) return false; // go() has not been called yet
    for (; i < tok.size(); i += 2)
      if (isTag(tok.get(i), "table"))
        for (int j = i+2; j < tok.size(); j += 2)
          if (isTag(tok.get(j), "/table")) {
            if (debug) print("Table found!");
            // include the tokens directly before <table> and after </table>
            table = tok.subList(i-1, j+2);
            findRows();
            i = j;
            return true;
          }
    return false;
  }
  
  // Splits the current table into rows (<tr>...</tr>) and fills rows and data.
  void findRows() {
    L<S> tok = table;
    rows = new ArrayList<List<S>>();
    data = new ArrayList<List<S>>();
    int rowStart = 0; // index of the open <tr>, 0 = no row currently open
    
    for (int i = 1; i < table.size(); i += 2) {
      S t = tok.get(i);
      if (isTag(t, "tr"))
        rowStart = i;
      else if (isTag(t, "/tr") && rowStart != 0) {
        L<S> row = table.subList(rowStart-1, i+2);
        rows.add(row);
        data.add(getData(row));
        // close the row so a stray extra </tr> does not add it a second time
        rowStart = 0;
      }
    }
    
    if (debug) print(rows.size() + " row(s)");
    // only print the top left cell if there is one (empty table / empty first row)
    if (debug && !data.isEmpty() && !data.get(0).isEmpty())
      print("Top left cell: " + data.get(0).get(0));
  }
  
  // Returns true if token is a tag named tag (compared case-insensitively),
  // i.e. it starts with "<" + tag followed by '>' or a whitespace character.
  // Pass e.g. "/table" as tag to test for a closing tag.
  boolean isTag(S token, S tag) {
    if (token == null) return false;
    int n = tag.length()+1; // length of "<" + tag
    if (token.length() <= n || !token.regionMatches(true, 0, "<" + tag, 0, n))
      return false;
    // the name must end here, otherwise "<tr" would also match "<track>"
    char c = token.charAt(n);
    return c == '>' || Character.isWhitespace(c);
  }
  
  // called internally
  // Returns, for one row, the joined tokens between each <td>/<th> and its closing tag.
  L<S> getData(L<S> row) {
    int colStart = 0; // index of the open <td>/<th>, 0 = no cell currently open
    new L<S> cols;
    
    for (int i = 1; i < row.size(); i += 2) {
      S t = row.get(i);
      if (isTag(t, "td") || isTag(t, "th"))
        colStart = i;
      else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) {
        cols.add(join(row.subList(colStart+1, i)));
        // close the cell so a stray extra </td> does not add it a second time
        colStart = 0;
      }
    }
    return cols;
  }
  
  // for clients
  // Cell contents of the given row (0-based) of the current table.
  L<S> getRow(int row) {
    return data.get(row);
  }
  
  // Cell contents of all rows of the current table (this is data, not the raw row tokens).
  LL<S> rows() { ret data; }
}

download  show line numbers  debug dex  old transpilations   

Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1000850
Snippet name: TableFinder (find tables in HTML)
Eternal ID of this version: #1000850/7
Text MD5: 829cb943594f764d8ab50569b11d4434
Transpilation MD5: ad76960d33322c3eecaabbdc984d97b6
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-03-17 13:36:15
Source code size: 2276 bytes / 82 lines
Pitched / IR pitched: No / No
Views / Downloads: 684 / 2876
Version history: 6 change(s)
Referenced in: [show references]