Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

82
LINES

< > BotCompany Repo | #1000850 // TableFinder (find tables in HTML)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (2317L/15K).

// Finds tables in an HTML document and extracts their rows and cell contents.
//
// Usage: construct with the HTML (the first table is located right away unless
// findFirstTable is false), then read rows()/getRow(). Call findTable() again
// to advance to the next table in the document.
//
// Only odd token indices are inspected for tags; this presumably mirrors the
// layout produced by htmlcoarsetok (text at even, tags at odd positions) -
// TODO confirm against htmlcoarsetok.
static class TableFinder {
  LS tok; // list of tokens in HTML document
  LS table; // list of tokens in table (the most recently found one)
  LLS rows; // for every row, list of tokens in row
  LLS data; // for every row, for every cell, inner data
  int i; // position in tok where the next findTable() call resumes scanning
  boolean debug; // if true, progress messages are print()ed
  
  *() {}
  *(S html) { go(html); }
  *(S html, bool findFirstTable) { go(html, findFirstTable); }
  
  // Tokenizes html and looks for the first table.
  void go(S html) { go(html, true); }

  // Tokenizes html and resets the scan position; only searches for the
  // first table if findFirstTable is true.
  void go(S html, bool findFirstTable) {
    tok = htmlcoarsetok(html);
    i = 1;
    if (findFirstTable) findTable();
  }

  // Scans forward from the current position for the next <table> ... </table>
  // pair. On success, fills table/rows/data, moves the scan position to the
  // closing tag and returns true. Returns false if there is no further
  // complete table; table/rows/data are left untouched in that case.
  // Note: the first </table> after the opening tag ends the table, so nested
  // tables are not handled specially.
  boolean findTable() {
    if (debug) print("Finding table.");
    for (; i < tok.size(); i += 2) {
      if (!isTag(tok.get(i), "table")) continue;
      for (int j = i+2; j < tok.size(); j += 2) {
        if (isTag(tok.get(j), "/table")) {
          if (debug) print("Table found!");
          // include the token before the opening tag and the one after the closing tag
          table = tok.subList(i-1, j+2);
          findRows();
          i = j;
          return true;
        }
      }
    }
    ret false;
  }
  
  // Splits the current table into rows (<tr> ... </tr>) and extracts the
  // cell contents of every row into data.
  void findRows() {
    rows = new ArrayList<List<S>>();
    data = new ArrayList<List<S>>();
    int rowStart = 0; // index of the currently open <tr>; 0 = no open row
    
    for (int k = 1; k < table.size(); k += 2) {
      S t = table.get(k);
      if (isTag(t, "tr"))
        rowStart = k;
      else if (isTag(t, "/tr") && rowStart != 0) {
        L<S> row = table.subList(rowStart-1, k+2);
        rows.add(row);
        data.add(getData(row));
        // row is closed; a stray extra </tr> must not produce a duplicate row
        rowStart = 0;
      }
    }
    
    if (debug) print(rows.size() + " row(s)");
    // guard: the table may have no rows, or the first row may have no cells
    if (debug && !data.isEmpty() && !data.get(0).isEmpty())
      print("Top left cell: " + data.get(0).get(0));
  }
  
  // Returns true if token is an HTML tag with the given name (compared
  // case-insensitively), i.e. it starts with "<" + tag followed by ">" or
  // any whitespace character (space, tab, line break, ...).
  // Pass e.g. "/table" as tag to test for a closing tag.
  boolean isTag(S token, S tag) {
    int n = tag.length() + 1; // length of "<" + tag
    if (token.length() <= n || !token.regionMatches(true, 0, "<" + tag, 0, n))
      return false;
    char c = token.charAt(n); // character right after the tag name
    return c == '>' || Character.isWhitespace(c);
  }
  
  // called internally
  // Returns the inner content of every <td>/<th> cell in the given row
  // tokens: all tokens between the opening and closing cell tag, joined.
  L<S> getData(L<S> row) {
    int colStart = 0; // index of the currently open <td>/<th>; 0 = no open cell
    new L<S> cols;
    
    for (int k = 1; k < row.size(); k += 2) {
      S t = row.get(k);
      if (isTag(t, "td") || isTag(t, "th"))
        colStart = k;
      else if ((isTag(t, "/td") || isTag(t, "/th")) && colStart != 0) {
        cols.add(join(row.subList(colStart+1, k)));
        // cell is closed; a stray extra closing tag must not add it again
        colStart = 0;
      }
    }
    return cols;
  }
  
  // for clients
  // Returns the cell contents of the given row (0-based) of the current table.
  L<S> getRow(int row) {
    return data.get(row);
  }
  
  // Returns the cell contents of all rows of the current table
  // (null if no table has been found yet).
  LL<S> rows() { ret data; }
}

download  show line numbers  debug dex  old transpilations   

Travelled to 17 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1000850
Snippet name: TableFinder (find tables in HTML)
Eternal ID of this version: #1000850/7
Text MD5: 829cb943594f764d8ab50569b11d4434
Transpilation MD5: ad76960d33322c3eecaabbdc984d97b6
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-03-17 13:36:15
Source code size: 2276 bytes / 82 lines
Pitched / IR pitched: No / No
Views / Downloads: 764 / 2982
Version history: 6 change(s)
Referenced in: #1000849 - Get names of some parties from pouet (with links, static)
#1000903 - SelectFinder (find "select" tags in HTML, todo)
#1001215 - Update Snippet Cache Bot with latest changed snippets
#1002007 - Blog Headlines Bot
#1002341 - Parse Nouns List
#1003872 - Integrating #759 in One Program
#1003874 - Backup of #759 Before Integration
#1004091 - 759 with new loadClasses (spike)
#1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674)
#3000382 - Answer for ferdie (>> t = 1, f = 0)
#3000383 - Answer for funkoverflow (>> t=1, f=0 okay)