Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

58
LINES

< > BotCompany Repo | #1011993 // quickGoogle2 - returns triples of (link, text, desc)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (10927L/67K).

1  
// Convenience overload: searches for `query` without choosing a language.
// Passes null as the language to the (query, language, ...) variant.
static L<T3<S>> quickGoogle2(S query) {
  S language = null;
  ret quickGoogle2(query, language);
}
4  
5  
// Runs a Google web search for `query` and scrapes the result page into a
// list of (link, text, desc) triples.
//
// Parameters:
//   query    - search terms; trimmed before use.
//   language - value for Google's "lr" URL parameter; "lang_en" is used when
//              or2() treats the argument as missing.
//   _        - optional named parameters: safeSearch, noCache, debug.
//
// Returns the list of triples. The list is empty when no result could be
// extracted; in that case the page is also saved as "buggy-google.html" and
// nothing is logged.
//
// Side effects: sets loadPageWithUserAgent_verbose, writes the fetched page
// to "last-google-result-page.html" in the JavaX caches dir, and (unless
// noCache is set) appends the results to the google log.
static L<T3<S>> quickGoogle2(S query, S language, O... _) {
  optPar bool safeSearch;
  optPar bool noCache;
  optPar bool debug;
  // Safe-search requests bypass the cache entirely (no lookup, no logging).
  if (safeSearch) set noCache;

  language = or2(language, "lang_en");
  query = trim(query);
  // NOTE(review): the lookup is keyed by query only, although the log entry
  // written at the end also records the language - confirm that a cached
  // result from a different language is acceptable here.
  L<T3<S>> out = noCache ? null : lookupPossiblyIgnoringCase(parseGoogleLog(), query);
  if (out != null) ret out with print("cache hit");
  out = new L;

  S userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0";
  set loadPageWithUserAgent_verbose;
  // Both caller-supplied values are URL-encoded so that characters such as
  // '&', '#' or spaces cannot corrupt or inject query parameters.
  S html = loadPageWithUserAgent("https://www.google.com/search?q=" + urlencode(query) + "&lr=" + urlencode(language) + "&hl=en"
    + (safeSearch ? "&safe=active" : ""), userAgent);
  // If the response carried a Location header, fetch that URL instead
  // (followed once only).
  S url = first(loadPage_responseHeaders->get("Location"));
  if (url != null)
    html = loadPageWithUserAgent(url, userAgent);

  // Keep the most recent page on disk for inspection.
  saveTextFile(javaxCachesDir("last-google-result-page.html"), html);
  L<S> htmlTok = htmlTok(html);
  // Each result is located via its <h3> element.
  LL<S> h3s = findContainerTagDeep(htmlTok, "h3");
  for (L<S> tok : h3s) {
    int idx = magicIndexOfSubList(htmlTok, tok);
    // The token right before the <h3> is expected to be the enclosing <a> tag.
    S linkTag = get(htmlTok, idx-1);
    if (!tagIs(linkTag, "a")) continue with if (debug) print(+linkTag);

    S link = tagGet(linkTag, "href");
    // Skip results whose href is not an absolute URL.
    continue unless isAbsoluteURL(link);

    S text = htmldecode(join(dropTags(contentsOfContainerTag(tok))));

    // TODO
    // Description: the first <span class="st"> found in the tokens starting
    // at the end of this <h3>.
    L<S> sub = subList(htmlTok, idx+l(tok)-1);
    LLS spans = findContainerTagWithParams(sub, "span", "class" := "st");
    if (debug) pnl("l(sub)=" + l(sub) + ", spans: " + l(spans));
    if (debug) pnl(+spans);
    S desc = trim(htmldecode(dropTags(join(first(spans)))));
    if (debug) printStruct(+desc);

    //S desc = "";
    out.add(triple(link, text, desc));
  }

  if (empty(out)) {
    // Nothing could be extracted: keep the page for debugging and do not
    // log the empty result.
    saveTextFile(javaxCachesDir("buggy-google.html"), html);
    ret out;
  }

  if (!noCache)
    logStructure(googleLog(), litorderedmap(+query, +language, date := localDateWithSeconds(), results := out));
  ret out;
}

Author comment

Began life as a copy of #1011241

download  show line numbers  debug dex  old transpilations   

Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, xrpafgyirdlv

No comments. add comment

Snippet ID: #1011993
Snippet name: quickGoogle2 - returns triples of (link, text, desc)
Eternal ID of this version: #1011993/41
Text MD5: bc7cae08943ffc4299c78f821c04767d
Transpilation MD5: 909c625632eb81facb04a964396cf85b
Author: stefan
Category: javax / networking
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2019-07-22 21:29:12
Source code size: 2078 bytes / 58 lines
Pitched / IR pitched: No / No
Views / Downloads: 1025 / 1220
Version history: 40 change(s)
Referenced in: [show references]