Libraryless. Click here for Pure Java version (10927L/67K).
// Convenience overload: passes a null language, which the main overload
// replaces with "lang_en".
static L<T3<S>> quickGoogle2(S query) {
  ret quickGoogle2(query, null);
}

// Fetches a Google result page for `query` and returns one triple
// (link, text, desc) per result heading (<h3>) found in the page.
//
// query    - search terms; trimmed before use
// language - value for Google's "lr" URL parameter; null/empty falls back
//            to "lang_en"
// _        - optional named flags (read via optPar):
//              safeSearch - adds "&safe=active" to the URL and implies noCache
//              noCache    - skips both the cache lookup and the logging of results
//              debug      - prints intermediate parsing information
//
// Returns the cached result list when the cache has an entry for the query,
// otherwise the freshly scraped list (possibly empty).
//
// Side effects visible in this function: writes the fetched page to
// "last-google-result-page.html" in the JavaX caches dir, writes it to
// "buggy-google.html" too when no result could be extracted, and appends
// non-empty results to googleLog() unless noCache is set.
static L<T3<S>> quickGoogle2(S query, S language, O... _) {
  optPar bool safeSearch;
  optPar bool noCache;
  optPar bool debug;

  // Safe-search results are never read from or written to the cache.
  if (safeSearch) set noCache;

  language = or2(language, "lang_en");
  query = trim(query);

  // NOTE(review): the cache is looked up by query only; language is not part
  // of the key here - confirm against parseGoogleLog().
  L<T3<S>> out = noCache ? null : lookupPossiblyIgnoringCase(parseGoogleLog(), query);
  if (out != null) ret out with print("cache hit");
  out = new L;

  S userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0";
  set loadPageWithUserAgent_verbose;

  // Both query and language are URL-encoded so that characters such as '|'
  // or '&' in a caller-supplied language cannot corrupt the query string.
  // The default "lang_en" is unchanged by encoding.
  S html = loadPageWithUserAgent("https://www.google.com/search?q=" + urlencode(query)
    + "&lr=" + urlencode(language)
    + "&hl=en"
    + (safeSearch ? "&safe=active" : ""), userAgent);

  // If the response carried a Location header, load that URL instead.
  S url = first(loadPage_responseHeaders->get("Location"));
  if (url != null) html = loadPageWithUserAgent(url, userAgent);

  // Keep the last fetched page around for inspection.
  saveTextFile(javaxCachesDir("last-google-result-page.html"), html);

  L<S> htmlTok = htmlTok(html);
  LL<S> h3s = findContainerTagDeep(htmlTok, "h3");
  for (L<S> tok : h3s) {
    // Locate the <h3> within the page tokens; the token right before it is
    // expected to be the opening <a> tag that wraps the heading.
    int idx = magicIndexOfSubList(htmlTok, tok);
    S linkTag = get(htmlTok, idx-1);
    if (!tagIs(linkTag, "a")) continue with if (debug) print(+linkTag);

    // Only results with an absolute URL are kept.
    S link = tagGet(linkTag, "href");
    continue unless isAbsoluteURL(link);

    S text = htmldecode(join(dropTags(contentsOfContainerTag(tok))));

    // TODO (carried over from the original source; what remains to be done is not stated)
    // Description: first <span class="st"> found in the tokens following the heading.
    L<S> sub = subList(htmlTok, idx+l(tok)-1);
    LLS spans = findContainerTagWithParams(sub, "span", "class" := "st");
    if (debug) pnl("l(sub)=" + l(sub) + ", spans: " + l(spans));
    if (debug) pnl(+spans);
    S desc = trim(htmldecode(dropTags(join(first(spans)))));
    if (debug) printStruct(+desc);

    out.add(triple(link, text, desc));
  }

  // Nothing extracted: keep the page for debugging and do not log the empty result.
  if (empty(out)) {
    saveTextFile(javaxCachesDir("buggy-google.html"), html);
    ret out;
  }

  if (!noCache)
    logStructure(googleLog(), litorderedmap(+query, +language,
      date := localDateWithSeconds(), results := out));
  ret out;
}
Began life as a copy of #1011241
download show line numbers debug dex old transpilations
Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1011993 |
Snippet name: | quickGoogle2 - returns triples of (link, text, desc) |
Eternal ID of this version: | #1011993/41 |
Text MD5: | bc7cae08943ffc4299c78f821c04767d |
Transpilation MD5: | 909c625632eb81facb04a964396cf85b |
Author: | stefan |
Category: | javax / networking |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-07-22 21:29:12 |
Source code size: | 2078 bytes / 58 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1023 / 1217 |
Version history: | 40 change(s) |
Referenced in: | [show references] |