Libraryless. Click here for Pure Java version (10927L/67K).
/**
 * Convenience overload: runs the search for {@code query} without an explicit language.
 * Passes null as the language; the two-argument overload then substitutes "lang_en" via or2.
 *
 * @param query the search terms
 * @return whatever the two-argument overload returns for this query
 */
1 | static L<T3<S>> quickGoogle2(S query) { |
2 | ret quickGoogle2(query, null); |
3 | } |
4 | |
/**
 * Loads a Google result page for {@code query} and scrapes it into triples of
 * (link, text, desc): the result URL, the heading text and the description snippet.
 *
 * Optional flags are declared with optPar (presumably read from the trailing varargs - confirm):
 * safeSearch (adds "&safe=active" to the URL and forces noCache), noCache (skips both the
 * log lookup and the log write), debug (prints intermediate parsing values).
 *
 * Visible side effects: always writes the fetched page to "last-google-result-page.html" under
 * javaxCachesDir; writes it again as "buggy-google.html" when nothing could be parsed; appends
 * the results to googleLog() unless noCache is set; sets loadPageWithUserAgent_verbose.
 *
 * @param query    the search terms; trimmed before use
 * @param language value for the "lr" URL parameter; replaced by "lang_en" through or2 when
 *                 not supplied (presumably null or empty - confirm or2 semantics)
 * @return the list of result triples - either the logged list found for the query or the
 *         freshly scraped one; an empty list when no result could be extracted
 */
5 | static L<T3<S>> quickGoogle2(S query, S language, O... _) { |
6 | optPar bool safeSearch; |
7 | optPar bool noCache; |
8 | optPar bool debug; |
// Safe-search requests bypass the log entirely: noCache gates both the lookup and the write below.
9 | if (safeSearch) set noCache; |
10 | |
11 | language = or2(language, "lang_en"); |
12 | query = trim(query); |
// Try the parsed log first.
// NOTE(review): the lookup uses only the query, while the log entry also records the language -
// a hit may come from a search in a different language; verify against parseGoogleLog.
13 | L<T3<S>> out = noCache ? null : lookupPossiblyIgnoringCase(parseGoogleLog(), query); |
14 | if (out != null) ret out with print("cache hit"); |
15 | out = new L; |
16 | |
// Fixed desktop Firefox user agent string sent with every request made here.
17 | S userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"; |
// NOTE(review): this flag is not reset before returning - presumably stays enabled for later callers; confirm.
18 | set loadPageWithUserAgent_verbose; |
// NOTE(review): query is URL-encoded but language is concatenated as-is.
19 | S html = loadPageWithUserAgent("https://www.google.com/search?q=" + urlencode(query) + "&lr=" + language + "&hl=en" |
20 | + (safeSearch ? "&safe=active" : ""), userAgent); |
// If the response headers carry a "Location" entry, load that URL instead (one manual redirect hop).
21 | S url = first(loadPage_responseHeaders->get("Location")); |
22 | if (url != null) |
23 | html = loadPageWithUserAgent(url, userAgent); |
24 | |
// Keep the raw page on disk for later inspection.
25 | saveTextFile(javaxCachesDir("last-google-result-page.html"), html); |
26 | L<S> htmlTok = htmlTok(html); |
// Every <h3> container found is treated as the heading of one search result.
27 | LL<S> h3s = findContainerTagDeep(htmlTok, "h3"); |
28 | for (L<S> tok : h3s) { |
// Locate the heading inside the full token list; the token directly before it must be the <a> tag.
29 | int idx = magicIndexOfSubList(htmlTok, tok); |
30 | S linkTag = get(htmlTok, idx-1); |
31 | if (!tagIs(linkTag, "a")) continue with if (debug) print(+linkTag); |
32 | |
// Only results whose href is an absolute URL are kept.
33 | S link = tagGet(linkTag, "href"); |
34 | continue unless isAbsoluteURL(link); |
35 | |
// Heading text: contents of the h3 with tags dropped and HTML entities decoded.
36 | S text = htmldecode(join(dropTags(contentsOfContainerTag(tok)))); |
37 | |
38 | // TODO |
// Search for the description from the last token of this h3 onwards (assuming subList(list, from)
// yields the tail of the list - confirm).
// NOTE(review): the search range has no upper bound, so if this result has no span of class "st",
// the first match may belong to a later result.
39 | L<S> sub = subList(htmlTok, idx+l(tok)-1); |
40 | LLS spans = findContainerTagWithParams(sub, "span", "class" := "st"); |
41 | if (debug) pnl("l(sub)=" + l(sub) + ", spans: " + l(spans)); |
42 | if (debug) pnl(+spans); |
// Description: the first matching span, joined, tags dropped, entities decoded, trimmed.
43 | S desc = trim(htmldecode(dropTags(join(first(spans))))); |
44 | if (debug) printStruct(+desc); |
45 | |
46 | //S desc = ""; |
47 | out.add(triple(link, text, desc)); |
48 | } |
49 | |
// Nothing extracted: keep the page under a separate name and return the empty list without logging it.
50 | if (empty(out)) { |
51 | saveTextFile(javaxCachesDir("buggy-google.html"), html); |
52 | ret out; |
53 | } |
54 | |
// Record query, language, timestamp and results in the log unless caching is disabled.
55 | if (!noCache) |
56 | logStructure(googleLog(), litorderedmap(+query, +language, date := localDateWithSeconds(), results := out)); |
57 | ret out; |
58 | } |
Began life as a copy of #1011241
download show line numbers debug dex old transpilations
Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1011993 |
Snippet name: | quickGoogle2 - returns triples of (link, text, desc) |
Eternal ID of this version: | #1011993/41 |
Text MD5: | bc7cae08943ffc4299c78f821c04767d |
Transpilation MD5: | 909c625632eb81facb04a964396cf85b |
Author: | stefan |
Category: | javax / networking |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-07-22 21:29:12 |
Source code size: | 2078 bytes / 58 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1025 / 1220 |
Version history: | 40 change(s) |
Referenced in: | [show references] |