static L> quickGoogle2(S query) { ret quickGoogle2(query, null); } static L> quickGoogle2(S query, S language, O... _) { optPar bool safeSearch; optPar bool noCache; optPar bool debug; if (safeSearch) set noCache; language = or2(language, "lang_en"); query = trim(query); L> out = noCache ? null : lookupPossiblyIgnoringCase(parseGoogleLog(), query); if (out != null) ret out with print("cache hit"); out = new L; S userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"; set loadPageWithUserAgent_verbose; S html = loadPageWithUserAgent("https://www.google.com/search?q=" + urlencode(query) + "&lr=" + language + "&hl=en" + (safeSearch ? "&safe=active" : ""), userAgent); S url = first(loadPage_responseHeaders->get("Location")); if (url != null) html = loadPageWithUserAgent(url, userAgent); saveTextFile(javaxCachesDir("last-google-result-page.html"), html); L htmlTok = htmlTok(html); LL h3s = findContainerTagDeep(htmlTok, "h3"); for (L tok : h3s) { int idx = magicIndexOfSubList(htmlTok, tok); S linkTag = get(htmlTok, idx-1); if (!tagIs(linkTag, "a")) continue with if (debug) print(+linkTag); S link = tagGet(linkTag, "href"); continue unless isAbsoluteURL(link); S text = htmldecode(join(dropTags(contentsOfContainerTag(tok)))); // TODO L sub = subList(htmlTok, idx+l(tok)-1); LLS spans = findContainerTagWithParams(sub, "span", "class" := "st"); if (debug) pnl("l(sub)=" + l(sub) + ", spans: " + l(spans)); if (debug) pnl(+spans); S desc = trim(htmldecode(dropTags(join(first(spans))))); if (debug) printStruct(+desc); //S desc = ""; out.add(triple(link, text, desc)); } if (empty(out)) { saveTextFile(javaxCachesDir("buggy-google.html"), html); ret out; } if (!noCache) logStructure(googleLog(), litorderedmap(+query, +language, date := localDateWithSeconds(), results := out)); ret out; }