// Convenience overload: passes null as the language, which the main
// overload replaces with "lang_en".
static L<T3<S, S, S>> quickGoogle2(S query) {
  ret quickGoogle2(query, null);
}

/*
 * Fetches a Google result page for the query and scrapes it into a list of
 * (link, title text, description) triples.
 *
 * query      - search terms; trimmed before use.
 * language   - value for Google's "lr" URL parameter; "lang_en" is used when
 *              or2() considers the argument absent.
 * _          - optional named arguments; "safeSearch" (bool) adds
 *              "&safe=active" to the request URL.
 *
 * Returns the scraped triples, or a previously logged result if the lookup
 * in parseGoogleLog() finds one for the query. The returned list is empty
 * when nothing could be scraped.
 *
 * Side effects visible here: prints diagnostics, writes the fetched page to
 * "last-google-result-page.html" in javaxCachesDir, writes
 * "buggy-google.html" when no results were found, and appends non-empty
 * results to googleLog().
 *
 * NOTE(review): the cache lookup is done by query only; language and
 * safeSearch do not appear in the lookup call, so a hit may come from a
 * search made with different settings - confirm whether that is intended.
 */
static L<T3<S, S, S>> quickGoogle2(S query, S language, O... _) {
  optBar bool safeSearch;
  language = or2(language, "lang_en");
  query = trim(query);

  // Reuse an earlier logged result for this query if there is one.
  L<T3<S, S, S>> out = lookupPossiblyIgnoringCase(parseGoogleLog(), query);
  if (out != null) ret out with print("cache hit");
  out = new L;

  S userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0";
  set loadPageWithUserAgent_verbose;

  // Every parameter after the first needs its own "&" separator; without it
  // the safe-search flag would be glued onto the value of "hl".
  S html = loadPageWithUserAgent("https://www.google.com/search?q=" + urlencode(query)
    + "&lr=" + language
    + "&hl=en"
    + (safeSearch ? "&safe=active" : ""), userAgent);

  // If the response carried a Location header, load that URL instead.
  S url = first(loadPage_responseHeaders->get("Location"));
  if (url != null) html = loadPageWithUserAgent(url, userAgent);

  // Keep the most recent page on disk for debugging.
  saveTextFile(javaxCachesDir("last-google-result-page.html"), html);

  L<S> htmlTok = htmlTok(html);
  LL<S> h3s = findContainerTagDeep(htmlTok, "h3");
  for (L<S> tok : h3s) {
    // The token directly before each <h3> container is expected to be the
    // <a> tag holding the result link; anything else is printed and skipped.
    int idx = magicIndexOfSubList(htmlTok, tok);
    S linkTag = get(htmlTok, idx-1);
    if (!tagIs(linkTag, "a")) continue with print(linkTag);
    S link = tagGet(linkTag, "href");
    continue unless isAbsoluteURL(link);
    S text = htmldecode(join(dropTags(contentsOfContainerTag(tok))));

    // TODO
    // Search the tokens following the <h3> container for the first
    // <span class="st">, which is used as the description.
    L<S> sub = subList(htmlTok, idx+l(tok)-1);
    LLS spans = findContainerTagWithParams(sub, "span", "class" := "st");
    pnl("l(sub)=" + l(sub) + ", spans: " + l(spans));
    pnl(+spans);
    S desc = trim(htmldecode(dropTags(join(first(spans)))));
    printStruct(+desc);
    //S desc = "";
    out.add(triple(link, text, desc));
  }

  // Nothing scraped: keep the page for inspection and do not log the result.
  if (empty(out)) {
    saveTextFile(javaxCachesDir("buggy-google.html"), html);
    ret out;
  }

  logStructure(googleLog(), litorderedmap(+query, +language, date := localDateWithSeconds(), results := out));
  ret out;
}