static int loadPage_defaultTimeout = 60000; static new ThreadLocal loadPage_charset; static boolean loadPage_allowGzip = true, loadPage_debug; static boolean loadPage_anonymous; // don't send computer ID static int loadPage_verboseness = 100000; static int loadPage_retries = 1; //60; // seconds static new ThreadLocal loadPage_silent; static volatile int loadPage_forcedTimeout; // ms static new ThreadLocal loadPage_forcedTimeout_byThread; // ms static ThreadLocal>> loadPage_responseHeaders = new ThreadLocal; static ThreadLocal loadPage_extraHeaders = new ThreadLocal; static new ThreadLocal loadPage_sizeLimit; public static String loadPageSilently(String url) ctex { return loadPageSilently(new URL(loadPage_preprocess(url))); } public static String loadPageSilently(URL url) ctex { if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url); IOException e = null; for (int tries = 0; tries < loadPage_retries; tries++) try { URLConnection con = loadPage_openConnection(url); ret loadPage(con, url); } catch (IOException _e) { e = _e; if (loadPage_debug) print(exceptionToStringShort(e)); if (tries < loadPage_retries-1) sleepSeconds(1); } throw e; } static String loadPage_preprocess(S url) { if (url.startsWith("tb/")) // don't think we use this anymore url = tb_mainServer() + "/" + url; if (url.indexOf("://") < 0) url = "http://" + url; return url; } static S loadPage(S url) ctex { url = loadPage_preprocess(url); if (!isTrue(loadPage_silent!)) printWithTime("Loading: " + hideCredentials(url)); ret loadPageSilently(new URL(url)); } static S loadPage(URL url) { ret loadPage(url.toExternalForm()); } static S loadPage(URLConnection con, URL url) throws IOException { ret loadPage(con, url, true); } sS loadPage(URLConnection con, URL url, bool addHeaders) ctex { SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); if (addHeaders) try { if (!loadPage_anonymous) setHeaders(con); if (loadPage_allowGzip) con.setRequestProperty("Accept-Encoding", "gzip"); con.setRequestProperty("X-No-Cookies", "1"); for (S key : keys(extraHeaders)) con.setRequestProperty(key, extraHeaders.get(key)); } catch (Throwable e) {} // fails if within doPost ret loadPage(con); } // just download as string, no shenanigans or extra headers or ANYTHING sS loadPage(URLConnection con) ctex { Long limit = optPar(loadPage_sizeLimit); URL url = con.getURL(); ifndef LeanMode vm_generalSubMap("URLConnection per thread").put(currentThread(), con); endifndef loadPage_responseHeaders.set(con.getHeaderFields()); InputStream in = null; try { in = urlConnection_getInputStream(con); //vm_generalSubMap("InputStream per thread").put(currentThread(), in); if (loadPage_debug) print("Put stream in map: " + currentThread()); String contentType = con.getContentType(); if (contentType == null) { //printStruct("Headers: ", con.getHeaderFields()); throw new IOException("Page could not be read: " + hideCredentials(url)); } //print("Content-Type: " + contentType); String charset = loadPage_charset == null ? null : loadPage_charset.get(); if (charset == null) charset = loadPage_guessCharset(contentType); if ("gzip".equals(con.getContentEncoding())) { if (loadPage_debug) print("loadPage: Using gzip."); in = newGZIPInputStream(in); } Reader r; try { r = new InputStreamReader(in, unquote(charset)); } catch (UnsupportedEncodingException e) { print(toHex(utf8(charset))); throw e; } bool silent = isTrue(loadPage_silent!); new StringBuilder buf; int n = 0; while (limit == null || n < limit) { ping(); int ch = r.read(); if (ch < 0) break; buf.append((char) ch); ++n; if (!silent && (n % loadPage_verboseness) == 0) print(" " + n + " chars read"); } return buf.toString(); } finally { if (loadPage_debug) print("loadPage done"); //vm_generalSubMap("InputStream per thread").remove(currentThread()); ifndef LeanMode vm_generalSubMap("URLConnection per thread").remove(currentThread()); endifndef if (in != null) in.close(); } } static String loadPage_guessCharset(String contentType) { Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType); S match = m.matches() ? m.group(1) : null; if (loadPage_debug) print("loadPage: contentType=" + contentType + ", match: " + match); /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ //return or(match, "ISO-8859-1"); return or(match, "UTF-8"); } static URLConnection loadPage_openConnection(URL url) { URLConnection con = openConnection(url); int timeout = toInt(loadPage_forcedTimeout_byThread!); if (timeout == 0) timeout = loadPage_forcedTimeout; if (timeout != 0) setURLConnectionTimeouts(con, loadPage_forcedTimeout); else setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); ret con; }