static new ThreadLocal loadPage_charset; static boolean loadPage_allowGzip = true, loadPage_debug; static boolean loadPage_anonymous; // don't send computer ID static int loadPage_verboseness = 100000; static int loadPage_retries = 1; //60; // seconds static new ThreadLocal loadPage_silent; public static String loadPageSilently(String url) ctex { return loadPageSilently(new URL(loadPage_preprocess(url))); } public static String loadPageSilently(URL url) ctex { IOException e = null; for (int tries = 0; tries < loadPage_retries; tries++) try { URLConnection con = openConnection(url); return loadPage(con, url); } catch (IOException _e) { e = _e; if (loadPageThroughProxy_enabled) { print("Trying proxy because of: " + e); try { ret loadPageThroughProxy(str(url)); } catch (Throwable e2) { print(" " + exceptionToStringShort(e2)); } } else if (loadPage_debug) print(e); sleepSeconds(1); } throw e; } static String loadPage_preprocess(S url) { if (url.startsWith("tb/")) // don't think we use this anymore url = tb_mainServer() + "/" + url; if (url.indexOf("://") < 0) url = "http://" + url; return url; } public static String loadPage(String url) ctex { url = loadPage_preprocess(url); if (!isTrue(loadPage_silent!)) print("Loading: " + hideCredentials(url)); return loadPageSilently(new URL(url)); } public static String loadPage(URL url) { print("Loading: " + hideCredentials(url.toExternalForm())); return loadPageSilently(url); } public static String loadPage(URLConnection con, URL url) throws IOException { try { if (!loadPage_anonymous) setHeaders(con); if (loadPage_allowGzip) con.setRequestProperty("Accept-Encoding", "gzip"); con.setRequestProperty("X-No-Cookies", "1"); } catch (Throwable e) {} // fails if within doPost String contentType = con.getContentType(); if (contentType == null) throw new IOException("Page could not be read: " + url); //print("Content-Type: " + contentType); String charset = loadPage_charset == null ? null : loadPage_charset.get(); if (charset == null) charset = loadPage_guessCharset(contentType); InputStream in = con.getInputStream(); try { if ("gzip".equals(con.getContentEncoding())) { if (loadPage_debug) print("loadPage: Using gzip."); in = new GZIPInputStream(in); } Reader r = new InputStreamReader(in, charset); StringBuilder buf = new StringBuilder(); int n = 0; while (true) { int ch = r.read(); if (ch < 0) break; buf.append((char) ch); ++n; if ((n % loadPage_verboseness) == 0) print(" " + n + " chars read"); } return buf.toString(); } finally { in.close(); } } static String loadPage_guessCharset(String contentType) { Pattern p = Pattern.compile("text/[a-z]+;\\s*charset=([^\\s]+)\\s*"); Matcher m = p.matcher(contentType); S match = m.matches() ? m.group(1) : null; if (loadPage_debug) print("loadPage: contentType=" + contentType + ", match: " + match); /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ return or(match, "ISO-8859-1"); }