Libraryless. Click here for Pure Java version (10507L/60K).
static int loadPage_defaultTimeout = 60000; static new ThreadLocal<S> loadPage_charset; static boolean loadPage_allowGzip = true, loadPage_debug; static boolean loadPage_anonymous; // don't send computer ID static int loadPage_verboseness = 100000; static int loadPage_retries = 1; //60; // seconds static new ThreadLocal<Bool> loadPage_silent; static volatile int loadPage_forcedTimeout; // ms static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal; static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal; static new ThreadLocal<Long> loadPage_sizeLimit; public static String loadPageSilently(String url) ctex { return loadPageSilently(new URL(loadPage_preprocess(url))); } public static String loadPageSilently(URL url) ctex { if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url); IOException e = null; for (int tries = 0; tries < loadPage_retries; tries++) try { URLConnection con = loadPage_openConnection(url); ret loadPage(con, url); } catch (IOException _e) { e = _e; if (loadPage_debug) print(exceptionToStringShort(e)); if (tries < loadPage_retries-1) sleepSeconds(1); } throw e; } static String loadPage_preprocess(S url) { if (url.startsWith("tb/")) // don't think we use this anymore url = tb_mainServer() + "/" + url; if (url.indexOf("://") < 0) url = "http://" + url; return url; } static S loadPage(S url) ctex { url = loadPage_preprocess(url); if (!isTrue(loadPage_silent!)) printWithTime("Loading: " + hideCredentials(url)); ret loadPageSilently(new URL(url)); } static S loadPage(URL url) { ret loadPage(url.toExternalForm()); } static S loadPage(URLConnection con, URL url) throws IOException { ret loadPage(con, url, true); } sS loadPage(URLConnection con, URL url, bool addHeaders) ctex { SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); if (addHeaders) try { if (!loadPage_anonymous) setHeaders(con); if (loadPage_allowGzip) con.setRequestProperty("Accept-Encoding", "gzip"); con.setRequestProperty("X-No-Cookies", "1"); for (S key : keys(extraHeaders)) con.setRequestProperty(key, extraHeaders.get(key)); } catch (Throwable e) {} // fails if within doPost ret loadPage(con); } // just download as string, no shenanigans or extra headers or ANYTHING sS loadPage(URLConnection con) ctex { Long limit = optPar(loadPage_sizeLimit); URL url = con.getURL(); ifndef LeanMode vm_generalSubMap("URLConnection per thread").put(currentThread(), con); endifndef loadPage_responseHeaders.set(con.getHeaderFields()); InputStream in = null; try { in = urlConnection_getInputStream(con); //vm_generalSubMap("InputStream per thread").put(currentThread(), in); if (loadPage_debug) print("Put stream in map: " + currentThread()); String contentType = con.getContentType(); if (contentType == null) { //printStruct("Headers: ", con.getHeaderFields()); throw new IOException("Page could not be read: " + hideCredentials(url)); } //print("Content-Type: " + contentType); String charset = loadPage_charset == null ? null : loadPage_charset.get(); if (charset == null) charset = loadPage_guessCharset(contentType); if ("gzip".equals(con.getContentEncoding())) { if (loadPage_debug) print("loadPage: Using gzip."); in = newGZIPInputStream(in); } Reader r; try { r = new InputStreamReader(in, unquote(charset)); } catch (UnsupportedEncodingException e) { print(toHex(utf8(charset))); throw e; } bool silent = isTrue(loadPage_silent!); new StringBuilder buf; int n = 0; while (limit == null || n < limit) { ping(); int ch = r.read(); if (ch < 0) break; buf.append((char) ch); ++n; if (!silent && (n % loadPage_verboseness) == 0) print(" " + n + " chars read"); } return buf.toString(); } finally { if (loadPage_debug) print("loadPage done"); //vm_generalSubMap("InputStream per thread").remove(currentThread()); ifndef LeanMode vm_generalSubMap("URLConnection per thread").remove(currentThread()); endifndef if (in != null) in.close(); } } static String loadPage_guessCharset(String contentType) { Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType); S match = m.matches() ? m.group(1) : null; if (loadPage_debug) print("loadPage: contentType=" + contentType + ", match: " + match); /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ //return or(match, "ISO-8859-1"); return or(match, "UTF-8"); } static URLConnection loadPage_openConnection(URL url) { URLConnection con = openConnection(url); int timeout = toInt(loadPage_forcedTimeout_byThread!); if (timeout == 0) timeout = loadPage_forcedTimeout; if (timeout != 0) setURLConnectionTimeouts(con, loadPage_forcedTimeout); else setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); ret con; }
Began life as a copy of #2000484
download show line numbers debug dex old transpilations
Travelled to 22 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, jtubtzbbkimh, lpdgvwnxivlt, mowyntqkapby, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, wnsclhtenguj, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1000879 |
Snippet name: | loadPage + loadPageSilently |
Eternal ID of this version: | #1000879/50 |
Text MD5: | 537ebda57db721e643853da133d94b99 |
Transpilation MD5: | 234ec74e3b73d863a165da90311f78c5 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2022-07-01 01:30:21 |
Source code size: | 5333 bytes / 154 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1093 / 7047 |
Version history: | 49 change(s) |
Referenced in: | #1001657 - loadPageWithTimeout #1002427 - Accellerating 629 (SPIKE) #1006654 - Standard functions list 2 (LIVE, continuation of #761) #1035654 - loadPage + loadPageSilently backup #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |