| 1 | static int loadPage_defaultTimeout = 60000; | 
| 2 | static new ThreadLocal<S> loadPage_charset; | 
| 3 | static boolean loadPage_allowGzip = true, loadPage_debug; | 
| 4 | static boolean loadPage_anonymous; // don't send computer ID | 
| 5 | static int loadPage_verboseness = 100000; | 
| 6 | static int loadPage_retries = 1; //60; // seconds | 
| 7 | static new ThreadLocal<Bool> loadPage_silent; | 
| 8 | static volatile int loadPage_forcedTimeout; // ms | 
| 9 | static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms | 
| 10 | static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal; | 
| 11 | static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal; | 
| 12 | static new ThreadLocal<Long> loadPage_sizeLimit; | 
| 13 | |
// Loads a page given its address as a string, without printing the "Loading:" line.
// The address is first normalised by loadPage_preprocess, then parsed into a URL.
public static String loadPageSilently(String url) ctex {
  String normalized = loadPage_preprocess(url);
  URL parsed = new URL(normalized);
  return loadPageSilently(parsed);
}
| 17 | |
// Loads the page at the given URL, retrying on IOException.
//
// url:     the address to fetch; it must pass networkAllowanceTest, otherwise fail() is called.
// returns: the page contents as produced by loadPage(con, url).
// throws:  the IOException of the last failed attempt once all attempts are used up
//          (ctex applies to the method, so callers see whatever ctex turns it into).
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Always try at least once. With loadPage_retries <= 0 the loop used to be skipped
  // entirely and "throw e" threw null, i.e. a NullPointerException without any request.
  int attempts = Math.max(1, loadPage_retries);

  IOException e = null;
  for (int tries = 0; tries < attempts; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException _e) {
      e = _e;
      if (loadPage_debug)
        print(exceptionToStringShort(e));
      // Pause between attempts, but not after the final one.
      if (tries < attempts-1) sleepSeconds(1);
    }
  throw e;
}
| 34 | |
| 35 | static String loadPage_preprocess(S url) {  
 | 
| 36 |   if (url.startsWith("tb/")) // don't think we use this anymore
 | 
| 37 | url = tb_mainServer() + "/" + url; | 
| 38 |   if (url.indexOf("://") < 0)
 | 
| 39 | url = "http://" + url; | 
| 40 | return url; | 
| 41 | } | 
| 42 | |
| 43 | static S loadPage(S url) ctex {
 | 
| 44 | url = loadPage_preprocess(url); | 
| 45 | if (!isTrue(loadPage_silent!)) | 
| 46 |     printWithTime("Loading: " + hideCredentials(url));
 | 
| 47 | ret loadPageSilently(new URL(url)); | 
| 48 | } | 
| 49 | |
| 50 | static S loadPage(URL url) {
 | 
| 51 | ret loadPage(url.toExternalForm()); | 
| 52 | } | 
| 53 | |
| 54 | static S loadPage(URLConnection con, URL url) throws IOException {
 | 
| 55 | ret loadPage(con, url, true); | 
| 56 | } | 
| 57 | |
| 58 | static String loadPage(URLConnection con, URL url, bool addHeaders) throws IOException {
 | 
| 59 | SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); | 
| 60 | Long limit = optPar(loadPage_sizeLimit); | 
| 61 |   if (addHeaders) try {
 | 
| 62 | if (!loadPage_anonymous) | 
| 63 | setHeaders(con); | 
| 64 | if (loadPage_allowGzip) | 
| 65 |       con.setRequestProperty("Accept-Encoding", "gzip");
 | 
| 66 |     con.setRequestProperty("X-No-Cookies", "1");
 | 
| 67 | for (S key : keys(extraHeaders)) | 
| 68 | con.setRequestProperty(key, extraHeaders.get(key)); | 
| 69 |   } catch (Throwable e) {} // fails if within doPost
 | 
| 70 | |
| 71 | ifndef LeanMode | 
| 72 |   vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
 | 
| 73 | endifndef | 
| 74 | loadPage_responseHeaders.set(con.getHeaderFields()); | 
| 75 | InputStream in = null; | 
| 76 |   try {
 | 
| 77 | in = urlConnection_getInputStream(con); | 
| 78 |   //vm_generalSubMap("InputStream per thread").put(currentThread(), in);
 | 
| 79 | if (loadPage_debug) | 
| 80 |     print("Put stream in map: " + currentThread());
 | 
| 81 | String contentType = con.getContentType(); | 
| 82 |     if (contentType == null) {
 | 
| 83 |       //printStruct("Headers: ", con.getHeaderFields());
 | 
| 84 |       throw new IOException("Page could not be read: " + hideCredentials(url));
 | 
| 85 | } | 
| 86 |     //print("Content-Type: " + contentType);
 | 
| 87 | String charset = loadPage_charset == null ? null : loadPage_charset.get(); | 
| 88 | if (charset == null) charset = loadPage_guessCharset(contentType); | 
| 89 | |
| 90 |     if ("gzip".equals(con.getContentEncoding())) {
 | 
| 91 | if (loadPage_debug) | 
| 92 |         print("loadPage: Using gzip.");
 | 
| 93 | in = newGZIPInputStream(in); | 
| 94 | } | 
| 95 | Reader r; | 
| 96 |     try {
 | 
| 97 | r = new InputStreamReader(in, unquote(charset)); | 
| 98 |     } catch (UnsupportedEncodingException e) {
 | 
| 99 | print(toHex(utf8(charset))); | 
| 100 | throw e; | 
| 101 | } | 
| 102 | |
| 103 | bool silent = isTrue(loadPage_silent!); | 
| 104 | new StringBuilder buf; | 
| 105 | int n = 0; | 
| 106 |     while (limit == null || n < limit) {
 | 
| 107 | ping(); | 
| 108 | int ch = r.read(); | 
| 109 | if (ch < 0) | 
| 110 | break; | 
| 111 | buf.append((char) ch); | 
| 112 | ++n; | 
| 113 | if (!silent && (n % loadPage_verboseness) == 0) | 
| 114 |         print("  " + n + " chars read");
 | 
| 115 | } | 
| 116 | return buf.toString(); | 
| 117 |   } finally {
 | 
| 118 | if (loadPage_debug) | 
| 119 |       print("loadPage done");
 | 
| 120 |     //vm_generalSubMap("InputStream per thread").remove(currentThread());
 | 
| 121 | ifndef LeanMode | 
| 122 |     vm_generalSubMap("URLConnection per thread").remove(currentThread());
 | 
| 123 | endifndef | 
| 124 | if (in != null) in.close(); | 
| 125 | } | 
| 126 | } | 
| 127 | |
| 128 | static String loadPage_guessCharset(String contentType) {
 | 
| 129 |   Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType);
 | 
| 130 | S match = m.matches() ? m.group(1) : null; | 
| 131 | if (loadPage_debug) | 
| 132 |     print("loadPage: contentType=" + contentType + ", match: " + match);
 | 
| 133 | /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ | 
| 134 | //return or(match, "ISO-8859-1"); | 
| 135 | return or(match, "UTF-8"); | 
| 136 | } | 
| 137 | |
| 138 | static URLConnection loadPage_openConnection(URL url) {
 | 
| 139 | URLConnection con = openConnection(url); | 
| 140 | int timeout = toInt(loadPage_forcedTimeout_byThread!); | 
| 141 | if (timeout == 0) timeout = loadPage_forcedTimeout; | 
| 142 | if (timeout != 0) | 
| 143 | setURLConnectionTimeouts(con, loadPage_forcedTimeout); | 
| 144 | else | 
| 145 | setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); | 
| 146 | ret con; | 
| 147 | } | 
Began life as a copy of #1000879
download show line numbers debug dex old transpilations
Travelled to 1 computer(s): mqqgnosmbjvj
No comments. add comment
| Snippet ID: | #1035654 | 
| Snippet name: | loadPage + loadPageSilently backup | 
| Eternal ID of this version: | #1035654/1 | 
| Text MD5: | 3d70ec54ead47d971f270b53495b92ec | 
| Author: | stefan | 
| Category: | |
| Type: | JavaX fragment (include) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2022-07-01 01:29:29 | 
| Source code size: | 5188 bytes / 147 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 390 / 401 | 
| Referenced in: | [show references] |