Libraryless. Click here for Pure Java version (10507L/60K).
1 | static int loadPage_defaultTimeout = 60000; // passed to setURLConnectionDefaultTimeouts when no forced timeout applies (presumably ms - TODO confirm) |
2 | static new ThreadLocal<S> loadPage_charset; // per-thread charset override; when it yields null the charset is guessed from Content-Type |
3 | static boolean loadPage_allowGzip = true, loadPage_debug; // allowGzip: send "Accept-Encoding: gzip"; debug: print diagnostic messages |
4 | static boolean loadPage_anonymous; // don't send computer ID (setHeaders is skipped when true) |
5 | static int loadPage_verboseness = 100000; // a progress line is printed every this many chars read |
6 | static int loadPage_retries = 1; //60; // seconds -- NOTE(review): used as the number of attempts in loadPageSilently, with a 1 second sleep between attempts |
7 | static new ThreadLocal<Bool> loadPage_silent; // per-thread flag: suppresses the "Loading:" line and progress output when true |
8 | static volatile int loadPage_forcedTimeout; // ms; global forced timeout, consulted when the per-thread value resolves to 0 |
9 | static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms; per-thread forced timeout, consulted first |
10 | static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal; // per-thread: header fields of the response last read by loadPage(URLConnection) |
11 | static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal; // per-thread: extra request headers, fetched via getAndClearThreadLocal per request |
12 | static new ThreadLocal<Long> loadPage_sizeLimit; // per-thread: max number of chars to read, obtained via optPar; null means no limit |
13 | // Loads the page at the given URL string (normalized by loadPage_preprocess first); unlike loadPage(S) it prints no "Loading:" line. |
14 | public static String loadPageSilently(String url) ctex { |
15 | return loadPageSilently(new URL(loadPage_preprocess(url))); |
16 | } |
17 | |
18 | public static String loadPageSilently(URL url) ctex { |
19 | if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url); |
20 | |
21 | IOException e = null; |
22 | for (int tries = 0; tries < loadPage_retries; tries++) |
23 | try { |
24 | URLConnection con = loadPage_openConnection(url); |
25 | ret loadPage(con, url); |
26 | } catch (IOException _e) { |
27 | e = _e; |
28 | if (loadPage_debug) |
29 | print(exceptionToStringShort(e)); |
30 | if (tries < loadPage_retries-1) sleepSeconds(1); |
31 | } |
32 | throw e; |
33 | } |
34 | // Normalizes a URL string: a leading "tb/" is prefixed with tb_mainServer(), and "http://" is prepended when the string contains no "://". |
35 | static String loadPage_preprocess(S url) { |
36 | S address = url; |
37 | bool legacyTb = address.startsWith("tb/"); // legacy shortcut, probably no longer used |
38 | if (legacyTb) address = tb_mainServer() + "/" + address; |
39 | bool hasScheme = address.contains("://"); |
40 | ret hasScheme ? address : "http://" + address; |
41 | } |
42 | // Loads a page by URL string; unless loadPage_silent is true for this thread, first prints a "Loading:" line (URL passed through hideCredentials) via printWithTime. |
43 | static S loadPage(S url) ctex { |
44 | S fullUrl = loadPage_preprocess(url); |
45 | bool quiet = isTrue(loadPage_silent!); |
46 | if (!quiet) printWithTime("Loading: " + hideCredentials(fullUrl)); |
47 | ret loadPageSilently(new URL(fullUrl)); |
48 | } |
49 | // Convenience overload: loads the page via the URL's external string form, delegating to loadPage(S). |
50 | static S loadPage(URL url) { |
51 | ret loadPage(url.toExternalForm()); |
52 | } |
53 | // Loads from an already created connection with addHeaders = true, i.e. the request headers are set before reading. |
54 | static S loadPage(URLConnection con, URL url) throws IOException { |
55 | ret loadPage(con, url, true); |
56 | } |
57 | // Loads from a connection; when addHeaders is true it first tries (best effort) to set the request headers, including the per-thread extra headers. The url parameter is not used here. |
58 | sS loadPage(URLConnection con, URL url, bool addHeaders) ctex { |
59 | SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); // fetched even when addHeaders is false |
60 | if (addHeaders) try { |
61 | if (!loadPage_anonymous) |
62 | setHeaders(con); |
63 | if (loadPage_allowGzip) |
64 | con.setRequestProperty("Accept-Encoding", "gzip"); |
65 | con.setRequestProperty("X-No-Cookies", "1"); |
66 | for (S key : keys(extraHeaders)) |
67 | con.setRequestProperty(key, extraHeaders.get(key)); |
68 | } catch (Throwable e) { if (loadPage_debug) print("loadPage: could not set request headers: " + e); } // still best effort (fails if within doPost), but no longer invisible in debug mode |
69 | |
70 | ret loadPage(con); |
71 | } |
72 | // Reads the response body of con into a String, using the per-thread charset override or a charset guessed from Content-Type, and stopping early at the optional size limit. |
73 | // just download as string, no shenanigans or extra headers or ANYTHING |
74 | sS loadPage(URLConnection con) ctex { |
75 | Long limit = optPar(loadPage_sizeLimit); // max chars to read; null = unlimited |
76 | URL url = con.getURL(); // only used for the error message below |
77 | |
78 | ifndef LeanMode |
79 | vm_generalSubMap("URLConnection per thread").put(currentThread(), con); // registered while loading; removed again in finally |
80 | endifndef |
81 | loadPage_responseHeaders.set(con.getHeaderFields()); // make the response headers available to this thread |
82 | InputStream in = null; |
83 | try { |
84 | in = urlConnection_getInputStream(con); |
85 | //vm_generalSubMap("InputStream per thread").put(currentThread(), in); |
86 | if (loadPage_debug) |
87 | print("Put stream in map: " + currentThread()); // NOTE(review): message looks stale - the map put above is commented out |
88 | String contentType = con.getContentType(); |
89 | if (contentType == null) { // a response without Content-Type is treated as unreadable |
90 | //printStruct("Headers: ", con.getHeaderFields()); |
91 | throw new IOException("Page could not be read: " + hideCredentials(url)); |
92 | } |
93 | //print("Content-Type: " + contentType); |
94 | String charset = loadPage_charset == null ? null : loadPage_charset.get(); // per-thread override takes precedence |
95 | if (charset == null) charset = loadPage_guessCharset(contentType); // otherwise derive it from Content-Type |
96 | |
97 | if ("gzip".equals(con.getContentEncoding())) { // exact, case-sensitive match on the Content-Encoding value |
98 | if (loadPage_debug) |
99 | print("loadPage: Using gzip."); |
100 | in = newGZIPInputStream(in); // replace the stream with the gzip wrapper; finally closes whichever stream in refers to |
101 | } |
102 | Reader r; |
103 | try { |
104 | r = new InputStreamReader(in, unquote(charset)); // presumably the charset may arrive quoted in the header - TODO confirm |
105 | } catch (UnsupportedEncodingException e) { |
106 | print(toHex(utf8(charset))); // print the rejected charset name in hex before rethrowing |
107 | throw e; |
108 | } |
109 | |
110 | bool silent = isTrue(loadPage_silent!); |
111 | new StringBuilder buf; |
112 | int n = 0; // number of chars read so far |
113 | while (limit == null || n < limit) { // stop at end of stream or once limit chars were read |
114 | ping(); |
115 | int ch = r.read(); |
116 | if (ch < 0) // end of stream |
117 | break; |
118 | buf.append((char) ch); |
119 | ++n; |
120 | if (!silent && (n % loadPage_verboseness) == 0) // progress output every loadPage_verboseness chars |
121 | print(" " + n + " chars read"); |
122 | } |
123 | return buf.toString(); |
124 | } finally { |
125 | if (loadPage_debug) |
126 | print("loadPage done"); |
127 | //vm_generalSubMap("InputStream per thread").remove(currentThread()); |
128 | ifndef LeanMode |
129 | vm_generalSubMap("URLConnection per thread").remove(currentThread()); |
130 | endifndef |
131 | if (in != null) in.close(); // in is null when obtaining the input stream failed |
132 | } |
133 | } |
134 | // Extracts the charset from a Content-Type value of the form "text/<subtype>; charset=<name>"; any value not matching that form yields "UTF-8". |
135 | static String loadPage_guessCharset(String contentType) { |
136 | Matcher matcher = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType); |
137 | S declared = null; |
138 | if (matcher.matches()) declared = matcher.group(1); |
139 | if (loadPage_debug) print("loadPage: contentType=" + contentType + ", match: " + declared); |
140 | // Content-Type did not fit the expected pattern: fall back to a default and hope for the best. |
141 | //return or(match, "ISO-8859-1"); |
142 | ret or(declared, "UTF-8"); |
143 | } |
144 | // Opens a connection to url and configures its timeouts: the per-thread forced timeout if non-zero, else the global forced timeout if non-zero, else the default timeout. |
145 | static URLConnection loadPage_openConnection(URL url) { |
146 | URLConnection con = openConnection(url); |
147 | int timeout = toInt(loadPage_forcedTimeout_byThread!); |
148 | if (timeout == 0) timeout = loadPage_forcedTimeout; // no per-thread value: fall back to the global one |
149 | if (timeout != 0) |
150 | setURLConnectionTimeouts(con, timeout); // pass the resolved value so a per-thread timeout actually takes effect |
151 | else |
152 | setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); |
153 | ret con; |
154 | } |
Began life as a copy of #2000484
download show line numbers debug dex old transpilations
Travelled to 22 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, jtubtzbbkimh, lpdgvwnxivlt, mowyntqkapby, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, wnsclhtenguj, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1000879 |
Snippet name: | loadPage + loadPageSilently |
Eternal ID of this version: | #1000879/50 |
Text MD5: | 537ebda57db721e643853da133d94b99 |
Transpilation MD5: | 234ec74e3b73d863a165da90311f78c5 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2022-07-01 01:30:21 |
Source code size: | 5333 bytes / 154 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1129 / 7103 |
Version history: | 49 change(s) |
Referenced in: | [show references] |