1 | static int loadPage_defaultTimeout = 60000; |
2 | static new ThreadLocal<S> loadPage_charset; |
3 | static boolean loadPage_allowGzip = true, loadPage_debug; |
4 | static boolean loadPage_anonymous; // don't send computer ID |
5 | static int loadPage_verboseness = 100000; |
6 | static int loadPage_retries = 1; //60; // seconds |
7 | static new ThreadLocal<Bool> loadPage_silent; |
8 | static volatile int loadPage_forcedTimeout; // ms |
9 | static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms |
10 | static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal; |
11 | static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal; |
12 | static new ThreadLocal<Long> loadPage_sizeLimit; |
13 | |
// Loads a page given as a string: the string is normalized by
// loadPage_preprocess, parsed into a URL and handed to loadPageSilently(URL).
public static String loadPageSilently(String url) ctex {
  URL parsed = new URL(loadPage_preprocess(url));
  return loadPageSilently(parsed);
}
17 | |
// Loads the page behind url, trying up to loadPage_retries times.
//
// Fails immediately if networkAllowanceTest rejects the URL. Each attempt
// opens a fresh connection; an IOException triggers a one-second pause and
// another attempt. When all attempts have failed, the IOException of the
// last attempt is thrown.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Always make at least one attempt. Otherwise a setting of 0 (or less)
  // would skip the loop and end in "throw null", i.e. a NullPointerException.
  int attempts = Math.max(1, loadPage_retries);

  IOException lastError = null;
  for (int tries = 0; tries < attempts; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException e) {
      lastError = e;
      if (loadPage_debug)
        print(exceptionToStringShort(e));
      // no point in sleeping after the final attempt
      if (tries < attempts-1) sleepSeconds(1);
    }
  throw lastError;
}
34 | |
35 | static String loadPage_preprocess(S url) {
|
36 | if (url.startsWith("tb/")) // don't think we use this anymore
|
37 | url = tb_mainServer() + "/" + url; |
38 | if (url.indexOf("://") < 0)
|
39 | url = "http://" + url; |
40 | return url; |
41 | } |
42 | |
43 | static S loadPage(S url) ctex {
|
44 | url = loadPage_preprocess(url); |
45 | if (!isTrue(loadPage_silent!)) |
46 | printWithTime("Loading: " + hideCredentials(url));
|
47 | ret loadPageSilently(new URL(url)); |
48 | } |
49 | |
50 | static S loadPage(URL url) {
|
51 | ret loadPage(url.toExternalForm()); |
52 | } |
53 | |
54 | static S loadPage(URLConnection con, URL url) throws IOException {
|
55 | ret loadPage(con, url, true); |
56 | } |
57 | |
58 | static String loadPage(URLConnection con, URL url, bool addHeaders) throws IOException {
|
59 | SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); |
60 | Long limit = optPar(loadPage_sizeLimit); |
61 | if (addHeaders) try {
|
62 | if (!loadPage_anonymous) |
63 | setHeaders(con); |
64 | if (loadPage_allowGzip) |
65 | con.setRequestProperty("Accept-Encoding", "gzip");
|
66 | con.setRequestProperty("X-No-Cookies", "1");
|
67 | for (S key : keys(extraHeaders)) |
68 | con.setRequestProperty(key, extraHeaders.get(key)); |
69 | } catch (Throwable e) {} // fails if within doPost
|
70 | |
71 | ifndef LeanMode |
72 | vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
|
73 | endifndef |
74 | loadPage_responseHeaders.set(con.getHeaderFields()); |
75 | InputStream in = null; |
76 | try {
|
77 | in = urlConnection_getInputStream(con); |
78 | //vm_generalSubMap("InputStream per thread").put(currentThread(), in);
|
79 | if (loadPage_debug) |
80 | print("Put stream in map: " + currentThread());
|
81 | String contentType = con.getContentType(); |
82 | if (contentType == null) {
|
83 | //printStruct("Headers: ", con.getHeaderFields());
|
84 | throw new IOException("Page could not be read: " + hideCredentials(url));
|
85 | } |
86 | //print("Content-Type: " + contentType);
|
87 | String charset = loadPage_charset == null ? null : loadPage_charset.get(); |
88 | if (charset == null) charset = loadPage_guessCharset(contentType); |
89 | |
90 | if ("gzip".equals(con.getContentEncoding())) {
|
91 | if (loadPage_debug) |
92 | print("loadPage: Using gzip.");
|
93 | in = newGZIPInputStream(in); |
94 | } |
95 | Reader r; |
96 | try {
|
97 | r = new InputStreamReader(in, unquote(charset)); |
98 | } catch (UnsupportedEncodingException e) {
|
99 | print(toHex(utf8(charset))); |
100 | throw e; |
101 | } |
102 | |
103 | bool silent = isTrue(loadPage_silent!); |
104 | new StringBuilder buf; |
105 | int n = 0; |
106 | while (limit == null || n < limit) {
|
107 | ping(); |
108 | int ch = r.read(); |
109 | if (ch < 0) |
110 | break; |
111 | buf.append((char) ch); |
112 | ++n; |
113 | if (!silent && (n % loadPage_verboseness) == 0) |
114 | print(" " + n + " chars read");
|
115 | } |
116 | return buf.toString(); |
117 | } finally {
|
118 | if (loadPage_debug) |
119 | print("loadPage done");
|
120 | //vm_generalSubMap("InputStream per thread").remove(currentThread());
|
121 | ifndef LeanMode |
122 | vm_generalSubMap("URLConnection per thread").remove(currentThread());
|
123 | endifndef |
124 | if (in != null) in.close(); |
125 | } |
126 | } |
127 | |
128 | static String loadPage_guessCharset(String contentType) {
|
129 | Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType);
|
130 | S match = m.matches() ? m.group(1) : null; |
131 | if (loadPage_debug) |
132 | print("loadPage: contentType=" + contentType + ", match: " + match);
|
133 | /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ |
134 | //return or(match, "ISO-8859-1"); |
135 | return or(match, "UTF-8"); |
136 | } |
137 | |
138 | static URLConnection loadPage_openConnection(URL url) {
|
139 | URLConnection con = openConnection(url); |
140 | int timeout = toInt(loadPage_forcedTimeout_byThread!); |
141 | if (timeout == 0) timeout = loadPage_forcedTimeout; |
142 | if (timeout != 0) |
143 | setURLConnectionTimeouts(con, loadPage_forcedTimeout); |
144 | else |
145 | setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); |
146 | ret con; |
147 | } |
Began life as a copy of #1000879
download show line numbers debug dex old transpilations
Travelled to 1 computer(s): mqqgnosmbjvj
No comments. add comment
| Snippet ID: | #1035654 |
| Snippet name: | loadPage + loadPageSilently backup |
| Eternal ID of this version: | #1035654/1 |
| Text MD5: | 3d70ec54ead47d971f270b53495b92ec |
| Author: | stefan |
| Category: | |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2022-07-01 01:29:29 |
| Source code size: | 5188 bytes / 147 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 434 / 428 |
| Referenced in: | [show references] |