1 | static int loadPage_defaultTimeout = 60000; |
2 | static new ThreadLocal<S> loadPage_charset; |
3 | static boolean loadPage_allowGzip = true, loadPage_debug; |
4 | static boolean loadPage_anonymous; // don't send computer ID |
5 | static int loadPage_verboseness = 100000; |
6 | static int loadPage_retries = 1; //60; // seconds |
7 | static new ThreadLocal<Bool> loadPage_silent; |
8 | static volatile int loadPage_forcedTimeout; // ms |
9 | static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms |
10 | static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal; |
11 | static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal; |
12 | static new ThreadLocal<Long> loadPage_sizeLimit; |
13 | |
// Loads a page given as a string, without printing the "Loading: ..." message.
// The string is first normalized by loadPage_preprocess, then parsed as a URL.
// Checked exceptions (e.g. a malformed URL) are converted by ctex.
public static String loadPageSilently(String url) ctex {
  S fullUrl = loadPage_preprocess(url);
  ret loadPageSilently(new URL(fullUrl));
}
17 | |
// Loads a page from the given URL, without printing the "Loading: ..." message.
//
// The URL must pass networkAllowanceTest, otherwise fail("Not allowed: ...") is called.
// The request is attempted up to loadPage_retries times, sleeping one second
// between attempts. If every attempt ends in an IOException, the last one is
// rethrown (converted by ctex).
//
// At least one attempt is always made: with loadPage_retries <= 0 the loop would
// otherwise never run and "throw null" would surface as a NullPointerException.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Read the setting once so the loop bound and the sleep condition agree
  int attempts = Math.max(1, loadPage_retries);
  IOException lastError = null;
  for (int tries = 0; tries < attempts; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException _e) {
      lastError = _e;
      if (loadPage_debug)
        print(exceptionToStringShort(_e));
      // No point in sleeping after the final attempt
      if (tries < attempts-1) sleepSeconds(1);
    }
  throw lastError;
}
34 | |
35 | static String loadPage_preprocess(S url) { |
36 | if (url.startsWith("tb/")) // don't think we use this anymore |
37 | url = tb_mainServer() + "/" + url; |
38 | if (url.indexOf("://") < 0) |
39 | url = "http://" + url; |
40 | return url; |
41 | } |
42 | |
43 | static S loadPage(S url) ctex { |
44 | url = loadPage_preprocess(url); |
45 | if (!isTrue(loadPage_silent!)) |
46 | printWithTime("Loading: " + hideCredentials(url)); |
47 | ret loadPageSilently(new URL(url)); |
48 | } |
49 | |
50 | static S loadPage(URL url) { |
51 | ret loadPage(url.toExternalForm()); |
52 | } |
53 | |
54 | static S loadPage(URLConnection con, URL url) throws IOException { |
55 | ret loadPage(con, url, true); |
56 | } |
57 | |
58 | static String loadPage(URLConnection con, URL url, bool addHeaders) throws IOException { |
59 | SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders); |
60 | Long limit = optPar(loadPage_sizeLimit); |
61 | if (addHeaders) try { |
62 | if (!loadPage_anonymous) |
63 | setHeaders(con); |
64 | if (loadPage_allowGzip) |
65 | con.setRequestProperty("Accept-Encoding", "gzip"); |
66 | con.setRequestProperty("X-No-Cookies", "1"); |
67 | for (S key : keys(extraHeaders)) |
68 | con.setRequestProperty(key, extraHeaders.get(key)); |
69 | } catch (Throwable e) {} // fails if within doPost |
70 | |
71 | ifndef LeanMode |
72 | vm_generalSubMap("URLConnection per thread").put(currentThread(), con); |
73 | endifndef |
74 | loadPage_responseHeaders.set(con.getHeaderFields()); |
75 | InputStream in = null; |
76 | try { |
77 | in = urlConnection_getInputStream(con); |
78 | //vm_generalSubMap("InputStream per thread").put(currentThread(), in); |
79 | if (loadPage_debug) |
80 | print("Put stream in map: " + currentThread()); |
81 | String contentType = con.getContentType(); |
82 | if (contentType == null) { |
83 | //printStruct("Headers: ", con.getHeaderFields()); |
84 | throw new IOException("Page could not be read: " + hideCredentials(url)); |
85 | } |
86 | //print("Content-Type: " + contentType); |
87 | String charset = loadPage_charset == null ? null : loadPage_charset.get(); |
88 | if (charset == null) charset = loadPage_guessCharset(contentType); |
89 | |
90 | if ("gzip".equals(con.getContentEncoding())) { |
91 | if (loadPage_debug) |
92 | print("loadPage: Using gzip."); |
93 | in = newGZIPInputStream(in); |
94 | } |
95 | Reader r; |
96 | try { |
97 | r = new InputStreamReader(in, unquote(charset)); |
98 | } catch (UnsupportedEncodingException e) { |
99 | print(toHex(utf8(charset))); |
100 | throw e; |
101 | } |
102 | |
103 | bool silent = isTrue(loadPage_silent!); |
104 | new StringBuilder buf; |
105 | int n = 0; |
106 | while (limit == null || n < limit) { |
107 | ping(); |
108 | int ch = r.read(); |
109 | if (ch < 0) |
110 | break; |
111 | buf.append((char) ch); |
112 | ++n; |
113 | if (!silent && (n % loadPage_verboseness) == 0) |
114 | print(" " + n + " chars read"); |
115 | } |
116 | return buf.toString(); |
117 | } finally { |
118 | if (loadPage_debug) |
119 | print("loadPage done"); |
120 | //vm_generalSubMap("InputStream per thread").remove(currentThread()); |
121 | ifndef LeanMode |
122 | vm_generalSubMap("URLConnection per thread").remove(currentThread()); |
123 | endifndef |
124 | if (in != null) in.close(); |
125 | } |
126 | } |
127 | |
128 | static String loadPage_guessCharset(String contentType) { |
129 | Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType); |
130 | S match = m.matches() ? m.group(1) : null; |
131 | if (loadPage_debug) |
132 | print("loadPage: contentType=" + contentType + ", match: " + match); |
133 | /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ |
134 | //return or(match, "ISO-8859-1"); |
135 | return or(match, "UTF-8"); |
136 | } |
137 | |
138 | static URLConnection loadPage_openConnection(URL url) { |
139 | URLConnection con = openConnection(url); |
140 | int timeout = toInt(loadPage_forcedTimeout_byThread!); |
141 | if (timeout == 0) timeout = loadPage_forcedTimeout; |
142 | if (timeout != 0) |
143 | setURLConnectionTimeouts(con, loadPage_forcedTimeout); |
144 | else |
145 | setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout); |
146 | ret con; |
147 | } |
Began life as a copy of #1000879
download show line numbers debug dex old transpilations
Travelled to 1 computer(s): mqqgnosmbjvj
No comments. add comment
Snippet ID: | #1035654 |
Snippet name: | loadPage + loadPageSilently backup |
Eternal ID of this version: | #1035654/1 |
Text MD5: | 3d70ec54ead47d971f270b53495b92ec |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2022-07-01 01:29:29 |
Source code size: | 5188 bytes / 147 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 148 / 151 |
Referenced in: | [show references] |