Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

147
LINES

< > BotCompany Repo | #1035654 // loadPage + loadPageSilently backup

JavaX fragment (include)

1  
static int loadPage_defaultTimeout = 60000;
2  
static new ThreadLocal<S> loadPage_charset;
3  
static boolean loadPage_allowGzip = true, loadPage_debug;
4  
static boolean loadPage_anonymous; // don't send computer ID
5  
static int loadPage_verboseness = 100000;
6  
static int loadPage_retries = 1; //60; // seconds
7  
static new ThreadLocal<Bool> loadPage_silent;
8  
static volatile int loadPage_forcedTimeout; // ms
9  
static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms
10  
static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal;
11  
static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal;
12  
static new ThreadLocal<Long> loadPage_sizeLimit;
13  
14  
// Loads a page given its address as a string, without the "Loading: ..." print-out.
// The address goes through loadPage_preprocess before being parsed as a URL.
// NOTE(review): ctex is a JavaX modifier, presumably converting checked
// exceptions (e.g. MalformedURLException) to unchecked ones - confirm.
public static String loadPageSilently(String url) ctex {
  String fullUrl = loadPage_preprocess(url);
  URL parsed = new URL(fullUrl);
  return loadPageSilently(parsed);
}
17  
18  
// Loads the page at url without the "Loading: ..." print-out.
//
// Fails immediately (via fail) if networkAllowanceTest rejects the URL.
// Otherwise opens a connection and reads the page, retrying on IOException
// up to loadPage_retries times with a one-second pause between attempts.
// If every attempt fails, the last IOException is thrown.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Guarantee at least one attempt: with loadPage_retries <= 0 the loop would
  // otherwise be skipped and "throw e" would throw a NullPointerException.
  int maxTries = Math.max(1, loadPage_retries);

  IOException e = null;
  for (int tries = 0; tries < maxTries; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException _e) {
      e = _e; // remember the most recent failure for the final throw
      if (loadPage_debug)
        print(exceptionToStringShort(e));
      // no point in sleeping after the last attempt
      if (tries < maxTries-1) sleepSeconds(1);
    }
  throw e;
}
34  
35  
static String loadPage_preprocess(S url) {  
36  
  if (url.startsWith("tb/")) // don't think we use this anymore
37  
    url = tb_mainServer() + "/" + url;
38  
  if (url.indexOf("://") < 0)
39  
    url = "http://" + url;
40  
  return url;
41  
}
42  
43  
// Loads a page given its address as a string.
// Unless loadPage_silent is true for this thread, a "Loading: ..." line
// (with credentials hidden) is printed first.
static S loadPage(S url) ctex {
  S fullUrl = loadPage_preprocess(url);
  boolean silent = isTrue(loadPage_silent.get());
  if (!silent)
    printWithTime("Loading: " + hideCredentials(fullUrl));
  URL parsed = new URL(fullUrl);
  return loadPageSilently(parsed);
}
49  
50  
static S loadPage(URL url) {
51  
  ret loadPage(url.toExternalForm());
52  
}
53  
54  
static S loadPage(URLConnection con, URL url) throws IOException {
55  
  ret loadPage(con, url, true);
56  
}
57  
58  
// Reads the response of con and returns it as text.
//
// con        - the connection to read from
// url        - used for the error message only
// addHeaders - if true, request headers are set on con first (best effort)
//
// Uses and clears the thread's loadPage_extraHeaders, reads the size limit
// through optPar(loadPage_sizeLimit), and stores the response headers in
// loadPage_responseHeaders.
// The charset is loadPage_charset if set, otherwise guessed from the
// Content-Type. A gzip content encoding is decompressed.
// Throws IOException if the connection reports no content type.
static String loadPage(URLConnection con, URL url, bool addHeaders) throws IOException {
  SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders);
  Long limit = optPar(loadPage_sizeLimit);

  if (addHeaders) {
    try {
      if (!loadPage_anonymous) setHeaders(con);
      if (loadPage_allowGzip) con.setRequestProperty("Accept-Encoding", "gzip");
      con.setRequestProperty("X-No-Cookies", "1");
      for (S headerName : keys(extraHeaders))
        con.setRequestProperty(headerName, extraHeaders.get(headerName));
    } catch (Throwable ignored) {
      // deliberately ignored: fails if within doPost
    }
  }

  ifndef LeanMode
  vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
  endifndef
  loadPage_responseHeaders.set(con.getHeaderFields());

  InputStream stream = null;
  try {
    stream = urlConnection_getInputStream(con);
    if (loadPage_debug)
      print("Put stream in map: " + currentThread());

    String contentType = con.getContentType();
    if (contentType == null)
      throw new IOException("Page could not be read: " + hideCredentials(url));

    // Explicit per-thread charset wins; otherwise derive it from the Content-Type
    String charset = null;
    if (loadPage_charset != null) charset = loadPage_charset.get();
    if (charset == null) charset = loadPage_guessCharset(contentType);

    if ("gzip".equals(con.getContentEncoding())) {
      if (loadPage_debug)
        print("loadPage: Using gzip.");
      stream = newGZIPInputStream(stream);
    }

    Reader reader;
    try {
      reader = new InputStreamReader(stream, unquote(charset));
    } catch (UnsupportedEncodingException e) {
      // dump the offending charset name as hex before passing the error on
      print(toHex(utf8(charset)));
      throw e;
    }

    boolean silent = isTrue(loadPage_silent.get());
    StringBuilder text = new StringBuilder();
    int count = 0;
    while (true) {
      // stop once the optional size limit is reached
      if (limit != null && count >= limit) break;
      ping();
      int c = reader.read();
      if (c < 0) break; // end of stream
      text.append((char) c);
      ++count;
      if (!silent && (count % loadPage_verboseness) == 0)
        print("  " + count + " chars read");
    }
    return text.toString();
  } finally {
    if (loadPage_debug)
      print("loadPage done");
    ifndef LeanMode
    vm_generalSubMap("URLConnection per thread").remove(currentThread());
    endifndef
    if (stream != null) stream.close();
  }
}
127  
128  
static String loadPage_guessCharset(String contentType) {
129  
  Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType);
130  
  S match = m.matches() ? m.group(1) : null;
131  
  if (loadPage_debug)
132  
    print("loadPage: contentType=" + contentType + ", match: " + match);
133  
  /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */
134  
  //return or(match, "ISO-8859-1");
135  
  return or(match, "UTF-8");
136  
}
137  
138  
static URLConnection loadPage_openConnection(URL url) {
139  
  URLConnection con = openConnection(url);
140  
  int timeout = toInt(loadPage_forcedTimeout_byThread!);
141  
  if (timeout == 0) timeout = loadPage_forcedTimeout;
142  
  if (timeout != 0)
143  
    setURLConnectionTimeouts(con, loadPage_forcedTimeout);
144  
  else
145  
    setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout);
146  
  ret con;
147  
}

Author comment

Began life as a copy of #1000879

download  show line numbers  debug dex  old transpilations   

Travelled to 1 computer(s): mqqgnosmbjvj

No comments. add comment

Snippet ID: #1035654
Snippet name: loadPage + loadPageSilently backup
Eternal ID of this version: #1035654/1
Text MD5: 3d70ec54ead47d971f270b53495b92ec
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-07-01 01:29:29
Source code size: 5188 bytes / 147 lines
Pitched / IR pitched: No / No
Views / Downloads: 148 / 151
Referenced in: [show references]