Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

154
LINES

< > BotCompany Repo | #1000879 // loadPage + loadPageSilently

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (10507L/60K).

1  
static int loadPage_defaultTimeout = 60000;
2  
static new ThreadLocal<S> loadPage_charset;
3  
static boolean loadPage_allowGzip = true, loadPage_debug;
4  
static boolean loadPage_anonymous; // don't send computer ID
5  
static int loadPage_verboseness = 100000;
6  
static int loadPage_retries = 1; //60; // seconds
7  
static new ThreadLocal<Bool> loadPage_silent;
8  
static volatile int loadPage_forcedTimeout; // ms
9  
static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms
10  
static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal;
11  
static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal;
12  
static new ThreadLocal<Long> loadPage_sizeLimit;
13  
14  
// Loads a page given its address as a string, without printing the "Loading: ..." line.
// The address is first normalized by loadPage_preprocess, then parsed into a URL.
public static String loadPageSilently(String url) ctex {
  S fullUrl = loadPage_preprocess(url);
  URL parsed = new URL(fullUrl);
  ret loadPageSilently(parsed);
}
17  
18  
// Loads a page without printing the "Loading: ..." line.
// Fails with "Not allowed: ..." if networkAllowanceTest rejects the URL.
// Makes up to loadPage_retries attempts (always at least one), sleeping one second
// between attempts, and rethrows the last IOException when every attempt failed.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Guard against loadPage_retries <= 0: previously the loop was skipped entirely
  // and "throw e" threw null (a NullPointerException) without ever trying to load.
  int attempts = Math.max(1, loadPage_retries);

  IOException lastError = null;
  for (int tries = 0; tries < attempts; tries++) {
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException _e) {
      lastError = _e;
      if (loadPage_debug)
        print(exceptionToStringShort(_e));
      // no point in sleeping after the final attempt
      if (tries < attempts-1) sleepSeconds(1);
    }
  }
  throw lastError;
}
34  
35  
static String loadPage_preprocess(S url) {  
36  
  if (url.startsWith("tb/")) // don't think we use this anymore
37  
    url = tb_mainServer() + "/" + url;
38  
  if (url.indexOf("://") < 0)
39  
    url = "http://" + url;
40  
  return url;
41  
}
42  
43  
// Loads a page given its address as a string.
// Unless the per-thread loadPage_silent flag is true, prints a timestamped
// "Loading: ..." line first (with credentials hidden).
static S loadPage(S url) ctex {
  S fullUrl = loadPage_preprocess(url);
  bool quiet = isTrue(loadPage_silent.get());
  if (!quiet)
    printWithTime("Loading: " + hideCredentials(fullUrl));
  URL parsed = new URL(fullUrl);
  ret loadPageSilently(parsed);
}
49  
50  
// Loads a page given as a URL object by delegating to the string variant
// (so the "Loading: ..." message and preprocessing apply here too).
static S loadPage(URL url) {
  S address = url.toExternalForm();
  ret loadPage(address);
}
53  
54  
// Loads the page behind an already opened connection, adding the standard request headers.
static S loadPage(URLConnection con, URL url) throws IOException {
  bool addHeaders = true;
  ret loadPage(con, url, addHeaders);
}
57  
58  
// Optionally decorates the connection with request headers, then downloads the page.
// The per-thread extra headers are fetched and cleared on every call, even when
// addHeaders is false. The url parameter is not used in this method.
sS loadPage(URLConnection con, URL url, bool addHeaders) ctex {
  SS headers = getAndClearThreadLocal(loadPage_extraHeaders);

  if (addHeaders) {
    try {
      if (!loadPage_anonymous) setHeaders(con);
      if (loadPage_allowGzip) con.setRequestProperty("Accept-Encoding", "gzip");
      con.setRequestProperty("X-No-Cookies", "1");
      for (S name : keys(headers))
        con.setRequestProperty(name, headers.get(name));
    } catch (Throwable ignored) {
      // deliberately best-effort: setting headers fails if within doPost
    }
  }

  ret loadPage(con);
}
72  
73  
// just download as string, no shenanigans or extra headers or ANYTHING
74  
sS loadPage(URLConnection con) ctex {
75  
  Long limit = optPar(loadPage_sizeLimit);
76  
  URL url = con.getURL();
77  
  
78  
  ifndef LeanMode
79  
  vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
80  
  endifndef
81  
  loadPage_responseHeaders.set(con.getHeaderFields());
82  
  InputStream in = null;
83  
  try {
84  
    in = urlConnection_getInputStream(con);
85  
  //vm_generalSubMap("InputStream per thread").put(currentThread(), in);
86  
  if (loadPage_debug)
87  
    print("Put stream in map: " + currentThread());
88  
    String contentType = con.getContentType();
89  
    if (contentType == null) {
90  
      //printStruct("Headers: ", con.getHeaderFields());
91  
      throw new IOException("Page could not be read: " + hideCredentials(url));
92  
    }
93  
    //print("Content-Type: " + contentType);
94  
    String charset = loadPage_charset == null ? null : loadPage_charset.get();
95  
    if (charset == null) charset = loadPage_guessCharset(contentType);
96  
    
97  
    if ("gzip".equals(con.getContentEncoding())) {
98  
      if (loadPage_debug)
99  
        print("loadPage: Using gzip.");
100  
      in = newGZIPInputStream(in);
101  
    }
102  
    Reader r;
103  
    try {
104  
      r = new InputStreamReader(in, unquote(charset));
105  
    } catch (UnsupportedEncodingException e) {
106  
      print(toHex(utf8(charset)));
107  
      throw e;
108  
    }
109  
    
110  
    bool silent = isTrue(loadPage_silent!);
111  
    new StringBuilder buf;
112  
    int n = 0;
113  
    while (limit == null || n < limit) {
114  
      ping();
115  
      int ch = r.read();
116  
      if (ch < 0)
117  
        break;
118  
      buf.append((char) ch);
119  
      ++n;
120  
      if (!silent && (n % loadPage_verboseness) == 0)
121  
        print("  " + n + " chars read");
122  
    }
123  
    return buf.toString();
124  
  } finally {
125  
    if (loadPage_debug)
126  
      print("loadPage done");
127  
    //vm_generalSubMap("InputStream per thread").remove(currentThread());
128  
    ifndef LeanMode
129  
    vm_generalSubMap("URLConnection per thread").remove(currentThread());
130  
    endifndef
131  
    if (in != null) in.close();
132  
  }
133  
}
134  
135  
static String loadPage_guessCharset(String contentType) {
136  
  Matcher m = regexpMatcher("text/[a-z]+;\\s*charset=([^\\s]+)\\s*", contentType);
137  
  S match = m.matches() ? m.group(1) : null;
138  
  if (loadPage_debug)
139  
    print("loadPage: contentType=" + contentType + ", match: " + match);
140  
  /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */
141  
  //return or(match, "ISO-8859-1");
142  
  return or(match, "UTF-8");
143  
}
144  
145  
static URLConnection loadPage_openConnection(URL url) {
146  
  URLConnection con = openConnection(url);
147  
  int timeout = toInt(loadPage_forcedTimeout_byThread!);
148  
  if (timeout == 0) timeout = loadPage_forcedTimeout;
149  
  if (timeout != 0)
150  
    setURLConnectionTimeouts(con, loadPage_forcedTimeout);
151  
  else
152  
    setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout);
153  
  ret con;
154  
}

Author comment

Began life as a copy of #2000484

download  show line numbers  debug dex  old transpilations   

Travelled to 22 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, jtubtzbbkimh, lpdgvwnxivlt, mowyntqkapby, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, wnsclhtenguj, xrpafgyirdlv

No comments. add comment

Snippet ID: #1000879
Snippet name: loadPage + loadPageSilently
Eternal ID of this version: #1000879/50
Text MD5: 537ebda57db721e643853da133d94b99
Transpilation MD5: 234ec74e3b73d863a165da90311f78c5
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-07-01 01:30:21
Source code size: 5333 bytes / 154 lines
Pitched / IR pitched: No / No
Views / Downloads: 1129 / 7103
Version history: 49 change(s)
Referenced in: [show references]