Not logged in.  Login/Logout/Register | List snippets | Create snippet | Upload image | Upload data

147
LINES

< > BotCompany Repo | #1035654 // loadPage + loadPageSilently backup

JavaX fragment (include)

// Configuration and per-thread state shared by the loadPage family of functions.

// Timeout passed to setURLConnectionDefaultTimeouts when no forced timeout is in effect
// (presumably milliseconds, like the forced timeouts below - TODO confirm).
static int loadPage_defaultTimeout = 60000;
// Per-thread charset override; when set, it is used instead of the charset guessed from Content-Type.
static new ThreadLocal<S> loadPage_charset;
// loadPage_allowGzip: send "Accept-Encoding: gzip" with the request.
// loadPage_debug: print diagnostic messages while loading.
static boolean loadPage_allowGzip = true, loadPage_debug;
static boolean loadPage_anonymous; // don't send computer ID (when true, setHeaders(con) is skipped)
static int loadPage_verboseness = 100000; // a progress line is printed every this many chars read
static int loadPage_retries = 1; // number of attempts made by loadPageSilently, with a 1 second pause between attempts (old note here read "60")
// Per-thread flag; when true, the "Loading: ..." line and progress output are suppressed.
static new ThreadLocal<Bool> loadPage_silent;
static volatile int loadPage_forcedTimeout; // ms; 0 = not set
// Checked first in loadPage_openConnection; the global loadPage_forcedTimeout is consulted when this yields 0.
static new ThreadLocal<Int> loadPage_forcedTimeout_byThread; // ms
// Response header fields of the most recent loadPage call on this thread.
static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal;
// Additional request headers for the next loadPage call on this thread (fetched via getAndClearThreadLocal).
static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal;
// Optional per-thread cap on the number of characters read from the response.
static new ThreadLocal<Long> loadPage_sizeLimit;

// Loads a page without printing the "Loading: ..." line.
// The URL string is normalized by loadPage_preprocess before being parsed.
public static String loadPageSilently(String url) ctex {
  S fullUrl = loadPage_preprocess(url);
  ret loadPageSilently(new URL(fullUrl));
}

// Loads the given URL and returns the page content as a string,
// without printing the "Loading: ..." line.
//
// Fails immediately if networkAllowanceTest rejects the URL.
// Otherwise tries up to loadPage_retries times (but always at least once),
// sleeping one second between attempts. If every attempt fails with an
// IOException, the exception of the last attempt is thrown.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Read the setting once so that the loop bound and the "sleep before next try"
  // check agree, and clamp it to 1: with zero attempts there would be no
  // exception to rethrow and "throw e" would throw a null reference.
  int attempts = Math.max(1, loadPage_retries);

  IOException e = null;
  for (int tries = 0; tries < attempts; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException _e) {
      e = _e; // remember the most recent failure
      if (loadPage_debug)
        print(exceptionToStringShort(e));
      // no point in sleeping after the final attempt
      if (tries < attempts-1) sleepSeconds(1);
    }
  throw e;
}

// Normalizes a URL string before it is parsed:
// - a "tb/..." shortcut is prefixed with tb_mainServer() (probably no longer in use)
// - anything without "://" gets "http://" prepended
static String loadPage_preprocess(S url) {
  S result = url;
  if (result.startsWith("tb/"))
    result = tb_mainServer() + "/" + result;
  if (!result.contains("://"))
    result = "http://" + result;
  return result;
}

static S loadPage(S url) ctex {
  url = loadPage_preprocess(url);
  if (!isTrue(loadPage_silent!))
    printWithTime("Loading: " + hideCredentials(url));
  ret loadPageSilently(new URL(url));
}

static S loadPage(URL url) {
  ret loadPage(url.toExternalForm());
}

// Reads the response of an already opened connection,
// with the standard request headers added (addHeaders = true).
static S loadPage(URLConnection con, URL url) throws IOException {
  bool addHeaders = true;
  ret loadPage(con, url, addHeaders);
}

// Reads the response body of con and returns it as a string.
//
// con:        the connection to read from
// url:        only used for the error message (credentials are hidden)
// addHeaders: if true, request headers are set on con before reading
//
// The charset is taken from the loadPage_charset thread-local if set,
// otherwise guessed from the Content-Type header. A gzip Content-Encoding
// is decompressed. The response header fields are stored in
// loadPage_responseHeaders for the current thread.
//
// Throws IOException if the connection reports no content type, or if
// reading fails; UnsupportedEncodingException if the charset is unknown.
static String loadPage(URLConnection con, URL url, bool addHeaders) throws IOException {
  // Per-call settings are fetched first.
  // NOTE(review): judging by its name, getAndClearThreadLocal also resets the
  // extra headers so they apply to one call only - confirm against the helper.
  SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders);
  // Optional cap on the number of chars read (null = unlimited, see the read loop)
  Long limit = optPar(loadPage_sizeLimit);
  if (addHeaders) try {
    if (!loadPage_anonymous)
      setHeaders(con);
    if (loadPage_allowGzip)
      con.setRequestProperty("Accept-Encoding", "gzip");
    con.setRequestProperty("X-No-Cookies", "1");
    for (S key : keys(extraHeaders))
      con.setRequestProperty(key, extraHeaders.get(key));
  } catch (Throwable e) {} // fails if within doPost (deliberately ignored: headers are best-effort)
  
  // Register the connection under the current thread (skipped in LeanMode);
  // the entry is removed again in the finally block below.
  ifndef LeanMode
  vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
  endifndef
  // Make the response headers available to the caller via the thread-local.
  // NOTE(review): querying header fields normally forces the request to be sent,
  // so request properties must be set before this line - confirm.
  loadPage_responseHeaders.set(con.getHeaderFields());
  InputStream in = null;
  try {
    in = urlConnection_getInputStream(con);
  //vm_generalSubMap("InputStream per thread").put(currentThread(), in);
  // Note: the map insertion above is commented out, so this debug message is stale.
  if (loadPage_debug)
    print("Put stream in map: " + currentThread());
    String contentType = con.getContentType();
    // A missing content type is treated as an unreadable page.
    if (contentType == null) {
      //printStruct("Headers: ", con.getHeaderFields());
      throw new IOException("Page could not be read: " + hideCredentials(url));
    }
    //print("Content-Type: " + contentType);
    // An explicitly set thread-local charset wins over the guessed one.
    String charset = loadPage_charset == null ? null : loadPage_charset.get();
    if (charset == null) charset = loadPage_guessCharset(contentType);
    
    // Transparently decompress gzip-encoded responses.
    if ("gzip".equals(con.getContentEncoding())) {
      if (loadPage_debug)
        print("loadPage: Using gzip.");
      in = newGZIPInputStream(in);
    }
    Reader r;
    try {
      // unquote: the charset value may arrive wrapped in quotes
      r = new InputStreamReader(in, unquote(charset));
    } catch (UnsupportedEncodingException e) {
      // Dump the charset name as hex to help diagnose odd characters in it
      print(toHex(utf8(charset)));
      throw e;
    }
    
    bool silent = isTrue(loadPage_silent!);
    new StringBuilder buf;
    int n = 0; // number of chars read so far
    // Read char by char until end of stream or until the size limit is reached.
    while (limit == null || n < limit) {
      ping();
      int ch = r.read();
      if (ch < 0)
        break;
      buf.append((char) ch);
      ++n;
      // Progress output every loadPage_verboseness chars unless silenced
      if (!silent && (n % loadPage_verboseness) == 0)
        print("  " + n + " chars read");
    }
    return buf.toString();
  } finally {
    if (loadPage_debug)
      print("loadPage done");
    //vm_generalSubMap("InputStream per thread").remove(currentThread());
    // Unregister the connection for this thread (mirrors the put above).
    ifndef LeanMode
    vm_generalSubMap("URLConnection per thread").remove(currentThread());
    endifndef
    // in is the gzip wrapper if one was created, otherwise the raw stream.
    if (in != null) in.close();
  }
}

// Extracts the charset parameter from a Content-Type header value.
//
// contentType: header value, e.g. "text/html; charset=ISO-8859-1"
// Returns the charset exactly as written in the header (it may still be
// quoted; the caller unquotes it), or "UTF-8" if no charset is declared.
//
// The lookup works for any media type (not just text/*), ignores the case of
// the parameter name, and stops at a following ";" so that further parameters
// do not end up in the charset name.
static String loadPage_guessCharset(String contentType) {
  // (?i): media type parameter names are case-insensitive.
  // find() instead of matches(): the charset parameter may be followed by others.
  Matcher m = regexpMatcher("(?i);\\s*charset=([^\\s;]+)", contentType);
  S match = m.find() ? m.group(1) : null;
  if (loadPage_debug)
    print("loadPage: contentType=" + contentType + ", match: " + match);
  /* If Content-Type declares no charset, choose default and hope for the best.
     (An earlier version defaulted to ISO-8859-1.) */
  return or(match, "UTF-8");
}

// Opens a connection to url and configures its timeouts.
//
// Timeout selection, in order of precedence:
// 1. loadPage_forcedTimeout_byThread (if it yields a non-zero value)
// 2. loadPage_forcedTimeout (if non-zero)
// 3. otherwise loadPage_defaultTimeout, applied through setURLConnectionDefaultTimeouts
static URLConnection loadPage_openConnection(URL url) {
  URLConnection con = openConnection(url);
  int timeout = toInt(loadPage_forcedTimeout_byThread!);
  if (timeout == 0) timeout = loadPage_forcedTimeout;
  if (timeout != 0)
    // Use the resolved value: passing loadPage_forcedTimeout here would
    // drop a per-thread timeout whenever the global one is unset.
    setURLConnectionTimeouts(con, timeout);
  else
    setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout);
  ret con;
}

Author comment

Began life as a copy of #1000879

download  show line numbers  debug dex  old transpilations   

Travelled to 1 computer(s): mqqgnosmbjvj

No comments. add comment

-
Snippet ID: #1035654
Snippet name: loadPage + loadPageSilently backup
Eternal ID of this version: #1035654/1
Text MD5: 3d70ec54ead47d971f270b53495b92ec
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-07-01 01:29:29
Source code size: 5188 bytes / 147 lines
Pitched / IR pitched: No / No
Views / Downloads: 147 / 150
Referenced in: