Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

154
LINES

< > BotCompany Repo | #1000879 // loadPage + loadPageSilently

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (10507L/60K).

// ---- Configuration and per-thread state for loadPage / loadPageSilently ----

// Timeout handed to setURLConnectionDefaultTimeouts when no forced timeout is set.
static int loadPage_defaultTimeout = 60000;

// Per-thread charset override; when unset, the charset is guessed from the Content-Type.
static ThreadLocal<S> loadPage_charset = new ThreadLocal;

// When true, "Accept-Encoding: gzip" is sent with requests.
static boolean loadPage_allowGzip = true;

// When true, diagnostic messages are printed.
static boolean loadPage_debug;

// When true, setHeaders(con) is skipped (don't send computer ID).
static boolean loadPage_anonymous;

// A progress line is printed every this many characters read (unless silent).
static int loadPage_verboseness = 100000;

// Number of attempts loadPageSilently(URL) makes; it sleeps one second between attempts.
static int loadPage_retries = 1;

// Per-thread flag; when true, the "Loading:" line and progress output are suppressed.
static ThreadLocal<Bool> loadPage_silent = new ThreadLocal;

// Global forced timeout in ms; 0 means "not set".
static volatile int loadPage_forcedTimeout;

// Per-thread forced timeout in ms; consulted before the global loadPage_forcedTimeout.
static ThreadLocal<Int> loadPage_forcedTimeout_byThread = new ThreadLocal;

// Header fields of the connection most recently loaded on this thread.
static ThreadLocal<Map<S, L<S>>> loadPage_responseHeaders = new ThreadLocal;

// Additional request headers for this thread; fetched and cleared by loadPage(con, url, addHeaders).
static ThreadLocal<SS> loadPage_extraHeaders = new ThreadLocal;

// Per-thread upper bound on the number of characters read from a page.
static ThreadLocal<Long> loadPage_sizeLimit = new ThreadLocal;

// Loads a page given its address as a string, without printing the "Loading:" line.
// The address is first normalized by loadPage_preprocess, then parsed into a URL.
public static String loadPageSilently(String url) ctex {
  String fullUrl = loadPage_preprocess(url);
  URL parsed = new URL(fullUrl);
  return loadPageSilently(parsed);
}

// Downloads the page at url as a string, without printing the "Loading:" line.
//
// Fails with "Not allowed: <url>" if networkAllowanceTest rejects the URL.
// Makes up to loadPage_retries attempts (but always at least one), sleeping
// one second between attempts. If every attempt ends in an IOException, the
// last one is thrown.
public static String loadPageSilently(URL url) ctex {
  if (!networkAllowanceTest(str(url))) fail("Not allowed: " + url);

  // Always try at least once: with loadPage_retries <= 0 the loop would be
  // skipped entirely and the final throw would hit a null reference.
  int attempts = Math.max(1, loadPage_retries);

  IOException lastError = null;
  for (int tries = 0; tries < attempts; tries++)
    try {
      URLConnection con = loadPage_openConnection(url);
      ret loadPage(con, url);
    } catch (IOException e) {
      lastError = e;
      if (loadPage_debug)
        print(exceptionToStringShort(e));
      // no point in waiting after the final attempt
      if (tries < attempts-1) sleepSeconds(1);
    }
  throw lastError;
}

// Normalizes a page address before it is parsed as a URL:
// - addresses starting with "tb/" are prefixed with tb_mainServer() + "/"
//   (legacy; the original author notes this is probably unused by now)
// - addresses without a "://" get "http://" prepended
static String loadPage_preprocess(S url) {
  S result = url;
  if (result.startsWith("tb/"))
    result = tb_mainServer() + "/" + result;
  if (!result.contains("://"))
    result = "http://" + result;
  return result;
}

static S loadPage(S url) ctex {
  url = loadPage_preprocess(url);
  if (!isTrue(loadPage_silent!))
    printWithTime("Loading: " + hideCredentials(url));
  ret loadPageSilently(new URL(url));
}

static S loadPage(URL url) {
  ret loadPage(url.toExternalForm());
}

// Loads the page behind an already opened connection, adding the standard
// request headers (equivalent to calling the three-argument variant with addHeaders = true).
static S loadPage(URLConnection con, URL url) throws IOException {
  bool withHeaders = true;
  ret loadPage(con, url, withHeaders);
}

// Loads the page behind con, optionally decorating the request first.
//
// The per-thread loadPage_extraHeaders are always fetched and cleared, even
// when addHeaders is false. When addHeaders is true, the following is applied
// on a best-effort basis:
// - setHeaders(con), unless loadPage_anonymous is set
// - "Accept-Encoding: gzip", if loadPage_allowGzip is set
// - "X-No-Cookies: 1"
// - every entry of the extra headers
// Setting headers fails if within doPost; such failures must not abort the
// download, so they are only reported when loadPage_debug is on.
sS loadPage(URLConnection con, URL url, bool addHeaders) ctex {
  SS extraHeaders = getAndClearThreadLocal(loadPage_extraHeaders);
  if (addHeaders) try {
    if (!loadPage_anonymous)
      setHeaders(con);
    if (loadPage_allowGzip)
      con.setRequestProperty("Accept-Encoding", "gzip");
    con.setRequestProperty("X-No-Cookies", "1");
    for (S key : keys(extraHeaders))
      con.setRequestProperty(key, extraHeaders.get(key));
  } catch (Throwable e) {
    // deliberately not rethrown (fails if within doPost), but no longer invisible when debugging
    if (loadPage_debug)
      print("loadPage: could not set request headers: " + e);
  }
  
  ret loadPage(con);
}

// just download as string, no shenanigans or extra headers or ANYTHING
//
// Reads the response body of con into a string.
// - The response header fields are stored in loadPage_responseHeaders.
// - The charset is taken from loadPage_charset if set for this thread,
//   otherwise guessed from the Content-Type via loadPage_guessCharset.
// - A gzip content encoding is transparently decompressed.
// - If a size limit was obtained from loadPage_sizeLimit, reading stops
//   after that many characters.
// - Unless loadPage_silent is true, a progress line is printed every
//   loadPage_verboseness characters.
// Fails with "Page could not be read: ..." when the response carries no Content-Type.
sS loadPage(URLConnection con) ctex {
  Long limit = optPar(loadPage_sizeLimit);
  URL url = con.getURL();
  
  InputStream in = null;
  try {
    // Registration happens inside the try so the finally block below
    // always undoes it, even if a later step fails.
    ifndef LeanMode
    vm_generalSubMap("URLConnection per thread").put(currentThread(), con);
    endifndef
    loadPage_responseHeaders.set(con.getHeaderFields());
    
    in = urlConnection_getInputStream(con);
    //vm_generalSubMap("InputStream per thread").put(currentThread(), in);
    if (loadPage_debug)
      print("Put stream in map: " + currentThread());
    
    String contentType = con.getContentType();
    if (contentType == null) {
      //printStruct("Headers: ", con.getHeaderFields());
      throw new IOException("Page could not be read: " + hideCredentials(url));
    }
    //print("Content-Type: " + contentType);
    String charset = loadPage_charset == null ? null : loadPage_charset.get();
    if (charset == null) charset = loadPage_guessCharset(contentType);
    
    // HTTP content codings are case-insensitive, so "GZIP" or "Gzip" count too.
    // equalsIgnoreCase(null) is false, so a missing header is handled as before.
    if ("gzip".equalsIgnoreCase(con.getContentEncoding())) {
      if (loadPage_debug)
        print("loadPage: Using gzip.");
      in = newGZIPInputStream(in);
    }
    
    Reader r;
    try {
      r = new InputStreamReader(in, unquote(charset));
    } catch (UnsupportedEncodingException e) {
      // dump the offending charset name as hex to expose invisible characters
      print(toHex(utf8(charset)));
      throw e;
    }
    
    bool silent = isTrue(loadPage_silent!);
    new StringBuilder buf;
    int n = 0;
    while (limit == null || n < limit) {
      ping();
      int ch = r.read();
      if (ch < 0)
        break;
      buf.append((char) ch);
      ++n;
      if (!silent && (n % loadPage_verboseness) == 0)
        print("  " + n + " chars read");
    }
    return buf.toString();
  } finally {
    if (loadPage_debug)
      print("loadPage done");
    //vm_generalSubMap("InputStream per thread").remove(currentThread());
    ifndef LeanMode
    vm_generalSubMap("URLConnection per thread").remove(currentThread());
    endifndef
    // closing the (possibly gzip-wrapped) stream also closes the underlying one
    if (in != null) in.close();
  }
}

// Extracts the charset parameter from a Content-Type header value,
// e.g. "text/html; charset=ISO-8859-1" yields "ISO-8859-1".
//
// The parameter is recognized for any media type (not only text/*),
// regardless of letter case, and regardless of further parameters following
// it. The value is returned as written (surrounding quotes, if any, are kept).
// If no charset parameter is present, "UTF-8" is returned as the default.
static String loadPage_guessCharset(String contentType) {
  // find() instead of matches(): the parameter may appear anywhere after the media type
  Matcher m = regexpMatcher("(?i);\\s*charset\\s*=\\s*([^\\s;]+)", contentType);
  S match = m.find() ? m.group(1) : null;
  if (loadPage_debug)
    print("loadPage: contentType=" + contentType + ", match: " + match);
  /* If Content-Type carries no charset, choose default and hope for the best. */
  //return or(match, "ISO-8859-1");
  return or(match, "UTF-8");
}

// Opens a connection to url and configures its timeouts.
//
// The forced timeout is taken from loadPage_forcedTimeout_byThread if that is
// non-zero for the current thread, otherwise from the global
// loadPage_forcedTimeout. If neither is set (both 0), the default timeouts
// based on loadPage_defaultTimeout are applied instead.
static URLConnection loadPage_openConnection(URL url) {
  URLConnection con = openConnection(url);
  int timeout = toInt(loadPage_forcedTimeout_byThread!);
  if (timeout == 0) timeout = loadPage_forcedTimeout;
  if (timeout != 0)
    // pass the resolved value; passing loadPage_forcedTimeout here would
    // discard a per-thread setting
    setURLConnectionTimeouts(con, timeout);
  else
    setURLConnectionDefaultTimeouts(con, loadPage_defaultTimeout);
  ret con;
}

Author comment

Began life as a copy of #2000484

download  show line numbers  debug dex  old transpilations   

Travelled to 22 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, ekrmjmnbrukm, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, jtubtzbbkimh, lpdgvwnxivlt, mowyntqkapby, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt, whxojlpjdney, wnsclhtenguj, xrpafgyirdlv

No comments. add comment

Snippet ID: #1000879
Snippet name: loadPage + loadPageSilently
Eternal ID of this version: #1000879/50
Text MD5: 537ebda57db721e643853da133d94b99
Transpilation MD5: 234ec74e3b73d863a165da90311f78c5
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-07-01 01:30:21
Source code size: 5333 bytes / 154 lines
Pitched / IR pitched: No / No
Views / Downloads: 1119 / 7086
Version history: 49 change(s)
Referenced in: [show references]