1 | public static String loadPage(String url) throws IOException {
|
2 | return loadPage(new URL(url)); |
3 | } |
4 | |
5 | public static String loadPage(URL url) throws IOException {
|
6 | System.out.println("Loading: " + url.toExternalForm());
|
7 | URLConnection con = url.openConnection(); |
8 | return loadPage(con, url); |
9 | } |
10 | |
11 | public static String loadPage(URLConnection con, URL url) throws IOException {
|
12 | String contentType = con.getContentType(); |
13 | if (contentType == null) |
14 | throw new IOException("Page could not be read: " + url);
|
15 | //Log.info("Content-Type: " + contentType);
|
16 | String charset = loadPage_guessCharset(contentType); |
17 | Reader r = new InputStreamReader(con.getInputStream(), charset); |
18 | StringBuilder buf = new StringBuilder(); |
19 | while (true) {
|
20 | int ch = r.read(); |
21 | if (ch < 0) |
22 | break; |
23 | //Log.info("Chars read: " + buf.length());
|
24 | buf.append((char) ch); |
25 | } |
26 | return buf.toString(); |
27 | } |
28 | |
29 | static String loadPage_guessCharset(String contentType) {
|
30 | Pattern p = Pattern.compile("text/html;\\s+charset=([^\\s]+)\\s*");
|
31 | Matcher m = p.matcher(contentType); |
32 | /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ |
33 | return m.matches() ? m.group(1) : "ISO-8859-1"; |
34 | } |
35 | } |
Began life as a copy of #2000481
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #2000482 |
| Snippet name: | loadPage |
| Eternal ID of this version: | #2000482/1 |
| Text MD5: | e93104233406da9933a962897a80f669 |
| Author: | stefan |
| Category: | |
| Type: | New Tinybrain snippet |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-08-02 14:53:08 |
| Source code size: | 1307 bytes / 35 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 832 / 170 |
| Referenced in: | [show references] |