1 | public static String loadPage(String url) throws IOException { |
2 | if (url.indexOf("://") < 0) |
3 | url = "http://" + url; |
4 | return loadPage(new URL(url)); |
5 | } |
6 | |
7 | public static String loadPage(URL url) throws IOException { |
8 | System.out.println("Loading: " + url.toExternalForm()); |
9 | URLConnection con = url.openConnection(); |
10 | return loadPage(con, url); |
11 | } |
12 | |
13 | public static String loadPage(URLConnection con, URL url) throws IOException { |
14 | String contentType = con.getContentType(); |
15 | if (contentType == null) |
16 | throw new IOException("Page could not be read: " + url); |
17 | //Log.info("Content-Type: " + contentType); |
18 | String charset = loadPage_guessCharset(contentType); |
19 | Reader r = new InputStreamReader(con.getInputStream(), charset); |
20 | StringBuilder buf = new StringBuilder(); |
21 | while (true) { |
22 | int ch = r.read(); |
23 | if (ch < 0) |
24 | break; |
25 | //Log.info("Chars read: " + buf.length()); |
26 | buf.append((char) ch); |
27 | } |
28 | return buf.toString(); |
29 | } |
30 | |
31 | static String loadPage_guessCharset(String contentType) { |
32 | Pattern p = Pattern.compile("text/html;\\s+charset=([^\\s]+)\\s*"); |
33 | Matcher m = p.matcher(contentType); |
34 | /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ |
35 | return m.matches() ? m.group(1) : "ISO-8859-1"; |
36 | } |
Began life as a copy of #2000483
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #2000484 |
Snippet name: | loadPage |
Eternal ID of this version: | #2000484/1 |
Text MD5: | 464b3fff5dfdf23376e0b2ca029a6f32 |
Author: | stefan |
Category: | |
Type: | New Tinybrain snippet |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-08-02 14:56:51 |
Source code size: | 1369 bytes / 36 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 611 / 2620 |
Referenced in: | [show references] |