import javax.imageio.*;
import java.awt.image.*;
import java.awt.*;
import java.security.NoSuchAlgorithmException;
import java.security.MessageDigest;
import java.lang.reflect.*;
import java.net.*;
import java.io.*;
import javax.swing.*;
import java.util.regex.*;
import java.util.List;
import java.util.*;

/**
 * Downloads a web page, splits it into alternating text/tag tokens and prints
 * every text token containing the marker {@code "Computer!"}, together with
 * the code block that follows it (if any).
 */
public class main {

  /** Text that marks a token of interest. */
  private static final String MARKER = "Computer!";

  // NOTE(review): the four tag literals below were lost when this file was
  // extracted (they had degenerated into raw line breaks, which can never
  // equal a tag token). "<pre>" / "</pre>" is an assumption -- confirm against
  // the original program. Tags are compared exactly, so "<PRE>" or
  // "<pre class=...>" will not match.
  /** Opening tag of a block that is skipped when it directly follows a text token. */
  private static final String SKIP_OPEN = "<pre>";
  /** Closing tag that ends a skipped block. */
  private static final String SKIP_CLOSE = "</pre>";
  /** Opening tag of the code block expected two tags after a marker text. */
  private static final String CODE_OPEN = "<pre>";
  /** Closing tag that ends a code block. */
  private static final String CODE_CLOSE = "</pre>";

  /** Charset used when the Content-Type header names none. */
  private static final String DEFAULT_CHARSET = "ISO-8859-1";

  /** Matches the {@code charset} parameter of a Content-Type header value. */
  private static final Pattern CHARSET_PARAM = Pattern.compile(
      "(?:^|;)\\s*charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

  /**
   * Loads the page at {@code tinybrain.blog.de} and prints all marker texts
   * and their code blocks.
   *
   * @param args ignored
   * @throws Exception if loading or processing fails
   */
  public static void main(String[] args) throws Exception {
    String html = load("tinybrain.blog.de");
    scan(htmlcoarsetok(html));
  }

  /**
   * Walks the token list produced by {@link #htmlcoarsetok(String)} and prints
   * each marker text ("!!! ...") plus, when the tag at offset +3 opens a code
   * block, the decoded code ("Code:...").
   */
  private static void scan(List<String> tok) {
    int size = tok.size();
    int i = 0; // always even: index of a text token
    while (i + 1 < size) {
      // A block that opens right after this text is skipped as a whole.
      if (tok.get(i + 1).equals(SKIP_OPEN)) {
        // Resume at the text token directly behind the closing tag, so that
        // token is inspected too (it used to be stepped over).
        i = findClosingTag(tok, i + 1, SKIP_CLOSE) + 1;
        continue;
      }

      String text = tok.get(i);
      if (text.indexOf(MARKER) < 0) {
        i += 2;
        continue;
      }
      print("!!! " + text);

      // Bounds check: a marker near the end of the page has no tag at i + 3.
      if (i + 3 < size && tok.get(i + 3).equals(CODE_OPEN)) {
        int close = findClosingTag(tok, i + 3, CODE_CLOSE);
        print("Code:\n" + indent(" ", extractCode(tok, i + 3, close)));
        i = close + 1;
      } else {
        i += 2;
      }
    }
  }

  /**
   * Returns the index of the first tag token equal to {@code closeTag},
   * looking at {@code from}, {@code from + 2}, ...; returns an index
   * {@code >= tok.size()} if there is none.
   */
  private static int findClosingTag(List<String> tok, int from, String closeTag) {
    int j = from;
    while (j < tok.size() && !tok.get(j).equals(closeTag)) {
      j += 2;
    }
    return j;
  }

  /**
   * Joins the text tokens strictly between the tags at {@code open} and
   * {@code close} (one per line), decodes basic HTML entities and collapses
   * doubled line breaks.
   */
  private static String extractCode(List<String> tok, int open, int close) {
    int end = Math.min(close, tok.size()); // close may lie past the end if the tag is missing
    StringBuilder buf = new StringBuilder();
    for (int k = open + 1; k < end; k += 2) {
      buf.append(tok.get(k)).append('\n');
    }
    String code = buf.toString()
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&amp;", "&"); // last, so "&amp;lt;" is not decoded twice
    return code.replace("\n\n", "\n");
  }

  /**
   * Loads a page like {@link #loadPage(String)}, but reports I/O failures as
   * unchecked exceptions.
   *
   * @param bla URL of the page; {@code http://} is assumed if no scheme is given
   * @return the page content
   * @throws RuntimeException wrapping the {@link IOException} if loading fails
   */
  static String load(String bla) {
    try {
      return loadPage(bla);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Prefixes every line of {@code s} with {@code indent}.
   *
   * @param indent the prefix to insert at the start and after each line break
   * @param s the text to indent
   * @return the indented text
   */
  static String indent(String indent, String s) {
    return indent + s.replace("\n", "\n" + indent);
  }

  /**
   * Splits HTML into a coarse token list that alternates between text and
   * tags: even indices hold text (possibly empty), odd indices hold tags
   * ({@code <...>}). An unterminated tag at the end of input extends to the
   * end of the string.
   *
   * @param s the HTML to tokenize
   * @return the tokens; empty for empty input
   */
  static List<String> htmlcoarsetok(String s) {
    List<String> tok = new ArrayList<String>();
    int len = s.length();
    int pos = 0;
    while (pos < len) {
      // Text runs up to the next '<' (or the end of input).
      int tagStart = s.indexOf('<', pos);
      if (tagStart < 0) {
        tagStart = len;
      }
      tok.add(s.substring(pos, tagStart));
      if (tagStart >= len) {
        break;
      }

      // Tag runs up to and including the next '>'.
      // TODO: a '>' inside a quoted attribute value ends the tag too early.
      int close = s.indexOf('>', tagStart + 1);
      int tagEnd = close < 0 ? len : close + 1;
      tok.add(s.substring(tagStart, tagEnd));
      pos = tagEnd;
    }
    return tok;
  }

  /** Prints {@code o} followed by a line break to standard output. */
  static void print(Object o) {
    System.out.println(o);
  }

  /**
   * Loads the page at the given address.
   *
   * @param url the address; {@code http://} is prepended if it contains no {@code ://}
   * @return the page content
   * @throws IOException if the URL is malformed or the page cannot be read
   */
  public static String loadPage(String url) throws IOException {
    if (url.indexOf("://") < 0) {
      url = "http://" + url;
    }
    return loadPage(new URL(url));
  }

  /**
   * Opens a connection to {@code url}, logs the address to standard output and
   * returns the page content.
   *
   * @param url the page to load
   * @return the page content
   * @throws IOException if the page cannot be read
   */
  public static String loadPage(URL url) throws IOException {
    System.out.println("Loading: " + url.toExternalForm());
    URLConnection con = url.openConnection();
    return loadPage(con, url);
  }

  /**
   * Reads the whole response body of {@code con} as text, using the charset
   * named in its Content-Type header (see {@link #loadPage_guessCharset}).
   *
   * @param con the connection to read from
   * @param url the address, used only for the error message
   * @return the page content
   * @throws IOException if no Content-Type is available, the charset is not
   *     supported, or reading fails
   */
  public static String loadPage(URLConnection con, URL url) throws IOException {
    String contentType = con.getContentType();
    if (contentType == null) {
      throw new IOException("Page could not be read: " + url);
    }
    String charset = loadPage_guessCharset(contentType);

    InputStream in = con.getInputStream();
    try {
      Reader r = new InputStreamReader(in, charset);
      StringBuilder buf = new StringBuilder();
      char[] chunk = new char[8192];
      int n;
      while ((n = r.read(chunk)) != -1) {
        buf.append(chunk, 0, n);
      }
      return buf.toString();
    } finally {
      // Closing the stream also covers the case where the reader could not
      // be created (unsupported charset).
      in.close();
    }
  }

  /**
   * Extracts the charset name from a Content-Type header value. The parameter
   * is found case-insensitively, with or without whitespace after the
   * {@code ;}, and surrounding double quotes are dropped.
   *
   * @param contentType the header value, e.g. {@code text/html; charset=UTF-8}
   * @return the charset name as written in the header, or {@code ISO-8859-1}
   *     if the header is {@code null} or names no charset
   */
  static String loadPage_guessCharset(String contentType) {
    if (contentType == null) {
      return DEFAULT_CHARSET;
    }
    Matcher m = CHARSET_PARAM.matcher(contentType);
    // Without a charset parameter, choose the default and hope for the best.
    return m.find() ? m.group(1) : DEFAULT_CHARSET;
  }
}