import java.security.NoSuchAlgorithmException; import java.security.MessageDigest; import java.lang.reflect.*; import java.net.*; import java.io.*; import javax.swing.*; import java.util.regex.*; import java.util.*; abstract class Predictor { public abstract String predict(String s, int chars); public void preload(String s) {} } public class main { static Set debug = new HashSet(); public static void main(String[] args) throws Exception { String input = "abcdefghijklmnopqrstuvwxyz"; List preloads = new ArrayList(); for (int i = 0; i < args.length; i++) { String arg = args[i]; if (arg.equals("debug")) { String s = args[++i]; debug.add(s); debugOn(s); } else if (isSnippetID(arg)) input = loadSnippet(arg); else if (arg.equals("preload")) { String s = args[++i]; preloads.add(loadSnippet(s)); } } input = input.replace("\r", ""); List predictors = new ArrayList(); add(predictors, new PLin(), new PWords(), new PIndent()); for (Predictor p : predictors) for (String s : preloads) p.preload(s); Collector globalCollector = new Collector(); int skips = 0, totalSkip = 0; StringBuilder compacted = new StringBuilder(); for (int splitPoint = 0; splitPoint < input.length(); splitPoint++) { String before = input.substring(0, splitPoint); String rest = input.substring(splitPoint); Collector collector = new Collector(); for (Predictor p : predictors) { boolean doDebug = debug.contains(p.getClass().getName().replaceAll("^main\\$", "")); int chars = rest.length(); String prediction = ""; try { prediction = p.predict(before, chars); if (doDebug) System.out.println("Actual prediction: " + prediction); } catch (Throwable e) { // silent exception } // convenience result fixing for the predictors if (prediction == null) prediction = ""; if (prediction.length() > chars) prediction = prediction.substring(0, chars); String actual = rest.length() > chars ? rest.substring(0, chars) : rest; int improvement = getScore2(prediction, actual); collector.add(p, prediction, improvement, splitPoint); if (doDebug) { String expected = actual.substring(0, Math.min(prediction.length()+5, rest.length())); System.out.println(splitPoint + "*" + improvement + " " + structure(p) + " -> " + quote(prediction) + " vs " + quote(expected) + " (error " + (actual.length()-improvement) + " of " + rest.length() + ")"); } globalCollector.add(p, prediction, improvement, splitPoint); } int skip = 0; if (collector.bestScore > 0) { System.out.println(splitPoint + " " + collector.bestScore + " " + structure(collector.winner)); skip = commonPrefix(rest, collector.winnerResult).length(); } if (skip != 0) { System.out.println("Skipping " + skip + ": " + quote(collector.winnerResult.substring(0, skip))); splitPoint += skip-1; totalSkip += skip; ++skips; compacted.append('*'); } else compacted.append(input.charAt(splitPoint)); } System.out.println("\n" + compacted + "\n\n"); System.out.println("Highest score seen: " + globalCollector.bestScore + " by " + structure(globalCollector.winner) + " at " + globalCollector.splitPoint); System.out.println("Total characters skipped: " + totalSkip + "/" + input.length() + " (skips: " + skips + ")"); } static class Collector { Predictor winner; String winnerResult; long bestScore = -1; int splitPoint; void add(Predictor p, String result, long score, int splitPoint) { if (winner == null || score > bestScore) { winner = p; winnerResult = result; bestScore = score; this.splitPoint = splitPoint; } } } static int leven(String s, String t) { // degenerate cases if (s.equals(t)) return 0; if (s.length() == 0) return t.length(); if (t.length() == 0) return s.length(); // create two work vectors of integer distances int[] v0 = new int[t.length() + 1]; int[] v1 = new int[t.length() + 1]; // initialize v0 (the previous row of distances) // this row is A[0][i]: edit distance for an empty s // the distance is just the number of characters to delete from t for (int i = 0; i < v0.length; i++) v0[i] = i; for (int i = 0; i < s.length(); i++) { // calculate v1 (current row distances) from the previous row v0 // first element of v1 is A[i+1][0] // edit distance is delete (i+1) chars from s to match empty t v1[0] = i + 1; // use formula to fill in the rest of the row for (int j = 0; j < t.length(); j++) { int cost = s.charAt(i) == t.charAt(j) ? 0 : 1; v1[j + 1] = Math.min(Math.min(v1[j] + 1, v0[j + 1] + 1), v0[j] + cost); } // copy v1 (current row) to v0 (previous row) for next iteration /*for (int j = 0; j < v0.length; j++) v0[j] = v1[j];*/ // swap arrays int[] v = v1; v1 = v0; v0 = v; } // for array copying: // return v1[t.length()]; // for array swapping: return v0[t.length()]; } // "leven" function (Levenshtein distance) /*static class P0 extends Predictor { public String predict(String s, int chars) { return ""; } } static class P1 extends Predictor { public String predict(String s, int chars) { return s; } }*/ static class PLin extends Predictor { public String predict(String s, int chars) { if (s.length() < 2) return ""; char a = s.charAt(s.length()-2); char b = s.charAt(s.length()-1); int step = (char) (((int) b) - (int) a); StringBuilder buf = new StringBuilder(); for (int i = 0; i < chars; i++) { char c = (char) (((int) a) + step*(i+2)); buf.append(c); } return buf.toString(); } } static void add(Collection c, C... objects) { for (C x : objects) c.add(x); } static String structure(Object o) { String name = o.getClass().getName(); StringBuilder buf = new StringBuilder(); if (o instanceof Collection) { for (Object x : (Collection) o) { if (!(buf.length() == 0)) buf.append(", "); buf.append(structure(x)); } return "{" + buf + "}"; } // Need more cases? This should cover all library classes... if (name.startsWith("java.") || name.startsWith("javax.")) return String.valueOf(o); String shortName = o.getClass().getName().replaceAll("^main\\$", ""); // TODO: go to superclasses too Field[] fields = o.getClass().getDeclaredFields(); for (Field field : fields) { if ((field.getModifiers() & Modifier.STATIC) != 0) continue; Object value; try { value = field.get(o); } catch (Exception e) { value = "?"; } String fieldName = field.getName(); // special case for PWords - show only number of preloaded words if (shortName.equals("PWords") && field.getName().equals("preloaded")) value = ((Collection) value).size(); if (!(buf.length() == 0)) buf.append(", "); buf.append(fieldName + "=" + structure(value)); } String s = shortName; if (!(buf.length() == 0)) s += "(" + buf + ")"; return s; } static int getScore1(String prediction, String rest) { int error = leven(prediction, rest); return rest.length()-error; } static int getScore2(String prediction, String rest) { return commonPrefix(prediction, rest).length(); } static class PWords extends Predictor { static boolean debug; Set preloaded = new TreeSet(); public void preload(String s) { preloaded.addAll(findWords(s)); } public String predict(String s, int chars) { Set words = findWords(s); words.addAll(preloaded); String word = match("\\w+$", s); if (word == null) return ""; if (debug) System.out.println("Looking for: " + word); for (String w : words) if (w.startsWith(word)) { String pred = w.substring(word.length()); if (debug) System.out.println("PWords: predicted " + quote(pred) + " for " + quote(word) + " based on word " + quote(w)); return pred; } return ""; } boolean firstTime = true; Set findWords(String s) { Set words = new TreeSet( Collections.reverseOrder()); // important so partial matches come later words.addAll(matchAll("\\w+", s)); if (debug && firstTime && s.length() >= 100) { firstTime = false; //System.out.println("Words found: " + structure(words)); } return words; } } static String match(String pattern, String text) { List matches = new ArrayList(); Matcher matcher = Pattern.compile(pattern).matcher(text); return matcher.find() ? matcher.group() : null; } static class PIndent extends Predictor { public String predict(String s, int chars) { int i = s.lastIndexOf('\n'); int j = i+1; while (j < s.length() && (s.charAt(j) == ' ' || s.charAt(j) == '\t')) ++j; return "\n" + s.substring(i, j); } } static boolean preferCached = false; public static String loadSnippet(String snippetID) throws IOException { return loadSnippet(parseSnippetID(snippetID), preferCached); } public static String loadSnippet(String snippetID, boolean preferCached) throws IOException { return loadSnippet(parseSnippetID(snippetID), preferCached); } public static long parseSnippetID(String snippetID) { return Long.parseLong(shortenSnippetID(snippetID)); } private static String shortenSnippetID(String snippetID) { if (snippetID.startsWith("#")) snippetID = snippetID.substring(1); String httpBlaBla = "http://tinybrain.de/"; if (snippetID.startsWith(httpBlaBla)) snippetID = snippetID.substring(httpBlaBla.length()); return snippetID; } public static boolean isSnippetID(String snippetID) { snippetID = shortenSnippetID(snippetID); return isInteger(snippetID) && Long.parseLong(snippetID) != 0; } public static boolean isInteger(String s) { return Pattern.matches("\\-?\\d+", s); } public static String loadSnippet(long snippetID, boolean preferCached) throws IOException { if (preferCached) { initSnippetCache(); String text = DiskSnippetCache_get(snippetID); if (text != null) return text; } String text; try { URL url = new URL("http://tinybrain.de:8080/getraw.php?id=" + snippetID); text = loadPage(url); } catch (FileNotFoundException e) { throw new IOException("Snippet #" + snippetID + " not found or not public"); } try { initSnippetCache(); DiskSnippetCache_put(snippetID, text); } catch (IOException e) { System.err.println("Minor warning: Couldn't save snippet to cache (" + DiskSnippetCache_getDir() + ")"); } return text; } private static String loadPage(URL url) throws IOException { System.out.println("Loading: " + url.toExternalForm()); URLConnection con = url.openConnection(); return loadPage(con, url); } public static String loadPage(URLConnection con, URL url) throws IOException { String contentType = con.getContentType(); if (contentType == null) throw new IOException("Page could not be read: " + url); //Log.info("Content-Type: " + contentType); String charset = guessCharset(contentType); Reader r = new InputStreamReader(con.getInputStream(), charset); StringBuilder buf = new StringBuilder(); while (true) { int ch = r.read(); if (ch < 0) break; //Log.info("Chars read: " + buf.length()); buf.append((char) ch); } return buf.toString(); } public static String guessCharset(String contentType) { Pattern p = Pattern.compile("text/html;\\s+charset=([^\\s]+)\\s*"); Matcher m = p.matcher(contentType); /* If Content-Type doesn't match this pre-conception, choose default and hope for the best. */ return m.matches() ? m.group(1) : "ISO-8859-1"; } static File DiskSnippetCache_dir; public static void initDiskSnippetCache(File dir) { DiskSnippetCache_dir = dir; dir.mkdirs(); } public static synchronized String DiskSnippetCache_get(long snippetID) throws IOException { return loadTextFile(DiskSnippetCache_getFile(snippetID).getPath(), null); } private static File DiskSnippetCache_getFile(long snippetID) { return new File(DiskSnippetCache_dir, "" + snippetID); } public static synchronized void DiskSnippetCache_put(long snippetID, String snippet) throws IOException { saveTextFile(DiskSnippetCache_getFile(snippetID).getPath(), snippet); } public static File DiskSnippetCache_getDir() { return DiskSnippetCache_dir; } public static void initSnippetCache() { if (DiskSnippetCache_dir == null) initDiskSnippetCache(new File(System.getProperty("user.home"), ".tinybrain/snippet-cache")); } public static String commonPrefix(String a, String b) { int i = 0; while (i < a.length() && i < b.length() && a.charAt(i) == b.charAt(i)) ++i; return a.substring(0, i); } public static String quote(String s) { if (s == null) return "null"; return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; } static List matchAll(String pattern, String text) { List matches = new ArrayList(); Matcher matcher = Pattern.compile(pattern).matcher(text); while (matcher.find()) matches.add(matcher.group()); return matches; } static void debugOn(String name) { try { Class c = getClass("main$" + name); if (c == null) c = getClass(name); Field field; while (true) try { field = c.getDeclaredField("debug"); break; } catch (NoSuchFieldException e) { c = c.getSuperclass(); } field.setBoolean(null, true); } catch (Exception e) { e.printStackTrace(); System.err.println("Cannot debug class " + name); } } static Class getClass(String name) { try { return Class.forName(name); } catch (ClassNotFoundException e) { return null; } } /** writes safely (to temp file, then rename) */ public static void saveTextFile(String fileName, String contents) throws IOException { File file = new File(fileName); File parentFile = file.getParentFile(); if (parentFile != null) parentFile.mkdirs(); String tempFileName = fileName + "_temp"; FileOutputStream fileOutputStream = new FileOutputStream(tempFileName); OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, "UTF-8"); PrintWriter printWriter = new PrintWriter(outputStreamWriter); printWriter.print(contents); printWriter.close(); if (file.exists() && !file.delete()) throw new IOException("Can't delete " + fileName); if (!new File(tempFileName).renameTo(file)) throw new IOException("Can't rename " + tempFileName + " to " + fileName); } public static String loadTextFile(String fileName, String defaultContents) throws IOException { if (!new File(fileName).exists()) return defaultContents; FileInputStream fileInputStream = new FileInputStream(fileName); InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, "UTF-8"); return loadTextFile(inputStreamReader); } public static String loadTextFile(Reader reader) throws IOException { StringBuilder builder = new StringBuilder(); try { BufferedReader bufferedReader = new BufferedReader(reader); String line; while ((line = bufferedReader.readLine()) != null) builder.append(line).append('\n'); } finally { reader.close(); } return builder.length() == 0 ? "" : builder.substring(0, builder.length()-1); } }