tok;
// Token indices (positions in tok) that were predicted correctly.
// test() replaces this set for every predictor it evaluates; main() later
// points it at the collector's saved set before showColoredText() reads it.
static Set predicted;
// Predictor that asks a list of sub-predictors in order and answers with the
// first non-null prediction.
static class Chain extends P {
  // Sub-predictors, consulted front to back.
  List<P> list = new ArrayList<P>();

  Chain() {}

  // Uses the given list directly (no copy is made).
  Chain(List<P> list) {
    this.list = list;
  }

  // Builds the chain from the given predictors, in argument order.
  Chain(P... a) { list = asList(a); }

  // Appends a predictor to the end of the chain.
  void add(P p) { list.add(p); }

  // Returns the first non-null result of p.read(tok) over the chain, or null
  // when every sub-predictor returns null. Predictors after the first one
  // that answers are not called for this invocation.
  String read(List<String> tok) {
    for (P p : list) {
      String s = p.read(tok);
      if (s != null) return s;
    }
    return null;
  }

  // Returns a new Chain holding the derive() result of every sub-predictor,
  // in the same order.
  P derive() {
    Chain c = new Chain();
    for (P p : list)
      c.add(p.derive());
    return c;
  }
}
static class Tuples extends P {
Map,String> map = new HashMap,String>();
int n, seen;
Tuples(int n) {
this.n = n;
}
String read(List tok) {
while (tok.size() > seen) {
++seen;
if (seen > n)
map.put(new ArrayList(tok.subList(seen-n-1, seen-1)), tok.get(seen-1));
}
if (tok.size() >= n)
return map.get(new ArrayList(tok.subList(tok.size()-n, tok.size())));
return null;
}
// slow...
P oldDerive() {
Tuples t = new Tuples(n);
t.map.putAll(map);
// t.seen == 0 which is ok
return t;
}
// fast!
P derive() {
Tuples t = new Tuples(n);
t.map = new DerivedHashMap,String>(map);
return t;
}
}
static class DerivedHashMap extends AbstractMap {
Map base;
HashMap additions = new HashMap();
DerivedHashMap(Map base) {
this.base = base;}
public B get(Object key) {
B b = additions.get(key);
if (b != null) return b;
return base.get(key);
}
public B put(A key, B value) {
return additions.put(key, value);
}
public Set> entrySet() {
throw fail();
}
}
// TODO: Put NewX back in
// Entry point: builds the token corpus, evaluates the predictor chain and,
// if a winner exists and the GUI is enabled, shows the coloured result.
public static void main(String[] args) throws Exception {
  tok = makeCorpusJavaTok();
  print("Tokens in corpus: " + tok.size());

  print("Learning...");
  collector = new Collector();

  /*test(new Tuples(1));
  test(new Tuples(2));
  test(new Tuples(3));
  test(new Tuples(4));
  test(new Chain(new Tuples(2), new Tuples(1)));*/
  P candidate = new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Tuples(1));
  test(candidate);

  print("Learning done.");

  // Nothing to display without a winner or when the GUI is switched off.
  if (collector.winner == null || !showGUI)
    return;

  predicted = collector.predicted;
  showColoredText();
}
// test a predictor
// Walks over the corpus (every token if allTokens is set, otherwise every
// second token starting at index 1), asks p for a prediction from the history
// so far and scores it by the number of characters predicted correctly.
// The resulting percentage is reported to the collector.
static void test(P p) {
  predicted = new TreeSet<Integer>();
  // long accumulators: the sums are character counts over the whole corpus.
  long points = 0, total = 0;
  int lastPercent = 0;
  List<String> history = new ArrayList<String>();
  for (int i = allTokens ? 0 : 1; i < tok.size(); i += allTokens ? 1 : 2) {
    String t = tok.get(i);
    String x = p.read(history);
    boolean correct = t.equals(x);
    total += t.length();
    if (correct) {
      predicted.add(i);
      points += t.length();
    }
    history.add(t);
    // Progress is reported in steps of 10 (rounded up).
    int percent = roundUpTo(10, (int) (i*100L/tok.size()));
    if (percent > lastPercent) {
      print("Learning " + percent + "% done.");
      lastPercent = percent;
    }
  }
  // Avoid a NaN score (0/0) when nothing was scored, e.g. for an empty corpus.
  double score = total == 0 ? 0 : points*100.0/total;
  collector.add(p, score);
}
// Opens a window showing the tail of the corpus (roughly the last
// maxCharsGUI characters); tokens whose index is in "predicted" are drawn
// green, all others gray. Empty tokens are skipped.
static void showColoredText() { try {
  JFrame frame = new JFrame("Predicted = green");
  JTextPane textPane = new JTextPane();
  //pane.setFont(loadFont("#1000993", 24));
  Document document = textPane.getStyledDocument();

  // Walk backwards from the end until enough characters are collected.
  int start = tok.size();
  int chars = 0;
  while (chars <= maxCharsGUI && start > 0) {
    --start;
    chars += tok.get(start).length();
  }

  for (int idx = start; idx < tok.size(); idx++) {
    String text = tok.get(idx);
    if (text.length() == 0) continue;
    Color color;
    if (predicted.contains(idx))
      color = Color.green;
    else
      color = Color.gray;
    SimpleAttributeSet attrs = new SimpleAttributeSet();
    StyleConstants.setForeground(attrs, color);
    document.insertString(document.getLength(), text, attrs);
  }

  frame.getContentPane().add(new JScrollPane(textPane), BorderLayout.CENTER);
  frame.setBounds(100, 100, 600, 600);
  frame.setVisible(true);
} catch (Throwable __e) { throw __e instanceof RuntimeException ? (RuntimeException) __e : new RuntimeException(__e); }}
static class SnippetDB {
static List fields;
static List> rows;
SnippetDB() {}
SnippetDB(String snippetIDOfGzippedMysqlDump) {
importMySQLDump(gunzipToText(snippetIDOfGzippedMysqlDump));
}
void importMySQLDump(String dump) {
System.out.println("Importing " + dump.length() + " chars.");
Class importer = hotwire("#1000660");
set(importer, "in", dump);
callMain(importer);
Map tables = (Map) get(importer, "tables");
Map tableFields = (Map) get(importer, "tableFields");
fields = tableFields.get("snippets_public");
rows = tables.get("snippets_public");
}
String getField(List row, String field) {
int i = fields.indexOf(field);
return i < 0 ? null : row.get(i);
}
List> rowsOrderedBy(String field) {
final int i = fields.indexOf(field);
List> list = new ArrayList>(rows);
sort(list, new Comparator>() {
public int compare(List rowA, List rowB) {
String a = rowA.get(i), b = rowB.get(i);
return a.compareTo(b);
}
});
return list;
}
public void finalize() {
System.err.println("Note: SnippetDB released from memory.");
}
} // SnippetDB
// Builds the token corpus from the snippet identified by corpusID.
// A snippet whose title contains ".zip" (case-insensitively) is read as a
// zip archive, anything else as a gzipped MySQL dump.
static List<String> makeCorpusJavaTok() {
  String name = getSnippetTitle(corpusID);
  // Fixed locale so the check does not depend on the platform's case rules
  // (e.g. "I" lower-cases differently under a Turkish default locale).
  if (name.toLowerCase(Locale.ENGLISH).indexOf(".zip") >= 0)
    return makeCorpus_zip();
  else
    return makeCorpus_mysqldump();
}
// Tokenizes every entry of the zip archive stored under corpusID into one
// token list. Each entry is read as UTF-8 text and prefixed with a
// "== File: <name>" header before tokenizing.
static List<String> makeCorpus_zip() { try {
  ZipFile zipFile = new ZipFile(loadLibrary(corpusID));
  try {
    Enumeration<? extends ZipEntry> entries = zipFile.entries();
    List<String> tok = new ArrayList<String>();

    while (entries.hasMoreElements()) {
      ZipEntry entry = entries.nextElement();
      //System.out.println("File found: " + entry.getName());

      // TODO: try to skip binary files?
      StringBuilder builder = new StringBuilder();
      InputStream fin = zipFile.getInputStream(entry);
      try {
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fin, "UTF-8"));
        String line;
        while ((line = bufferedReader.readLine()) != null)
          builder.append(line).append('\n');
      } finally {
        // Close the entry stream even when reading fails.
        fin.close();
      }

      StringBuilder buf = new StringBuilder();
      buf.append("\n== File: " + entry.getName());
      buf.append("\n==\n");
      buf.append(builder.toString()).append("\n");

      // Drop the previous list's last token before appending, so the two
      // adjacent whitespace tokens at the seam do not both end up in the list.
      if (tok.size() != 0) tok.remove(tok.size()-1);
      tok.addAll(javaTok(buf.toString()));
    }

    return internAll(tok);
  } finally {
    // Release the archive on every path, including failures.
    zipFile.close();
  }
} catch (Throwable __e) { throw __e instanceof RuntimeException ? (RuntimeException) __e : new RuntimeException(__e); }}
// Tokenizes the first numSnippets rows (ordered by "sn_created") of the
// snippet database stored under corpusID into one token list. Each snippet
// is prefixed with an "== ID / == Title" header before tokenizing.
static List<String> makeCorpus_mysqldump() {
  SnippetDB db = new SnippetDB(corpusID);
  List<List<String>> rows = db.rowsOrderedBy("sn_created");
  List<String> tok = new ArrayList<String>();
  int count = Math.min(rows.size(), numSnippets);
  // The loop header alone advances i; an extra increment in the body used
  // to skip every second row.
  for (int i = 0; i < count; i++) {
    List<String> row = rows.get(i);
    String id = db.getField(row, "sn_id");
    String title = db.getField(row, "sn_title");
    String text = db.getField(row, "sn_text");

    StringBuilder buf = new StringBuilder();
    buf.append("\n== ID: " + id);
    buf.append("\n== Title: " + title);
    buf.append("\n==\n");
    buf.append(text).append("\n");

    // Drop the previous list's last token before appending, so the two
    // adjacent whitespace tokens at the seam do not both end up in the list.
    if (tok.size() != 0) tok.remove(tok.size()-1);
    tok.addAll(javaTok(buf.toString()));
  }
  return internAll(tok);
}
// Returns a new list holding the interned (String.intern) version of every
// string in tok, in the same order. The input list is not modified.
static List<String> internAll(List<String> tok) {
  List<String> l = new ArrayList<String>(tok.size());
  for (String t : tok)
    l.add(t.intern());
  return l;
}
// Keeps track of the best-scoring predictor reported so far.
static class Collector {
  // Best predictor so far (null until the first add()).
  P winner;
  // Score of the winner.
  double bestScore = -1;
  // The "predicted" set of the main class as it was when the winner was recorded.
  Set predicted;

  // Records p as the new winner if there is no winner yet or its score is
  // strictly higher than the best one; otherwise does nothing.
  void add(P p, double score) {
    boolean improved = winner == null || score > bestScore;
    if (!improved)
      return;

    winner = p;
    bestScore = score;
    //S name = shorten(structure(p), 100);
    String name = p.getClass().getName();
    print("New best score: " + formatDouble(score, 2) + "% (" + name + ")");
    this.predicted = main.predicted;
  }
}
// Fetches the "title" field of the given snippet from the tinybrain.de
// getfield service. Any failure is rethrown as a RuntimeException.
static String getSnippetTitle(String id) { try {
  String address = "http://tinybrain.de:8080/tb-int/getfield.php?id=" + parseSnippetID(id) + "&field=title";
  URL titleUrl = new URL(address);
  return loadPage(titleUrl);
} catch (Throwable __e) { throw __e instanceof RuntimeException ? (RuntimeException) __e : new RuntimeException(__e); }}
// Rounds x up to the next multiple of n (x itself if it already is one),
// using integer arithmetic.
static int roundUpTo(int n, int x) {
  int steps = (x + n - 1) / n;
  return steps * n;
}
// Copies the array into a new, independent and resizable ArrayList
// (unlike Arrays.asList, which returns a fixed-size view of the array).
static <A> ArrayList<A> asList(A[] a) {
  return new ArrayList<A>(Arrays.asList(a));
}
// Returns the local file for the given snippet; simply delegates to
// loadBinarySnippet.
static File loadLibrary(String snippetID) {
  File library = loadBinarySnippet(snippetID);
  return library;
}
static void sort(T[] a, Comparator super T> c) {
Arrays.sort(a, c);
}
static void sort(List a, Comparator super T> c) {
Collections.sort(a, c);
}
// replacement for class JavaTok
// maybe incomplete, might want to add floating point numbers
// todo also: extended multi-line strings
//
// Splits s into an alternating list of tokens: even indices hold
// whitespace/comments (possibly empty), odd indices hold code tokens.
// The result always has an odd number of elements, so it starts and ends
// with a whitespace token. Concatenating all tokens yields s again.
// Code tokens are: quoted literals ('...' or "...", with backslash escapes),
// identifiers, digit runs, [[...]] blocks, and single other characters.
static List<String> javaTok(String s) {
  List<String> tok = new ArrayList<String>();
  int l = s.length();

  int i = 0;
  while (i < l) {
    int j = i;

    // scan for whitespace (blanks, line breaks, block and line comments)
    while (j < l) {
      char ws = s.charAt(j);
      if (ws == ' ' || ws == '\t' || ws == '\r' || ws == '\n')
        ++j;
      else if (s.startsWith("/*", j)) {
        // Step over the opener first so that the '*' of "/*" cannot double
        // as the start of the closing "*/" (as in "/*/").
        j += 2;
        while (j < l && !s.startsWith("*/", j)) ++j;
        // Include the closer; an unterminated comment runs to the end.
        j = Math.min(j+2, l);
      } else if (s.startsWith("//", j)) {
        do ++j; while (j < l && "\r\n".indexOf(s.charAt(j)) < 0);
      } else
        break;
    }

    tok.add(s.substring(i, j));
    i = j;
    if (i >= l) break;
    char c = s.charAt(i);

    // scan for non-whitespace
    if (c == '\'' || c == '"') {
      char opener = c;
      ++j;
      while (j < l) {
        if (s.charAt(j) == opener) {
          ++j;
          break;
        } else if (s.charAt(j) == '\\' && j+1 < l)
          j += 2; // skip the escaped character
        else
          ++j;
      }
    } else if (Character.isJavaIdentifierStart(c))
      do ++j; while (j < l && Character.isJavaIdentifierPart(s.charAt(j)));
    else if (Character.isDigit(c))
      do ++j; while (j < l && Character.isDigit(s.charAt(j)));
    else if (s.startsWith("[[", i)) {
      do ++j; while (j+1 < l && !s.startsWith("]]", j));
      j = Math.min(j+2, l);
    } else
      ++j;

    tok.add(s.substring(i, j));
    i = j;
  }

  // Keep the count odd so the list also ends with a whitespace token.
  if ((tok.size() % 2) == 0) tok.add("");
  return tok;
}
// Loads the binary snippet with the given ID, gunzips it and returns the
// content decoded as UTF-8. Any failure is rethrown as a RuntimeException.
static String gunzipToText(String dataID) { try {
  File file = loadBinarySnippet(dataID);
  InputStream fis = new FileInputStream(file);
  try {
    GZIPInputStream gis = new GZIPInputStream(fis);
    try {
      ByteArrayOutputStream fos = new ByteArrayOutputStream();
      byte[] buffer = new byte[1024];
      int len;
      while ((len = gis.read(buffer)) != -1)
        fos.write(buffer, 0, len);
      return new String(fos.toByteArray(), "UTF-8");
    } finally {
      // Releases the decompressor as well as the underlying stream.
      gis.close();
    }
  } finally {
    // Covers the case where the GZIP header could not be read.
    fis.close();
  }
} catch (Throwable __e) { throw __e instanceof RuntimeException ? (RuntimeException) __e : new RuntimeException(__e); }}
// Invokes the "main" method of c through call(), handing over args as the
// single String[] argument.
static void callMain(Object c, String... args) {
  Object[] mainArguments = { args };
  call(c, "main", mainArguments);
}
// Prints an empty line to standard output.
static void print() {
  System.out.println("");
}
// Prints the string form of o (or "null") followed by a line break to
// standard output.
static void print(Object o) {
  String text = String.valueOf(o);
  System.out.println(text);
}
// Prints the number followed by a line break to standard output.
static void print(long i) {
  String text = Long.toString(i);
  System.out.println(text);
}
// compile JavaX source, load classes & return main class
// src can be a snippet ID or actual source code
static Class> hotwire(String src) {
try {
Class j = getJavaX();
if (j == null) {
j = _javax.class;
try { _javax.androidContext = get(main.class, "androidContext"); } catch (Exception e) {}
}
List libraries = new ArrayList();
File srcDir = (File) call(j, "transpileMain", src, libraries);
Object androidContext = get(j, "androidContext");
if (androidContext != null)
return (Class) call(j, "loadx2android", srcDir, src);
File classesDir = (File) call(j, "TempDirMaker_make");
String javacOutput = (String) call(j, "compileJava", srcDir, libraries, classesDir);
System.out.println(javacOutput);
URL[] urls = new URL[libraries.size()+1];
urls[0] = classesDir.toURI().toURL();
for (int i = 0; i < libraries.size(); i++)
urls[i+1] = libraries.get(i).toURI().toURL();
// make class loader
URLClassLoader classLoader = new URLClassLoader(urls);
// load & return main class
Class> theClass = classLoader.loadClass("main");
call(j, "setVars", theClass, isSnippetID(src) ? src: null);
return theClass;
} catch (Exception e) {
throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e);
}
}
// Assigns value to the static field named "field" declared by class c,
// making the field accessible first. Every exception (including a missing
// field) is wrapped in a RuntimeException.
static void set(Class c, String field, Object value) {
  try {
    Field staticField = set_findStaticField(c, field);
    staticField.setAccessible(true);
    staticField.set(null, value);
  } catch (Exception problem) {
    throw new RuntimeException(problem);
  }
}
static Field set_findStaticField(Class> c, String field) {
for (Field f : c.getDeclaredFields())
if (f.getName().equals(field) && (f.getModifiers() & Modifier.STATIC) != 0)
return f;
throw new RuntimeException("Static field '" + field + "' not found in " + c.getName());
}
// Formats d with at most "digits" fraction digits (trailing zeros are
// omitted), using English decimal symbols regardless of the default locale.
public static String formatDouble(double d, int digits) {
  StringBuilder pattern = new StringBuilder("0.");
  for (int k = 0; k < digits; k++)
    pattern.append('#');
  java.text.DecimalFormatSymbols symbols = new java.text.DecimalFormatSymbols(Locale.ENGLISH);
  java.text.DecimalFormat formatter = new java.text.DecimalFormat(pattern.toString(), symbols);
  return formatter.format(d);
}
// Always throws a RuntimeException with the message "fail". The declared
// return type only exists so callers can write "throw fail();".
static RuntimeException fail() {
  RuntimeException failure = new RuntimeException("fail");
  throw failure;
}
// Always throws a RuntimeException carrying msg. The declared return type
// only exists so callers can write "throw fail(...);".
static RuntimeException fail(String msg) {
  RuntimeException failure = new RuntimeException(msg);
  throw failure;
}
// Reads a field reflectively. If o is a Class, the static field of that
// class is read; otherwise the field declared by o's own class is read from
// the instance o. Failures are wrapped in a RuntimeException.
static Object get(Object o, String field) {
  if (o instanceof Class) {
    Class type = (Class) o;
    return get(type, field);
  }
  try {
    Field instanceField = get_findField(o.getClass(), field);
    instanceField.setAccessible(true);
    return instanceField.get(o);
  } catch (Exception problem) {
    throw new RuntimeException(problem);
  }
}
// Reads the static field named "field" declared by class c, making it
// accessible first. Failures are wrapped in a RuntimeException.
static Object get(Class c, String field) {
  try {
    Field staticField = get_findStaticField(c, field);
    staticField.setAccessible(true);
    Object value = staticField.get(null);
    return value;
  } catch (Exception problem) {
    throw new RuntimeException(problem);
  }
}
static Field get_findStaticField(Class> c, String field) {
for (Field f : c.getDeclaredFields())
if (f.getName().equals(field) && (f.getModifiers() & Modifier.STATIC) != 0)
return f;
throw new RuntimeException("Static field '" + field + "' not found in " + c.getName());
}
static Field get_findField(Class> c, String field) {
for (Field f : c.getDeclaredFields())
if (f.getName().equals(field))
return f;
throw new RuntimeException("Field '" + field + "' not found in " + c.getName());
}
// The JavaX class, if one has been set; nothing in the visible code assigns
// this field, so it is presumably set from outside (e.g. reflectively) - TODO confirm.
static Class __javax;
// Returns the registered JavaX class, or null if none has been set.
static Class getJavaX() {
return __javax;
}
// Returns the local cache file for the given binary snippet. If the file
// is not cached yet, the snippet data is loaded and stored first.
// Any failure is rethrown as a RuntimeException.
static File loadBinarySnippet(String snippetID) { try {
  long id = parseSnippetID(snippetID);

  File cached = DiskSnippetCache_getLibrary(id);
  if (cached != null)
    return cached;

  // Cache miss: fetch the data, store it, then look the file up again.
  DiskSnippetCache_putLibrary(id, loadDataSnippetImpl(snippetID));
  return DiskSnippetCache_getLibrary(id);
} catch (Throwable __e) { throw __e instanceof RuntimeException ? (RuntimeException) __e : new RuntimeException(__e); }}
// Tells whether s can be parsed by parseSnippetID, i.e. whether parsing it
// completes without a RuntimeException.
public static boolean isSnippetID(String s) {
  boolean parsable;
  try {
    parseSnippetID(s);
    parsable = true;
  } catch (RuntimeException notAnID) {
    parsable = false;
  }
  return parsable;
}
// Loads the page at the given address. Addresses starting with "tb/" are
// resolved against tinybrain.de:8080, and "http://" is prepended when the
// address carries no scheme.
public static String loadPage(String url) throws IOException {
  String address = url;
  if (address.startsWith("tb/"))
    address = "tinybrain.de:8080/" + address;
  if (!address.contains("://"))
    address = "http://" + address;
  return loadPage(new URL(address));
}
// Logs the URL to standard output, opens a connection to it and returns
// the page text.
public static String loadPage(URL url) throws IOException {
  System.out.println("Loading: " + url.toExternalForm());
  return loadPage(url.openConnection(), url);
}
// Reads the whole response of con as text, decoded with the charset guessed
// from its Content-Type header. url is only used for the error message.
// Throws IOException if the connection reports no content type or reading fails.
public static String loadPage(URLConnection con, URL url) throws IOException {
  String contentType = con.getContentType();
  if (contentType == null)
    throw new IOException("Page could not be read: " + url);
  //Log.info("Content-Type: " + contentType);
  String charset = loadPage_guessCharset(contentType);
  InputStream in = con.getInputStream();
  try {
    Reader r = new InputStreamReader(in, charset);
    StringBuilder buf = new StringBuilder();
    // Read in chunks rather than one character per call.
    char[] chunk = new char[8192];
    int n;
    while ((n = r.read(chunk)) != -1)
      buf.append(chunk, 0, n);
    return buf.toString();
  } finally {
    // Close the stream on every path, including an unsupported charset.
    in.close();
  }
}
// Matches a charset parameter anywhere in a Content-Type value, with or
// without whitespace after the ';' and with optional double quotes.
static final Pattern loadPage_charsetPattern =
  Pattern.compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

// Extracts the charset name from a Content-Type header value, e.g.
// "text/html; charset=UTF-8" -> "UTF-8". The media type itself is not
// restricted. Returns "ISO-8859-1" when no charset parameter is present.
static String loadPage_guessCharset(String contentType) {
  Matcher m = loadPage_charsetPattern.matcher(contentType);
  /* If Content-Type doesn't declare a charset, choose default and hope for the best. */
  return m.find() ? m.group(1) : "ISO-8859-1";
}
// Invokes a method reflectively and returns its result. If o is a Class, a
// matching static method of that class is called; otherwise a matching
// method is called on the instance o. Every exception is wrapped in a
// RuntimeException.
static Object call(Object o, String method, Object... args) {
  try {
    boolean staticCall = o instanceof Class;
    Method target = staticCall
      ? call_findStaticMethod((Class) o, method, args, false)
      : call_findMethod(o, method, args, false);
    target.setAccessible(true);
    return target.invoke(staticCall ? null : o, args);
  } catch (Exception problem) {
    throw new RuntimeException(problem);
  }
}
// Searches c and then its superclasses for a static method with the given
// name whose parameters accept args; returns the first match.
// With debug set, every inspected method is logged to standard output.
// Throws RuntimeException if nothing matches.
static Method call_findStaticMethod(Class c, String method, Object[] args, boolean debug) {
  for (Class current = c; current != null; current = current.getSuperclass()) {
    for (Method candidate : current.getDeclaredMethods()) {
      if (debug)
        System.out.println("Checking method " + candidate.getName() + " with " + candidate.getParameterTypes().length + " parameters");

      if (!candidate.getName().equals(method)) {
        if (debug) System.out.println("Method name mismatch: " + method);
        continue;
      }

      boolean isStatic = (candidate.getModifiers() & Modifier.STATIC) != 0;
      if (isStatic && call_checkArgs(candidate, args, debug))
        return candidate;
    }
  }
  throw new RuntimeException("Method '" + method + "' (static) with " + args.length + " parameter(s) not found in " + c.getName());
}
// Searches o's class and then its superclasses for a method with the given
// name whose parameters accept args; returns the first match.
// With debug set, every inspected method is logged to standard output.
// Throws RuntimeException if nothing matches.
static Method call_findMethod(Object o, String method, Object[] args, boolean debug) {
  for (Class current = o.getClass(); current != null; current = current.getSuperclass()) {
    for (Method candidate : current.getDeclaredMethods()) {
      if (debug)
        System.out.println("Checking method " + candidate.getName() + " with " + candidate.getParameterTypes().length + " parameters");
      if (!candidate.getName().equals(method))
        continue;
      if (call_checkArgs(candidate, args, debug))
        return candidate;
    }
  }
  throw new RuntimeException("Method '" + method + "' (non-static) with " + args.length + " parameter(s) not found in " + o.getClass().getName());
}
private static boolean call_checkArgs(Method m, Object[] args, boolean debug) {
Class>[] types = m.getParameterTypes();
if (types.length != args.length) {
if (debug)
System.out.println("Bad parameter length: " + args.length + " vs " + types.length);
return false;
}
for (int i = 0; i < types.length; i++)
if (!(args[i] == null || isInstanceX(types[i], args[i]))) {
if (debug)
System.out.println("Bad parameter " + i + ": " + args[i] + " vs " + types[i]);
return false;
}
return true;
}
// Converts a snippet ID string to its numeric value after removing the
// optional prefixes handled by shortenSnippetID.
// Throws NumberFormatException if the remainder is not a valid long.
public static long parseSnippetID(String snippetID) {
  String digits = shortenSnippetID(snippetID);
  return Long.parseLong(digits);
}
// extended over Class.isInstance() to handle primitive types
// For a primitive parameter type, arg matches if it is an instance of the
// corresponding wrapper class; for all other types Class.isInstance decides.
private static boolean isInstanceX(Class type, Object arg) {
  if (type == boolean.class) return arg instanceof Boolean;
  if (type == int.class) return arg instanceof Integer;
  if (type == long.class) return arg instanceof Long;
  if (type == float.class) return arg instanceof Float;
  // double used to be missing, so double parameters never matched.
  if (type == double.class) return arg instanceof Double;
  if (type == short.class) return arg instanceof Short;
  if (type == char.class) return arg instanceof Character;
  if (type == byte.class) return arg instanceof Byte;
  return type.isInstance(arg);
}
// Data files are immutable, use centralized cache
// Returns the cache file "data_<id>.jar" in the global cache directory, or
// null if that file does not exist.
public static File DiskSnippetCache_getLibrary(long snippetID) throws IOException {
  File cacheFile = new File(getGlobalCache(), "data_" + snippetID + ".jar");
  if (cacheFile.exists())
    return cacheFile;
  return null;
}
// Stores data as "data_<id>.jar" in the global cache directory.
public static void DiskSnippetCache_putLibrary(long snippetID, byte[] data) throws IOException {
  File withoutExtension = new File(getGlobalCache(), "data_" + snippetID);
  String targetPath = withoutExtension.getPath() + ".jar";
  saveBinaryFile(targetPath, data);
}
// Downloads the raw bytes of a binary snippet from the file store,
// logging the URL and the byte count to standard error.
// Throws IOException if the snippet is missing/not public (the original
// FileNotFoundException is kept as the cause) or the download fails.
static byte[] loadDataSnippetImpl(String snippetID) throws IOException {
  byte[] data;
  try {
    URL url = new URL("http://eyeocr.sourceforge.net/filestore/filestore.php?cmd=serve&file=blob_"
      + parseSnippetID(snippetID) + "&contentType=application/binary");
    System.err.println("Loading library: " + url);
    data = loadBinaryPage(url.openConnection());
    System.err.println("Bytes loaded: " + data.length);
  } catch (FileNotFoundException e) {
    // Keep the cause so the failing URL stays visible in the stack trace.
    throw new IOException("Binary snippet #" + snippetID + " not found or not public", e);
  }
  return data;
}
// Strips a leading "#" and then a leading "http://tinybrain.de/" from the
// snippet ID, if present, and returns what is left.
static String shortenSnippetID(String snippetID) {
  String id = snippetID;
  if (id.startsWith("#"))
    id = id.substring(1);

  final String urlPrefix = "http://tinybrain.de/";
  if (id.startsWith(urlPrefix))
    id = id.substring(urlPrefix.length());

  return id;
}
// Returns the directory ".tinybrain/snippet-cache" below the user's home
// directory, creating it (and missing parents) on the way.
static File getGlobalCache() {
  File cacheDir = new File(userHome(), ".tinybrain/snippet-cache");
  cacheDir.mkdirs();
  return cacheDir;
}
// Opens a connection to the given URL string and returns the response
// body as bytes.
static byte[] loadBinaryPage(String url) throws IOException {
  URLConnection connection = new URL(url).openConnection();
  return loadBinaryPage(connection);
}
// Reads the complete response body of con and returns it as a byte array.
// A progress line is written to standard error for every 100000 bytes
// loaded. The stream is closed on every path.
public static byte[] loadBinaryPage(URLConnection con) throws IOException {
  //setHeaders(con);
  ByteArrayOutputStream buf = new ByteArrayOutputStream();
  InputStream inputStream = con.getInputStream();
  try {
    // Read in chunks rather than one byte per call.
    byte[] chunk = new byte[8192];
    int n = 0;
    int nextReport = 100000;
    int len;
    while ((len = inputStream.read(chunk)) != -1) {
      buf.write(chunk, 0, len);
      n += len;
      // Report each 100000-byte mark that this chunk reached.
      while (n >= nextReport) {
        System.err.println(" " + nextReport + " bytes loaded.");
        nextReport += 100000;
      }
    }
  } finally {
    inputStream.close();
  }
  return buf.toByteArray();
}
/**
 * writes safely (to temp file, then rename)
 *
 * The contents are first written to "<fileName>_temp"; an existing target
 * file is then deleted and the temp file renamed to the target. Missing
 * parent directories are created.
 *
 * @throws IOException if writing fails, the old file cannot be deleted or
 *         the temp file cannot be renamed
 */
public static void saveBinaryFile(String fileName, byte[] contents) throws IOException {
  File file = new File(fileName);
  File parentFile = file.getParentFile();
  if (parentFile != null)
    parentFile.mkdirs();
  String tempFileName = fileName + "_temp";
  FileOutputStream fileOutputStream = new FileOutputStream(tempFileName);
  try {
    fileOutputStream.write(contents);
  } finally {
    // Release the file handle even when the write fails.
    fileOutputStream.close();
  }
  if (file.exists() && !file.delete())
    throw new IOException("Can't delete " + fileName);

  if (!new File(tempFileName).renameTo(file))
    throw new IOException("Can't rename " + tempFileName + " to " + fileName);
}
// Cached result of userHome(); null until the first successful lookup.
static String _userHome;

// Returns the user's home directory: "/storage/sdcard0/" on Android,
// otherwise the "user.home" system property. The value is cached.
static String userHome() {
  if (_userHome != null)
    return _userHome;

  _userHome = isAndroid() ? "/storage/sdcard0/" : System.getProperty("user.home");
  //System.out.println("userHome: " + _userHome);
  return _userHome;
}
// Tells whether the "java.vendor" system property mentions "android"
// (compared in lower case).
static boolean isAndroid() {
  String vendor = System.getProperty("java.vendor").toLowerCase();
  return vendor.contains("android");
}
}