!7

static new TreeMap<U, U> wiki; // title to text (stored as compressed strings)

sS processKey(S s) { ret toLower(s); }

p-subst {
  time {
    quietGC(); // don't litter console with GC messages
    veryBigConsole();
    consoleWordWrap();

    // parse the Simple Wikipedia XML dump page by page
    File f = unpackSimpleWikipedia();
    BufferedReader reader = utf8bufferedReader(f);
    S line;
    int lines = 0, pages = 0;
    StringBuilder pageBuf = null;
    while ((line = reader.readLine()) != null) {
      /*if ((++lines % 100) == 0) print("Lines: " + lines);*/
      line = trim(line);
      if (eq(line, "<page>")) pageBuf = new StringBuilder;
      if (pageBuf != null) pageBuf.append(line).append("\n");
      if (eq(line, "</page>")) {
        //print("Page done. " + l(pageBuf) + " chars");
        L<S> tok = htmlTok(str(pageBuf));
        S title = join(contentsOfContainerTag(tok, "title"));
        S text = trim(htmldecode(join(contentsOfContainerTag(tok, "text"))));
        if (empty(text)) {
          //print("No text: " + title);
        } else {
          U key = new U(processKey(title));
          U old = wiki.get(key);
          S red = wikipedia_getRedirect(text);
          // skip self-redirects and exact duplicates of the stored text
          if (!eqic(red, title) && !eqic(text, str(old))) {
            if (old != null) {
              print("Double entry: " + title);
              print(" " + quote(str(old)));
              print(" " + quote(text));
            }
            // store the new text unless the existing entry is itself a redirect
            if (old == null || wikipedia_getRedirect(str(old)) == null)
              wiki.put(key, new U(text));
          }
        }
        if ((++pages % 1000) == 0) {
          fractionDone(pages/228400.0); // rough total page count of the dump
          print("Pages: " + pages + " (" + title + ")");
          sleep(1);
        }
        //print(title); // XXX - too much printing hangs AWT (investigate!)
        pageBuf = null;
      }
    }
  }
  fractionDone(1);

  // print a random entry
  answer(str(random(keys(wiki))));

  botSleep();
}

answer {
  // find the closest matching title, resolve redirects, print the article
  U u = followRedirect(nicestClosestKey(wiki, new U(processKey(s))));
  if (u != null) {
    clearConsole();
    S title = toUpper(str(u));
    consoleStatus(title);
    print(title);
    print();
    print(wiki.get(u));
    scrollConsoleUpIn(0.5);
    ret " ";
  }
}

static U followRedirect(U key) {
  U next;
  int count = 0;
  // follow redirect chains, but give up after 10 hops
  while ((next = toU(processKey(wikipedia_getRedirect(str(wiki.get(key)))))) != null && ++count < 10)
    key = next;
  ret key;
}
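
// Usage sketch (hypothetical interaction), assuming the standard JavaX console
// bot wiring passes typed input lines to the answer block as "s":
//   a query like "mars" is lowercased by processKey, nicestClosestKey picks the
//   closest key in the wiki map, followRedirect resolves up to 10 redirect hops,
//   and the resulting article text is printed to the console.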