!7

// Simple English Wikipedia lookup bot.
// On startup the pages-articles XML dump is read line by line and every page
// with non-empty text is stored in memory; answer { } then looks titles up.

// title to text (stored as compressed strings)
static new TreeMap<U, U> wiki;

p {
  time {
    // Prefer an already-unpacked dump in the user's directory; otherwise
    // download the dump and decompress it on the fly.
    File f = userFile("Downloads/wikipedia/simplewiki-latest-pages-articles.xml");
    BufferedReader reader;
    if (f.exists())
      reader = utf8bufferedReader(f);
    else {
      f = downloadSimpleWikipedia();
      reader = utf8bufferedReader(bunzip2stream(f));
    }

    // Hard-coded estimate of the total page count; only used to scale the
    // progress indicator.
    final double expectedPages = 228400.0;

    try {
      S line;
      int pages = 0;
      StringBuilder pageBuf = null; // non-null only while inside <page>...</page>

      while ((line = reader.readLine()) != null) {
        line = trim(line);

        // The boundary tags are matched as whole (trimmed) lines.
        // NOTE(review): the tag literals were restored from "" -- the original
        // text appears to have lost everything in angle brackets. Confirm
        // against the dump that <page> and </page> sit on lines of their own.
        if (eq(line, "<page>"))
          pageBuf = new StringBuilder;

        if (pageBuf != null)
          pageBuf.append(line).append("\n");

        // Guard on pageBuf so a stray closing tag without an opening tag is
        // ignored instead of being parsed as the string "null".
        if (eq(line, "</page>") && pageBuf != null) {
          L<S> tok = htmlTok(str(pageBuf));
          S title = join(contentsOfContainerTag(tok, "title"));
          S text = join(contentsOfContainerTag(tok, "text"));

          // Pages without any text are skipped.
          if (!empty(text))
            wiki.put(new U(title), new U(text));

          if ((++pages % 1000) == 0) {
            fractionDone(pages/expectedPages);
            print("Pages: " + pages + " (" + title + ")");
            // Short pause after each progress report; presumably gives the
            // UI a chance to catch up (see note below) -- TODO confirm.
            sleep(1);
          }

          // XXX - printing every title hangs AWT (investigate!), hence only
          // the coarse progress report above.
          pageBuf = null;
        }
      }
    } finally {
      // Release the file / decompression stream even if parsing fails.
      reader.close();
    }
  }
  fractionDone(1);

  // print a random entry (only if anything was loaded)
  if (!wiki.isEmpty()) {
    U n = random(keys(wiki));
    print("== " + n + " ==");
    print();
    print(wiki.get(n));
  }

  bot();
}

answer {
  // floorKey returns the greatest stored title that is <= s in key order,
  // so the result is the exact title when present and otherwise the closest
  // preceding one. Null means no title sorts at or before s.
  U u = wiki.floorKey(new U(s));
  if (u != null) {
    print("== " + u + " ==");
    print(wiki.get(u));
    ret str(u);
  }
}