!7

// Download location of the gzipped triples dump.
sS url = "https://github.com/stefan-reich/smartbot/releases/download/1/triples.gz";

// One (a, b, c) statement together with the metadata stored alongside it in the dump.
sclass Triple {
  S a, b, c; // noun, verb, noun
  S globalID; // 16 character global ID
  bool verified; // is it reliable information?
  long created; // unix timestamp
  S source; // further source info
  
  // Renders as "[globalID] " followed by whatever ai_renderTriple produces for (a, b, c).
  toString { ret "[" + globalID + "] " + ai_renderTriple(a, b, c); }
}

// Main program: fetch the dump if it is not cached yet, parse it, print a count and a sample.
//
// Layout of the file as consumed below:
//   1. A block of names, one per line; each line is trimmed and unquoted.
//      The block ends at the first empty line (or at end of file).
//   2. Triple lines, each handed to readTriple together with the name table.
p {
  File file = getProgramFile("triples-1.gz");
  
  // A missing or empty cache file triggers the download; any non-empty file is reused as-is.
  if (fileSize(file) == 0) {
    print("Downloading triples (9 MB).");
    loadBinaryPageToFile(url, file);
  } else
    print("Triples already downloaded.");
  print("Parsing.");
  
  // Read names
  Iterator<S> it = linesFromFile(file);
  new L<S> names;
  while (it.hasNext()) {
    S s = trim(it.next());
    if (empty(s)) break; // empty line separates the name table from the triples
    names.add(unquote(s));
  }
  
  // Read triples (continues on the same iterator, right after the separator line)
  new L<Triple> triples;
  while (it.hasNext()) {
    S s = it.next();
    // Each line is parsed inside its own pcall; presumably so that one bad line
    // (e.g. an out-of-range name index) is reported without aborting the load.
    // Lines for which readTriple returns null are skipped.
    pcall {
      addIfNotNull(triples, readTriple(s, names));
    }
  }
  
  print("Have " + n(triples, "triple"));
  print("Some random triples:");
  print();
  pnl(selectRandom(triples, 10));
}

// Parses one triple line.
//
// s     - the raw line; it is tokenized with javaTokC and must yield exactly 8 tokens
// names - name table read from the head of the file; tokens 0..2 are integer indices into it
//
// Token layout:
//   0, 1, 2 - indices of a, b, c in names
//   3       - global ID (quoted)
//   4       - title (not read; Triple has no field for it)
//   5       - source (quoted)
//   6       - "v" when the triple is verified, anything else otherwise
//   7       - creation timestamp, parsed as a long
//
// Returns the populated Triple, or null when the line does not have 8 tokens.
// Index or number parsing problems are not handled here and propagate to the caller.
static Triple readTriple(S s, L<S> names) {
  L<S> l = javaTokC(s);
  if (l(l) == 8) {
    new Triple t;
    t.a = javaIntern(names.get(parseInt(first(l))));
    t.b = javaIntern(names.get(parseInt(second(l))));
    t.c = javaIntern(names.get(parseInt(third(l))));
    t.globalID = unquote(l.get(3));
    // l.get(4) is the title; unused
    t.source = javaIntern(unquote(l.get(5)));
    t.verified = eq(l.get(6), "v");
    t.created = parseLong(l.get(7));
    ret t;
  }
  null;
}