!7

// Download location of the gzipped triples file; the main program fetches it
// once and keeps it as a local program file.
sS url = "https://github.com/stefan-reich/smartbot/releases/download/1/triples.gz";

// One record from the triples file: a statement of the form (a, b, c)
// plus bookkeeping metadata.
sclass Triple {
  // The statement itself, in noun / verb / noun order.
  S a;
  S b;
  S c;

  // Global identifier of this triple (16 characters).
  S globalID;

  // Whether the information is considered reliable.
  bool verified;

  // Creation time as a unix timestamp.
  long created;

  // Additional information about where the triple came from.
  S source;
}

// Main program: make sure the triples archive is available locally,
// parse it into memory and print a small random sample.
p {
  File archive = getProgramFile("triples-1.gz");

  // A missing or empty local copy triggers the download.
  bool cached = fileSize(archive) != 0;
  if (cached) {
    print("Triples already downloaded.");
  } else {
    print("Downloading triples (9 MB).");
    loadBinaryPageToFile(url, archive);
  }
  print("Parsing.");

  // A single iterator is shared by both sections of the file, so the
  // second loop resumes right after the separator line.
  Iterator<S> lineIt = linesFromFile(archive);

  // Section 1: the name table - one quoted name per line, terminated by
  // the first blank line.
  new L<S> names;
  while (lineIt.hasNext()) {
    S line = trim(lineIt.next());
    if (empty(line)) break;
    names.add(unquote(line));
  }

  // Section 2: one triple record per line. Each record is parsed inside
  // pcall so that a single bad line does not abort the whole run; lines
  // for which readTriple returns null are skipped.
  new L<Triple> triples;
  while (lineIt.hasNext()) {
    S row = lineIt.next();
    pcall {
      Triple parsed = readTriple(row, names);
      addIfNotNull(triples, parsed);
    }
  }

  // Summary and a sample of ten randomly selected triples.
  print("Have " + n(triples, "triple"));
  print("Some random triples:");
  print();
  pnl(selectRandom(triples, 10));
}

static Triple readTriple(S s, L<S> names) {
  L<S> l = javaTokC(s);
  if (l(l) == 8) {
    new Triple t;
    t.a = javaIntern(names.get(parseInt(first(l))));
    t.b = javaIntern(names.get(parseInt(second(l))));
    t.c = javaIntern(names.get(parseInt(third(l))));
    t.globalID = unquote(l.get(3));
    // t.title = unquote(l.get(4)); // unused
    t.source = javaIntern(unquote(l.get(5)));
    t.verified = eq(l.get(6), "v");
    t.created = parseLong(l.get(7));
    ret t;
  }
  null;
}