static L sentencesFromHTML(S html) {
new SentencesFromHTML x;
x.parseHTML(html);
ret x.sentences;
}
sclass SentencesFromHTML {
new L sentences;
bool withQuestions = true;
void parseHTML(S html) {
// tokenize and clean up html
L tok = htmlcoarsetok(html);
tok = removeScripts(tok);
tok = dropTags(tok, "span");
tok = dropTags(tok, "a");
tok = dropTags(tok, "b");
tok = dropTags(tok, "small");
// tok = dropAllTags(tok); // Too much!
//print(structure(tok));
for (int i = 0; i < tok.size(); i += 2) {
S line = tok.get(i).trim();
if (line.startsWith("