Not logged in. Login/Logout/Register | List snippets | Create snippet | Upload image | Upload data

96
LINES

< > BotCompany Repo | #1008430 // Persistent text segmentation [dev.]

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (10336L/72K/218K).

!7

// Rows of the HTML table rendered by serve(). Cleared, refilled and re-sorted
// by makeData(); each row is a map with the keys "Original Paragraph",
// "Split" and "OK".
static new L<Map> data;

// One paragraph of input text together with its sentence split and its OK flag.
concept Para {
  // The paragraph itself: one non-comment line of the source snippet (see parasFromText).
  S text;
  // Result of splitIntoSentences_v3(text); stays null until makeData() or resplit() fills it in.
  L<S> split;
  // Mirrors the "ok_<id>" checkbox submitted to serve(); resplit() resets it to
  // false whenever the computed split changes.
  bool ok;
}

// Program entry point. The calls are order-dependent: paragraphs must exist
// before the table rows are built, and the rows must exist before serving.
p {
  // NOTE(review): presumably opens the concepts database that backs Para - confirm.
  db();
  // Create Para objects from the lines of the source snippet.
  parasFromText();
  // Build the initial table rows in `data`.
  makeData();
  // NOTE(review): presumably starts the HTTP server on port 4000 and opens a browser - confirm.
  serveHttpOpenBrowser(4000);
}

// HTTP handler for the review page.
//
// Only the URI "/" is served; every other URI gets a 404 response.
// Two actions are recognised, each submitted by one of the forms rendered below:
//   update  - sets every Para's ok flag according to the submitted "ok_<id>"
//             fields, then rebuilds the table rows.
//   resplit - recomputes the sentence split of every paragraph (see resplit()).
// Both actions modify stored data, so they are honoured for POST requests only.
// The method is synchronized, so requests are handled one at a time.
//
// Returns the HTML page containing the "Resplit" form and the paragraph table.
static synchronized NanoHTTPD.Response serve(S uri, NanoHTTPD.Method method,
  Map<S,S> header, Map<S,S> parms, Map<S,S> files) {
  
  if (neq(uri, "/")) ret serve404();
  
  // Both forms below submit via POST. Ignoring the action parameters on any
  // other method keeps a plain GET (crafted link, prefetch, crawler) from
  // clearing all OK flags or triggering a resplit.
  bool isPost = method == NanoHTTPD.Method.POST;
  
  if (isPost && nempty(parms.get("update"))) {
    // IDs of the paragraphs whose checkbox was submitted. Unchecked boxes are
    // not part of a form submission, so every other paragraph is set to not-OK.
    Set<Long> toCheck = asSet(map(f toLong, keysDeprefixNemptyValue(parms, "ok_")));
    //print("toCheck: " + struct(toCheck));
    for (Para p)
      cset(p, ok := toCheck.contains(p.id));
    makeData();
  }
  
  if (isPost && nempty(parms.get("resplit")))
    resplit();

  S title = "Splitting paragraphs into sentences";
  
  // `data` already holds HTML-encoded cells (see makeData), hence the
  // non-encoding table renderer.
  ret serveHTML(hhtml(
      hhead(htitle(title)
        + loadJQuery()
        + hCheckBoxMultiSelect())
    + hbody(h3(title)
      + hformPOST(p(hhidden("resplit", "1") + hsubmit("Resplit")))
      + hformPOST(
          hhidden("update", "1")
        + p(hsubmit())
        + htable_noEncode(data)
        + p(hsubmit())))));
}

// Reads snippet #1008407 and registers one Para per line via uniq(), skipping
// lines that start with "#".
// NOTE(review): uniq() presumably reuses an existing Para with the same text
// instead of creating a duplicate - confirm.
svoid parasFromText {
  for (S paragraph : toLinesFullTrim(loadSnippet(#1008407))) {
    if (!paragraph.startsWith("#"))
      uniq(Para, text := paragraph);
  }
}

// Rebuilds the table rows in `data` from all Para objects.
//
// Each row has three HTML cells: the encoded paragraph text, its sentence
// split (or "-" when the split is just the whole paragraph), and a checkbox
// named "ok_<id>" reflecting the Para's ok flag. The rows are then sorted by
// (checkbox is checked, split differs from "-", paragraph text).
svoid makeData {
  data.clear();
  for (Para para) {
    // Compute and store the split the first time a paragraph is encountered.
    if (para.split == null) {
      para.split = splitIntoSentences_v3(para.text);
      para.change();
    }
    
    // A single sentence identical to the paragraph means nothing was split.
    bool unsplit = l(para.split) == 1 && eq(first(para.split), para.text);
    // Alternative rendering kept for reference:
    //   htmlEncode_nlToBr(lines(para.split))
    S splitHtml = unsplit ? "-" : ul(htmlEncodeAll(para.split));
    
    data.add(litorderedmap(
      "Original Paragraph" := htmlencode(para.text),
      "Split" := splitHtml,
      "OK" := hcheckbox("ok_" + para.id, para.ok, "class" := "chkbox")));
  }
  
  // The sort key is derived from the rendered cells: the checkbox state is
  // detected by looking for "checked" in the checkbox HTML.
  data = sortByCalculatedField(data, func(Map row) {
    comparableList(
      contains(getString(row, "OK"), "checked"),
      neq(row.get("Split"), "-"),
      row.get("Original Paragraph"))
  });
}

// Re-runs the sentence splitter on every paragraph. Where the result differs
// from the stored split, the new split is stored and the paragraph's ok flag
// is cleared. Finally the table rows are rebuilt.
svoid resplit {
  for (Para para) {
    L<S> fresh = splitIntoSentences_v3(para.text);
    // Nothing to do when the splitter still yields the stored result.
    if (eq(fresh, para.split)) continue;
    para.split = fresh;
    para.ok = false;
    para.change();
  }
  makeData();
}

Author comment

Began life as a copy of #1008419

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1008430
Snippet name: Persistent text segmentation [dev.]
Eternal ID of this version: #1008430/18
Text MD5: 931f351925e8b689cd3318ce30876c8a
Transpilation MD5: c329202deb1de7ecf94662c49e2a9e93
Author: stefan
Category: javax / a.i. parsing
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-05-15 16:24:51
Source code size: 2195 bytes / 96 lines
Pitched / IR pitched: No / No
Views / Downloads: 505 / 872
Version history: 17 change(s)
Referenced in: #1008438 - Collect sentences from novel (using #1008430 db)