Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

96
LINES

< > BotCompany Repo | #1008430 // Persistent text segmentation [dev.]

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (10336L/72K/218K).

1  
!7
2  
3  
// Rows of the HTML table shown on the page; rebuilt by makeData().
// Each row is a map from column title to an HTML cell string.
static L<Map> data = new ArrayList<Map>();
4  
5  
// One paragraph of the input text together with its sentence split.
// Declared as a JavaX concept; instances are created via uniq() in
// parasFromText() and updated via cset()/change().
concept Para {
  // The paragraph text (the field uniq() matches on in parasFromText()).
  String text;

  // Sentences produced by splitIntoSentences_v3; null until first computed.
  List<String> split;

  // Whether the "ok_<id>" checkbox was ticked in the last submitted update form.
  boolean ok;
}
10  
11  
// Program entry point: set up the concept database, import the paragraphs,
// build the table rows once, then start the web UI.
public static void main(final String[] args) throws Exception {
  // Must run first: the following steps read and write Para concepts.
  db();

  // Create a Para for every paragraph of the source snippet.
  parasFromText();

  // Populate the static table rows served by serve().
  makeData();

  // NOTE(review): 4000 is presumably the HTTP port - confirm against serveHttpOpenBrowser.
  serveHttpOpenBrowser(4000);
}
17  
18  
// HTTP handler for the single page "/".
//
// Request parameters:
//   update  - if non-empty, every Para's "ok" flag is set from the presence of a
//             non-empty "ok_<id>" parameter, and the table rows are rebuilt.
//   resplit - if non-empty, all paragraphs are re-split via resplit().
// Both are checked independently, so a request carrying both triggers both.
//
// Any other URI gets the 404 response. The method, header and files arguments
// are not inspected. The method is synchronized, so requests are handled one
// at a time.
static synchronized NanoHTTPD.Response serve(S uri, NanoHTTPD.Method method,
  Map<S,S> header, Map<S,S> parms, Map<S,S> files) {

  // Only the root page exists.
  if (!eq(uri, "/")) ret serve404();

  // "Update" form: collect the IDs whose checkbox came back non-empty.
  if (nempty(parms.get("update"))) {
    Set<Long> checkedIds = asSet(map(f toLong, keysDeprefixNemptyValue(parms, "ok_")));
    for (Para para) {
      // Paragraphs absent from the submitted set are un-checked.
      bool checked = checkedIds.contains(para.id);
      cset(para, ok := checked);
    }
    makeData();
  }

  // "Resplit" form.
  if (nempty(parms.get("resplit")))
    resplit();

  S title = "Splitting paragraphs into sentences";

  // <head>: title plus the scripts needed for the checkbox multi-select.
  S headContent = htitle(title)
    + loadJQuery()
    + hCheckBoxMultiSelect();

  // First form: a single button that triggers a resplit.
  S resplitFormContent = hhidden("resplit", "1") + hsubmit("Resplit");

  // Second form: the table with one checkbox per paragraph, with a submit
  // button above and below it.
  S updateFormContent = hhidden("update", "1")
    + p(hsubmit())
    + htable_noEncode(data)
    + p(hsubmit());

  S bodyContent = h3(title)
    + hformPOST(p(resplitFormContent))
    + hformPOST(updateFormContent);

  ret serveHTML(hhtml(hhead(headContent) + hbody(bodyContent)));
}
48  
49  
// Loads snippet #1008407 and makes sure a Para exists for each of its lines.
// Lines starting with "#" are skipped. uniq() is keyed on the text field.
static void parasFromText() {
  S source = loadSnippet(#1008407);
  for (S line : toLinesFullTrim(source))
    if (!line.startsWith("#"))
      uniq(Para, text := line);
}
56  
57  
// Rebuilds the static table rows (data) from all Para concepts.
//
// Each row has three HTML cells:
//   "Original Paragraph" - the HTML-encoded paragraph text
//   "Split"              - "-" if the split is just the paragraph itself,
//                          otherwise a <ul> of the HTML-encoded sentences
//   "OK"                 - a checkbox named "ok_<id>" reflecting para.ok
//
// Rows are then sorted by a composite key: whether the checkbox HTML contains
// "checked", whether the paragraph was actually split, and the encoded text.
static void makeData() {
  data.clear();

  for (Para para) {
    // Compute the split lazily and store it on the concept.
    if (para.split == null) {
      para.split = splitIntoSentences_v3(para.text);
      para.change();
    }

    // A single sentence identical to the paragraph means "nothing was split".
    // (An earlier variant rendered the sentences as <br>-separated lines
    // instead of a list.)
    bool unsplit = l(para.split) == 1 && eq(first(para.split), para.text);
    S splitCell = unsplit ? "-" : ul(htmlEncodeAll(para.split));

    data.add(litorderedmap(
      "Original Paragraph" := htmlencode(para.text),
      "Split" := splitCell,
      "OK" := hcheckbox("ok_" + para.id, para.ok, "class" := "chkbox")));
  }

  // The sort key is derived from the rendered cells, not from the Para objects.
  data = sortByCalculatedField(data, func(Map row) {
    comparableList(
      contains(getString(row, "OK"), "checked"),
      neq(row.get("Split"), "-"),
      row.get("Original Paragraph"))
  });
}
85  
86  
// Re-runs the sentence splitter on every paragraph. Where the result differs
// from the stored split, the new split is stored and the paragraph's "ok"
// flag is reset, since the earlier approval applied to the old split.
// Finally the table rows are rebuilt.
static void resplit() {
  for (Para para) {
    L<S> fresh = splitIntoSentences_v3(para.text);

    // Unchanged split: keep the stored value and the ok flag.
    if (eq(fresh, para.split)) continue;

    para.split = fresh;
    para.ok = false;
    para.change();
  }

  makeData();
}

Author comment

Began life as a copy of #1008419

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1008430
Snippet name: Persistent text segmentation [dev.]
Eternal ID of this version: #1008430/18
Text MD5: 931f351925e8b689cd3318ce30876c8a
Transpilation MD5: c329202deb1de7ecf94662c49e2a9e93
Author: stefan
Category: javax / a.i. parsing
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-05-15 16:24:51
Source code size: 2195 bytes / 96 lines
Pitched / IR pitched: No / No
Views / Downloads: 506 / 875
Version history: 17 change(s)
Referenced in: [show references]