1 | static L<S> splitIntoSentences(S s) { |
2 | new L<S> sentences; |
3 | for (S sentence : splitIntoSentences_split(s)) { |
4 | char first = sentence.charAt(0); |
5 | if (Character.isLowerCase(first) || ",;:=".indexOf(first) >= 0) continue; |
6 | if (!hasCharacters(sentence)) continue; |
7 | sentences.add(sentence); |
8 | } |
9 | ret sentences; |
10 | } |
11 | |
12 | static L<S> splitIntoSentences_split(S s) { |
13 | L<S> tok = javaTok(s); // To parse quoted things |
14 | simpleSpaces(tok); |
15 | new L<S> list; |
16 | int i = 0; |
17 | while (true) { |
18 | int j = i; |
19 | do { |
20 | j = indexOfAny(tok, j+1, ".", "?"); |
21 | if (j < 0) return list; |
22 | } while (j+1 < tok.size()-1 && tok.get(j+1).equals("")); // matches stuff like "9.5" |
23 | |
24 | S sentence = join(tok.subList(i, j+1)).trim(); |
25 | if (sentence.length() > 1) |
26 | list.add(sentence); |
27 | i = j+1; |
28 | } |
29 | } |
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1007652 |
Snippet name: | splitIntoSentences |
Eternal ID of this version: | #1007652/2 |
Text MD5: | a6f583af3e90778edadc0da42d9796f2 |
Author: | stefan |
Category: | javax / parsing |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-03-30 19:46:59 |
Source code size: | 830 bytes / 29 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 574 / 597 |
Version history: | 1 change(s) |
Referenced in: | [show references] |