1 | sclass WikiPage {
|
2 | S title, text; |
3 | |
4 | *() {}
|
5 | *(S *title, S *text) {}
|
6 | } |
7 | |
8 | static IterableIterator<WikiPage> streamInSimpleWikipedia() {
|
9 | File f = unpackSimpleWikipedia(); |
10 | final BufferedReader reader = utf8bufferedReader(f); |
11 | please include function iteratorFromFunction. |
12 | ret main.<WikiPage> iteratorFromFunction(new O {
|
13 | int lines = 0, pages = 0; |
14 | StringBuilder pageBuf = null; |
15 | |
16 | WikiPage get() ctex {
|
17 | S line; |
18 | while ((line = reader.readLine()) != null) {
|
19 | line = trim(line); |
20 | if (eq(line, "<page>")) |
21 | pageBuf = new StringBuilder; |
22 | if (pageBuf != null) |
23 | pageBuf.append(line).append("\n");
|
24 | if (eq(line, "</page>")) {
|
25 | L<S> tok = htmlTok(str(pageBuf)); |
26 | S title = trim(htmldecode(join(contentsOfContainerTag(tok, "title")))); |
27 | S text = trim(htmldecode(join(contentsOfContainerTag(tok, "text")))); |
28 | if ((++pages % 1000) == 0) {
|
29 | fractionDone(pages/228400.0); |
30 | print("Pages: " + pages + " (" + title + ")");
|
31 | sleep(1); |
32 | } |
33 | ret new WikiPage(title, text); |
34 | } |
35 | } |
36 | fractionDone(1); |
37 | reader.close(); |
38 | null; |
39 | } |
40 | }); |
41 | } |
Began life as a copy of #1008015
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1008067 |
| Snippet name: | streamInSimpleWikipedia |
| Eternal ID of this version: | #1008067/8 |
| Text MD5: | ebae3e08ef2f26ad05eaab4ab3c1888a |
| Author: | stefan |
| Category: | javax / a.i. / networking |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2017-04-23 16:10:42 |
| Source code size: | 1244 bytes / 41 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 661 / 686 |
| Version history: | 7 change(s) |
| Referenced in: | [show references] |