Uses 16250K of libraries. Click here for Pure Java version (3325L/21K).
1 | import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; |
2 | |
3 | sclass WordDocumentTextReplacer2 { |
4 | replace Run with XWPFRun. |
5 | replace Paragraph with XWPFParagraph. |
6 | |
7 | File inFile, outFile; |
8 | |
9 | srecord OutRun(Run run, S newText) {} |
10 | |
11 | L<Paragraph> paragraphs; |
12 | new LL<OutRun> outParagraphs; |
13 | |
14 | run { |
15 | assertNotNull(+inFile); |
16 | assertNotNull(+outFile); |
17 | |
18 | XWPFDocument doc = loadDocx(print("Loading", inFile)); |
19 | print("Document loaded"); |
20 | |
21 | new XWPFDocument docOut; |
22 | CTBody body = doc.getDocument().getBody(); |
23 | CTSectPr sectPr = body.getSectPr(); |
24 | CTBody bodyOut = docOut.getDocument().getBody(); |
25 | bodyOut.setSectPr(sectPr); |
26 | |
27 | paragraphs = doc.getParagraphs(); |
28 | |
29 | for (Paragraph para : paragraphs) { |
30 | L<Run> runs = para.getRuns(); |
31 | //print(n2(runs, "run")); |
32 | new LPair<Run, S> runs2; |
33 | for (Run r : runs) |
34 | addPair(runs2, r, unnull(r.getText(0))); |
35 | S fullText = join(pairsB(runs2)); |
36 | //print(quote(fullText)); |
37 | //printIfNempty(regexpExtractAll(regexp, fullText)); |
38 | |
39 | processParagraph(runs2); |
40 | |
41 | outParagraphs.add(map(runs2, p -> new OutRun(p.a, p.b))); |
42 | } |
43 | |
44 | postprocess(); |
45 | |
46 | for (L<OutRun> runs : outParagraphs) { |
47 | Paragraph paraOut = docOut.createParagraph(); |
48 | |
49 | for (OutRun r : runs) { |
50 | //paraOut.addRun(run); |
51 | Run run = r.run; |
52 | Run runOut = paraOut.createRun(); |
53 | runOut.setText(r.newText); |
54 | |
55 | // copy run attributes |
56 | runOut.setColor(run.getColor()); |
57 | runOut.setFontFamily(run.getFontFamily()); |
58 | runOut.setFontSize(run.getFontSize()); |
59 | runOut.setBold(run.isBold()); |
60 | runOut.setItalic(run.isItalic()); |
61 | runOut.setUnderline(run.getUnderline()); |
62 | paraOut.addRun(runOut); |
63 | } |
64 | } |
65 | |
66 | saveDocx(docOut, outFile); |
67 | printFileInfo(outFile); |
68 | } |
69 | |
70 | swappable void postprocess() {} |
71 | |
72 | swappable void processParagraph(LPair<Run, S> runs2) {} |
73 | |
74 | void regexpReplacement(LPair<Run, S> runs2, S regularExpression, IF2<S, LS, S> getReplacement) { |
75 | int safety = 100; |
76 | for (int i = 0; safety-- > 0 && i < l(runs2); i++) { |
77 | for (int j = i+1; j <= l(runs2); j++) { |
78 | S text = join(pairsB(subList(runs2, i, j))); |
79 | //print(+text); |
80 | IntRange range = regexpFindRangeIC(regularExpression, text); |
81 | if (range == null) continue; // no match |
82 | //print("Match: " + substring(text, range)); |
83 | // we have a match, find out run indices |
84 | |
85 | // skip runs left of match |
86 | while (i < l(runs2) && range.start >= l(runs2.get(i).b)) { |
87 | range = shiftIntRange(range, -l(runs2.get(i).b)); |
88 | i++; |
89 | } |
90 | |
91 | text = join(pairsB(subList(runs2, i, j))); |
92 | print("Found match: " + substring(text, range)); |
93 | |
94 | // replace all matched runs with one or two runs at i |
95 | removeSubList(runs2, i+1, j); |
96 | Run run = runs2.get(i).a; |
97 | S found = substring(text, range); |
98 | LS groups = regexpFirstGroups(regularExpression, found); |
99 | S replacement = getReplacement.get(found, groups); |
100 | print("Replacing with: " + replacement); |
101 | S text1 = takeFirst(text, range.start) + replacement; |
102 | if (nempty(text1)) { |
103 | runs2.add(i, pair(run, text1)); |
104 | ++i; |
105 | } |
106 | S rest = substring(text, range.end); |
107 | if (empty(rest)) |
108 | runs2.remove(i); |
109 | else |
110 | runs2.get(i).b = rest; |
111 | --i; // process again |
112 | } |
113 | } |
114 | } |
115 | |
116 | S fullText(L<OutRun> paragraph) { |
117 | ret join(map(p -> p.newText, paragraph)); |
118 | } |
119 | } |
Began life as a copy of #1028365
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028390 |
Snippet name: | WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing] |
Eternal ID of this version: | #1028390/12 |
Text MD5: | e0553769fcd8fb049adf4035d967e147 |
Transpilation MD5: | f96343ce0e6d91722dbf932b0be3d923 |
Author: | stefan |
Category: | javax / io |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-15 13:49:13 |
Source code size: | 3719 bytes / 119 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 249 / 590 |
Version history: | 11 change(s) |
Referenced in: | [show references] |