Uses 16250K of libraries. Click here for Pure Java version (3159L/20K).
1 | import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; |
2 | |
/**
 * Copies the paragraphs of a .docx file into a new document, replacing every
 * match of a regular expression along the way.
 *
 * A match may span several runs of a paragraph. The runs covered by a match are
 * collapsed into at most two output runs: one holding the text before the match
 * plus the replacement, and one holding whatever text follows the match.
 * Both inherit the formatting of the first run touched by the match.
 *
 * Only what the code below copies ends up in the output: the body's section
 * properties, the paragraphs returned by doc.getParagraphs(), and per run the
 * text plus color, font family, font size, bold, italic and underline.
 *
 * Usage: set inFile, outFile and regularExpression (optionally swap in
 * getReplacement), then call run().
 */
sclass WordDocumentTextReplacer {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // Document to read and file to write. Both must be non-null when run() is called.
  File inFile, outFile;

  // Pattern searched for in the paragraph text. Must be non-empty when run() is called.
  S regularExpression;

  /**
   * Produces the replacement for one match.
   *
   * @param text   the matched substring
   * @param groups result of regexpFirstGroups(regularExpression, text)
   * @return the text to insert instead of the match (default: "[REPLACED]")
   */
  swappable S getReplacement(S text, LS groups) { ret "[REPLACED]"; }

  /**
   * Loads inFile, builds the output document with all matches replaced
   * and saves it to outFile.
   */
  run {
    assertNempty(+regularExpression);
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");

    // Carry the section properties over to the new document.
    new XWPFDocument docOut;
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    // getSectPr() yields null when the body has no section properties; nothing to copy then
    if (sectPr != null)
      bodyOut.setSectPr(sectPr);

    L<Paragraph> paragraphs = doc.getParagraphs();

    for (Paragraph para : paragraphs) {
      Paragraph paraOut = docOut.createParagraph();
      L<Run> runs = para.getRuns();

      // Working list: (source run providing the formatting, text to emit).
      // Only the first text element of each run (getText(0)) is considered.
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));

      // Grow a window of runs [i, j) until its joined text contains a match.
      for (int i = 0; i < l(runs2); i++) {
        for (int j = i+1; j <= l(runs2); j++) {
          S text = join(pairsB(subList(runs2, i, j)));
          IntRange range = regexpFindRange(regularExpression, text);
          if (range == null) continue; // no match

          // We have a match; skip runs that lie entirely left of it.
          // Bounded by j so the window always keeps at least one run,
          // even for empty runs or zero-length matches.
          while (i+1 < j && range.start >= l(runs2.get(i).b)) {
            range = shiftIntRange(range, -l(runs2.get(i).b));
            i++;
          }

          text = join(pairsB(subList(runs2, i, j)));
          S found = substring(text, range);
          print("Found match: " + found);
          LS groups = regexpFirstGroups(regularExpression, found);
          S replacement = getReplacement(found, groups);
          print("Replacing with: " + replacement);
          S text1 = takeFirst(text, range.start) + replacement;
          S rest = substring(text, range.end);

          // An empty match at the very start replaced by nothing changes no text.
          // Rescanning would find it again forever, so just widen the window.
          if (empty(text1) && range.end <= range.start) continue;

          // Replace all matched runs with one or two runs at i.
          removeSubList(runs2, i+1, j);
          Run run = runs2.get(i).a;
          if (nempty(text1)) {
            runs2.add(i, pair(run, text1));
            ++i; // step past the replacement so it is never scanned again
          }
          if (empty(rest))
            runs2.remove(i); // match consumed the run; i now denotes the following run
          else
            runs2.get(i).b = rest;

          // i is now the first unprocessed run. The list has changed, so the old j
          // is meaningless; restart the window there (the loop's j++ makes it i+1).
          j = i;
        }
      }

      // Emit the resulting runs into the output paragraph.
      for (Pair<Run, S> p : runs2) {
        Run run = p.a;
        // createRun() already appends the new run to paraOut
        Run runOut = paraOut.createRun();
        runOut.setText(p.b);

        // Copy run attributes. Unset values (null color/font, font size -1)
        // are skipped so the output run keeps the document defaults.
        S color = run.getColor();
        if (color != null) runOut.setColor(color);
        S fontFamily = run.getFontFamily();
        if (fontFamily != null) runOut.setFontFamily(fontFamily);
        int fontSize = run.getFontSize();
        if (fontSize > 0) runOut.setFontSize(fontSize);
        runOut.setBold(run.isBold());
        runOut.setItalic(run.isItalic());
        runOut.setUnderline(run.getUnderline());
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }
}
Began life as a copy of #1028318
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028365 |
Snippet name: | WordDocumentTextReplacer |
Eternal ID of this version: | #1028365/5 |
Text MD5: | 4d0f037aa386ef25f9aca226b779ee28 |
Transpilation MD5: | bed581516cdac5033b4aa6994150cacf |
Author: | stefan |
Category: | javax / io |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-13 17:21:53 |
Source code size: | 3377 bytes / 99 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 209 / 512 |
Version history: | 4 change(s) |
Referenced in: | [show references] |