Uses 16250K of libraries. Click here for Pure Java version (3325L/21K).
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

// Copies a .docx document into a new document, run by run, giving the
// processParagraph and postprocess hooks a chance to change the run texts.
//
// What is carried over to the output: the body's section properties, the
// text at position 0 of every run, and per run: color, font family, font
// size, bold, italic and underline. Paragraphs are created fresh in the
// output, so nothing else from the source paragraphs is copied by this code.
sclass WordDocumentTextReplacer2 {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // Source document and destination file; both are asserted non-null
  // at the start of the run block.
  File inFile, outFile;

  // One run of the output: the source run (used as formatting donor)
  // together with the text that will be written for it.
  srecord OutRun(Run run, S newText) {}

  // Paragraphs of the loaded input document (assigned in the run block).
  L<Paragraph> paragraphs;

  // One list of output runs per input paragraph, in input order.
  // Filled before postprocess() is called, written out afterwards.
  new LL<OutRun> outParagraphs;

  run {
    assertNotNull(+inFile);
    assertNotNull(+outFile);
    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");
    new XWPFDocument docOut;

    // carry the section properties of the input body over to the output body
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);

    // Phase 1: turn every paragraph into a mutable list of (run, text)
    // pairs, let processParagraph edit it, then freeze it as OutRuns.
    paragraphs = doc.getParagraphs();
    for (Paragraph para : paragraphs) {
      L<Run> runs = para.getRuns();

      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));

      processParagraph(runs2);

      outParagraphs.add(map(runs2, p -> new OutRun(p.a, p.b)));
    }

    // Phase 2: hook that runs after all paragraphs were processed
    // and before anything is written to the output document.
    postprocess();

    // Phase 3: build the output document from outParagraphs.
    for (L<OutRun> runs : outParagraphs) {
      Paragraph paraOut = docOut.createParagraph();
      for (OutRun r : runs) {
        Run run = r.run;
        Run runOut = paraOut.createRun();
        runOut.setText(r.newText);

        // Copy run attributes. Attributes the source run does not define
        // are left undefined on the output run instead of being written
        // with the getter's "not set" value (null, or -1 for font size).
        S color = run.getColor();
        if (color != null) runOut.setColor(color);
        S fontFamily = run.getFontFamily();
        if (fontFamily != null) runOut.setFontFamily(fontFamily);
        int fontSize = run.getFontSize();
        if (fontSize > 0) runOut.setFontSize(fontSize);
        runOut.setBold(run.isBold());
        runOut.setItalic(run.isItalic());
        runOut.setUnderline(run.getUnderline());

        // NOTE(review): createRun() presumably already attaches runOut to
        // paraOut, which would make this call redundant - kept as in the
        // original; confirm against the POI version in use.
        paraOut.addRun(runOut);
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }

  // Hook called once after all paragraphs were collected into
  // outParagraphs. Does nothing by default.
  swappable void postprocess() {}

  // Hook called once per input paragraph with its editable list of
  // (run, text) pairs; whatever the list contains afterwards is what gets
  // written for that paragraph. Does nothing by default.
  swappable void processParagraph(LPair<Run, S> runs2) {}

  // Replaces matches of regularExpression in the text of a paragraph,
  // including matches that span several consecutive runs. Modifies runs2
  // in place.
  //
  // runs2             - the paragraph as (run, text) pairs
  // regularExpression - pattern passed to regexpFindRangeIC and
  //                     regexpFirstGroups (presumably matched ignoring
  //                     case, going by the helper's name - confirm)
  // getReplacement    - called with the matched text and its groups;
  //                     returns the text to put in place of the match
  //
  // The outer loop runs at most 100 times per call (safety counter), so
  // at most 100 start positions are visited.
  //
  // NOTE(review): after a replacement the inner loop is not left; it goes
  // on with the decremented i and the old j although the list has changed.
  // i can also become -1 here (match at the very start of the first run
  // with an empty replacement); what happens then depends on how subList
  // treats a negative start index. Verify before relying on this for
  // paragraphs with several matches.
  void regexpReplacement(LPair<Run, S> runs2, S regularExpression, IF2<S, LS, S> getReplacement) {
    int safety = 100;
    for (int i = 0; safety-- > 0 && i < l(runs2); i++) {
      // grow the window [i, j) one run at a time until its text has a match
      for (int j = i+1; j <= l(runs2); j++) {
        S text = join(pairsB(subList(runs2, i, j)));
        IntRange range = regexpFindRangeIC(regularExpression, text);
        if (range == null) continue; // no match

        // we have a match, find out run indices

        // skip runs left of match, keeping range relative to run i
        while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
          range = shiftIntRange(range, -l(runs2.get(i).b));
          i++;
        }

        text = join(pairsB(subList(runs2, i, j)));
        print("Found match: " + substring(text, range));

        // replace all matched runs with one or two runs at i;
        // both pieces reuse the run object of the first matched run
        removeSubList(runs2, i+1, j);
        Run run = runs2.get(i).a;
        S found = substring(text, range);
        LS groups = regexpFirstGroups(regularExpression, found);
        S replacement = getReplacement.get(found, groups);
        print("Replacing with: " + replacement);

        // piece 1: text before the match plus the replacement
        S text1 = takeFirst(text, range.start) + replacement;
        if (nempty(text1)) {
          runs2.add(i, pair(run, text1));
          ++i;
        }

        // piece 2: text after the match; the entry is dropped if empty
        S rest = substring(text, range.end);
        if (empty(rest))
          runs2.remove(i);
        else
          runs2.get(i).b = rest;
        --i; // process again
      }
    }
  }

  // Returns the concatenated newText of all runs of an output paragraph.
  S fullText(L<OutRun> paragraph) {
    ret join(map(p -> p.newText, paragraph));
  }
}
Began life as a copy of #1028365
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028390 |
Snippet name: | WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing] |
Eternal ID of this version: | #1028390/12 |
Text MD5: | e0553769fcd8fb049adf4035d967e147 |
Transpilation MD5: | f96343ce0e6d91722dbf932b0be3d923 |
Author: | stefan |
Category: | javax / io |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-15 13:49:13 |
Source code size: | 3719 bytes / 119 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 248 / 589 |
Version history: | 11 change(s) |
Referenced in: | #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674) |