Uses 16250K of libraries. Click here for Pure Java version (3159L/20K).
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

// Reads a .docx file, builds a NEW document from the paragraphs returned by
// getParagraphs(), and replaces text matching regularExpression on the way.
// A match may span several runs; the matched runs are merged so the
// replacement ends up in a single run.
// Only the section properties and a fixed set of run attributes (color,
// font family, font size, bold, italic, underline) are carried over to the
// output document.
sclass WordDocumentTextReplacer {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // document to read / file to write (both must be set before run())
  File inFile, outFile;

  // pattern searched for in the run texts (must be non-empty)
  S regularExpression;

  // Hook producing the replacement for one match.
  // text = the matched text, groups = result of regexpFirstGroups on it.
  swappable S getReplacement(S text, LS groups) { ret "[REPLACED]"; }

  run {
    assertNempty(+regularExpression);
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");
    new XWPFDocument docOut;

    // carry the section properties of the input body over to the output body
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);

    L<Paragraph> paragraphs = doc.getParagraphs();
    for (Paragraph para : paragraphs) {
      Paragraph paraOut = docOut.createParagraph();
      L<Run> runs = para.getRuns();

      // working list: (source run whose formatting is used, text to emit)
      new LPair<Run, S> runs2;
      for (Run r : runs) addPair(runs2, r, unnull(r.getText(0)));

      // Search every window of consecutive runs [i, j) for a match.
      // NOTE: i and runs2 are modified inside the inner loop, and the inner
      // loop keeps running after a replacement; statement order matters here.
      for (int i = 0; i < l(runs2); i++) {
        for (int j = i+1; j <= l(runs2); j++) {
          S text = join(pairsB(subList(runs2, i, j)));
          print(+text);
          IntRange range = regexpFindRange(regularExpression, text);
          if (range == null) continue; // no match in this window, widen it

          // we have a match, find out run indices
          // skip runs left of match (range is kept relative to the window start)
          while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
            range = shiftIntRange(range, -l(runs2.get(i).b));
            i++;
          }

          text = join(pairsB(subList(runs2, i, j)));
          print("Found match: " + substring(text, range));

          // replace all matched runs with one or two runs at i
          removeSubList(runs2, i+1, j);
          Run run = runs2.get(i).a;
          S found = substring(text, range);
          LS groups = regexpFirstGroups(regularExpression, found);
          S replacement = getReplacement(found, groups);
          print("Replacing with: " + replacement);

          // text before the match plus the replacement goes into a new entry
          // inserted in front of entry i (formatted like the first matched run)
          S text1 = takeFirst(text, range.start) + replacement;
          if (nempty(text1)) { runs2.add(i, pair(run, text1)); ++i; }

          // whatever follows the match stays in entry i; drop the entry if
          // nothing is left
          // NOTE(review): if text1 and rest are both empty while i == 0,
          // i becomes -1 and the inner loop continues - confirm that
          // subList/get tolerate this.
          S rest = substring(text, range.end);
          if (empty(rest)) runs2.remove(i--); else runs2.get(i).b = rest;
        }
      }

      // emit one output run per working-list entry
      for (Pair<Run, S> p : runs2) {
        Run run = p.a;
        Run runOut = paraOut.createRun();
        runOut.setText(p.b);
        copyRunAttributes(run, runOut);
        paraOut.addRun(runOut);
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }

  // Copies the supported formatting attributes from run to runOut.
  // Color, font family and font size are only copied when the source run
  // actually defines them: the getters report "not set" as null / -1, and
  // passing those markers to the setters would write bogus values
  // (e.g. a negative font size) into the output run.
  void copyRunAttributes(Run run, Run runOut) {
    S color = run.getColor();
    if (color != null) runOut.setColor(color);

    S fontFamily = run.getFontFamily();
    if (fontFamily != null) runOut.setFontFamily(fontFamily);

    int fontSize = run.getFontSize();
    if (fontSize > 0) runOut.setFontSize(fontSize);

    runOut.setBold(run.isBold());
    runOut.setItalic(run.isItalic());
    runOut.setUnderline(run.getUnderline());
  }
}
Began life as a copy of #1028318
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028365 |
Snippet name: | WordDocumentTextReplacer |
Eternal ID of this version: | #1028365/5 |
Text MD5: | 4d0f037aa386ef25f9aca226b779ee28 |
Transpilation MD5: | bed581516cdac5033b4aa6994150cacf |
Author: | stefan |
Category: | javax / io |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-13 17:21:53 |
Source code size: | 3377 bytes / 99 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 210 / 512 |
Version history: | 4 change(s) |
Referenced in: | #1028390 - WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing] #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674) |