import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

// Reads a .docx file, lets a swappable hook rewrite the text of each
// paragraph's runs, and writes the result into a freshly created document.
//
// Usage: set inFile and outFile, override processParagraph (typically
// calling regexpReplacement from it) and optionally postprocess, then
// call run().
//
// Only the attributes copied in copyRunAttributes and the body's section
// properties are carried over to the output document.
sclass WordDocumentTextReplacer2 {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // source document and destination file; both must be set before run()
  File inFile, outFile;

  // A run of the output document: the source run whose formatting is
  // copied, plus the text that run should carry in the output.
  srecord OutRun(Run run, S newText) {}

  // paragraphs of the input document (filled in by run())
  L<Paragraph> paragraphs;

  // one list of OutRuns per input paragraph, in document order
  new LL<OutRun> outParagraphs;

  // Loads inFile, processes every paragraph, and saves the rebuilt
  // document to outFile.
  run {
    assertNotNull(+inFile);
    assertNotNull(+outFile);
    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");

    new XWPFDocument docOut;

    // carry the section properties over, if the source body has any
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    if (sectPr != null) {
      CTBody bodyOut = docOut.getDocument().getBody();
      bodyOut.setSectPr(sectPr);
    }

    paragraphs = doc.getParagraphs();

    for (Paragraph para : paragraphs) {
      L<Run> runs = para.getRuns();

      // pair every run with its text (null text becomes "")
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));

      processParagraph(runs2);

      outParagraphs.add(map(runs2, p -> new OutRun(p.a, p.b)));
    }

    postprocess();

    for (L<OutRun> runs : outParagraphs) {
      Paragraph paraOut = docOut.createParagraph();
      for (OutRun r : runs) {
        // createRun() already appends the new run to paraOut,
        // so no separate addRun call is needed
        Run runOut = paraOut.createRun();
        runOut.setText(r.newText);
        copyRunAttributes(r.run, runOut);
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }

  // Copies color, font family, font size, bold, italic and underline
  // from src to dest. Color, font family and font size are only copied
  // when src actually defines them: the getters report "unset" as
  // null / -1, and writing those values back would put bogus attributes
  // into the output run.
  void copyRunAttributes(Run src, Run dest) {
    S color = src.getColor();
    if (color != null) dest.setColor(color);

    S fontFamily = src.getFontFamily();
    if (fontFamily != null) dest.setFontFamily(fontFamily);

    int fontSize = src.getFontSize();
    if (fontSize > 0) dest.setFontSize(fontSize);

    dest.setBold(src.isBold());
    dest.setItalic(src.isItalic());
    dest.setUnderline(src.getUnderline());
  }

  // Hook called once after all paragraphs were processed and before the
  // output document is built. Does nothing by default.
  swappable void postprocess() {}

  // Hook called once per paragraph with its (run, text) pairs.
  // Implementations may edit the list in place. Does nothing by default.
  swappable void processParagraph(LPair<Run, S> runs2) {}

  // Searches the concatenated text of consecutive runs for
  // regularExpression and replaces a match with the string returned by
  // getReplacement, editing runs2 in place.
  //
  // runs2             - (run, text) pairs of one paragraph
  // regularExpression - pattern handed to regexpFindRangeIC
  //                     (presumably case-insensitive - TODO confirm)
  // getReplacement    - receives the matched text and the groups from
  //                     regexpFirstGroups; returns the replacement text
  //
  // A match spanning several runs is collapsed: text before the match
  // plus the replacement goes into one entry, text after the match (if
  // any) into a second one. Both reuse the run in which the match starts.
  //
  // NOTE(review): safety is decremented once per outer-loop iteration, so
  // the outer loop body executes at most 100 times per call.
  // NOTE(review): after a replacement the inner loop is not left; it
  // continues with j+1 against the modified list - confirm this is
  // intended.
  void regexpReplacement(LPair<Run, S> runs2, S regularExpression, IF2<S, LS, S> getReplacement) {
    int safety = 100;
    for (int i = 0; safety-- > 0 && i < l(runs2); i++) {
      for (int j = i+1; j <= l(runs2); j++) {
        // text of runs i (inclusive) to j (exclusive)
        S text = join(pairsB(subList(runs2, i, j)));
        IntRange range = regexpFindRangeIC(regularExpression, text);
        if (range == null) continue; // no match

        // we have a match, find out run indices

        // skip runs left of match
        while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
          range = shiftIntRange(range, -l(runs2.get(i).b));
          i++;
        }

        // range is now relative to the start of run i
        text = join(pairsB(subList(runs2, i, j)));
        S found = substring(text, range);
        print("Found match: " + found);

        // replace all matched runs with one or two runs at i
        removeSubList(runs2, i+1, j);
        Run run = runs2.get(i).a;
        LS groups = regexpFirstGroups(regularExpression, found);
        S replacement = getReplacement.get(found, groups);
        print("Replacing with: " + replacement);

        // first entry: text before the match plus the replacement
        S text1 = takeFirst(text, range.start) + replacement;
        if (nempty(text1)) {
          runs2.add(i, pair(run, text1));
          ++i;
        }

        // second entry: whatever follows the match; dropped if empty
        S rest = substring(text, range.end);
        if (empty(rest))
          runs2.remove(i);
        else
          runs2.get(i).b = rest;

        --i; // process again
      }
    }
  }

  // Returns the concatenated newText of all runs of an output paragraph.
  S fullText(L<OutRun> paragraph) {
    ret join(map(p -> p.newText, paragraph));
  }
}