import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

// Reads the .docx file inFile, replaces every match of regularExpression
// in its paragraphs with the result of getReplacement(), and writes a
// newly built document to outFile.
//
// A match may span several runs of a paragraph; the affected runs are
// merged/split so that the replacement ends up in a single run.
//
// The output document is built from scratch: the section properties of
// the input body are reused, and for every paragraph returned by
// doc.getParagraphs() a new paragraph is created. Per run, only the text
// and the attributes handled in copyRunAttributes() are carried over.
sclass WordDocumentTextReplacer {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // Input and output .docx files (both must be set before run())
  File inFile, outFile;

  // Pattern to search for (must be non-empty before run())
  S regularExpression;

  // Computes the replacement for one match. Swappable, so callers can
  // plug in their own logic.
  //   text   = the matched text
  //   groups = result of regexpFirstGroups(regularExpression, text)
  //            (presumably the capture groups - confirm against helper)
  swappable S getReplacement(S text, LS groups) { ret "[REPLACED]"; }

  run {
    assertNempty(+regularExpression);
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");
    new XWPFDocument docOut;

    // reuse the input's section properties in the output document
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);

    L<Paragraph> paragraphs = doc.getParagraphs();
    for (Paragraph para : paragraphs) {
      Paragraph paraOut = docOut.createParagraph();
      L<Run> runs = para.getRuns();

      // Working list of (source run, text) pairs. The text is edited
      // below; the run is kept only as the source of formatting.
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));

      S fullText = join(pairsB(runs2));

      // Try every window [i, j) of consecutive runs so that matches
      // spanning run boundaries are found.
      // NOTE: i, j and runs2 are all modified inside the loop body;
      // the statement order below is significant.
      for (int i = 0; i < l(runs2); i++) {
        for (int j = i+1; j <= l(runs2); j++) {
          S text = join(pairsB(subList(runs2, i, j)));
          print(+text);
          IntRange range = regexpFindRange(regularExpression, text);
          if (range == null) continue; // no match

          // We have a match, find out run indices.
          // Skip runs left of match, making range relative to run i.
          // NOTE(review): for a zero-length match at the very end of
          // the text, i could presumably reach l(runs2) here - confirm.
          while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
            range = shiftIntRange(range, -l(runs2.get(i).b));
            i++;
          }

          text = join(pairsB(subList(runs2, i, j)));
          print("Found match: " + substring(text, range));

          // replace all matched runs with one or two runs at i
          removeSubList(runs2, i+1, j);
          Run run = runs2.get(i).a;
          S found = substring(text, range);
          LS groups = regexpFirstGroups(regularExpression, found);
          S replacement = getReplacement(found, groups);
          print("Replacing with: " + replacement);

          // text before the match + replacement goes into a new entry
          // inserted in front of entry i
          S text1 = takeFirst(text, range.start) + replacement;
          if (nempty(text1)) {
            runs2.add(i, pair(run, text1));
            ++i;
          }

          // text after the match stays in entry i (or entry i is
          // dropped if nothing is left)
          S rest = substring(text, range.end);
          if (empty(rest))
            runs2.remove(i--);
          else
            runs2.get(i).b = rest;
        }
      }

      // emit one output run per remaining (run, text) pair
      for (Pair<Run, S> p : runs2) {
        Run run = p.a;
        Run runOut = paraOut.createRun();
        runOut.setText(p.b);
        copyRunAttributes(run, runOut);
        // NOTE(review): createRun() presumably already appends the run
        // to paraOut; kept as in the original - confirm if still needed.
        paraOut.addRun(runOut);
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }

  // Copies color, font family, font size, bold, italic and underline
  // from run to runOut. Attributes that are not set on the source run
  // are left untouched on runOut instead of writing a placeholder value.
  void copyRunAttributes(Run run, Run runOut) {
    S color = run.getColor();
    if (color != null) runOut.setColor(color);

    S fontFamily = run.getFontFamily();
    if (fontFamily != null) runOut.setFontFamily(fontFamily);

    // getFontSize() reports -1 for "no explicit size"; passing that on
    // would write a negative size into the output document.
    int fontSize = run.getFontSize();
    if (fontSize > 0) runOut.setFontSize(fontSize);

    runOut.setBold(run.isBold());
    runOut.setItalic(run.isItalic());
    runOut.setUnderline(run.getUnderline());
  }
}