Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

119
LINES

< > BotCompany Repo | #1028390 // WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 16250K of libraries. Click here for Pure Java version (3325L/21K).

import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

// Rebuilds a .docx file run by run: loads inFile, lets processParagraph
// rewrite the text of each paragraph's runs, lets postprocess touch the
// collected result, then writes a fresh document to outFile.
// Only the formatting attributes copied in run() below (color, font family,
// font size, bold, italic, underline) and the body's section properties are
// carried over to the output document.
sclass WordDocumentTextReplacer2 {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.
  
  // source and destination .docx files; both must be set before run()
  File inFile, outFile;

  // one output run: the source run (used for its formatting) plus the text to write
  srecord OutRun(Run run, S newText) {}
  
  // paragraphs of the loaded input document
  L<Paragraph> paragraphs;
  // one list of OutRuns per input paragraph, in document order
  new LL<OutRun> outParagraphs;

  // Performs the whole load -> process -> postprocess -> save cycle.
  run {
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");
    
    // carry the section properties of the input body over to the output body
    new XWPFDocument docOut;
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);
    
    paragraphs = doc.getParagraphs();

    for (Paragraph para : paragraphs) {
      L<Run> runs = para.getRuns();
      // pair every run with its (non-null) first text element
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));
      
      // hook: may edit runs2 in place (e.g. by calling regexpReplacement)
      processParagraph(runs2);
      
      outParagraphs.add(map(runs2, p -> new OutRun(p.a, p.b)));
    }
    
    // hook: may edit outParagraphs before the output document is built
    postprocess();

    for (L<OutRun> runs : outParagraphs) {
      Paragraph paraOut = docOut.createParagraph();

      for (OutRun r : runs) {
        Run run = r.run;
        // createRun() already appends the new run to paraOut,
        // so no separate addRun call is needed
        Run runOut = paraOut.createRun();
        runOut.setText(r.newText);
        
        // copy run attributes; color, font family and font size are only
        // copied when the source run defines them, so that "unset" is not
        // written to the output as an explicit (invalid) value
        S color = run.getColor();
        if (color != null) runOut.setColor(color);
        S fontFamily = run.getFontFamily();
        if (fontFamily != null) runOut.setFontFamily(fontFamily);
        int fontSize = run.getFontSize(); // -1 when the run has no explicit size
        if (fontSize > 0) runOut.setFontSize(fontSize);
        runOut.setBold(run.isBold());
        runOut.setItalic(run.isItalic());
        runOut.setUnderline(run.getUnderline());
      }
    }
    
    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }
  
  // Hook called once after all paragraphs were processed; does nothing by default.
  swappable void postprocess() {}
  
  // Hook called once per paragraph with its (run, text) pairs; does nothing by default.
  swappable void processParagraph(LPair<Run, S> runs2) {}
  
  // Replaces matches of regularExpression in the concatenated text of runs2,
  // including matches that span several runs. runs2 is modified in place.
  // getReplacement receives the matched text and the result of
  // regexpFirstGroups for it, and returns the text to put in its place.
  // All runs touched by a match are collapsed into at most two entries that
  // both use the first matched run (text before the match + replacement,
  // and the text after the match).
  // NOTE(review): matching goes through regexpFindRangeIC - presumably
  // case-insensitive; confirm against that helper.
  void regexpReplacement(LPair<Run, S> runs2, S regularExpression, IF2<S, LS, S> getReplacement) {
    // hard cap on outer iterations so a replacement that keeps re-matching
    // cannot loop forever
    int safety = 100;
    for (int i = 0; safety-- > 0 && i < l(runs2); i++) {
      for (int j = i+1; j <= l(runs2); j++) {
        // search the joined text of runs i..j-1
        S text = join(pairsB(subList(runs2, i, j)));
        //print(+text);
        IntRange range = regexpFindRangeIC(regularExpression, text);
        if (range == null) continue; // no match
        //print("Match: " + substring(text, range));
        // we have a match, find out run indices
        
        // skip runs left of match
        while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
          range = shiftIntRange(range, -l(runs2.get(i).b));
          i++;
        }
        
        // re-join from the first run that actually contains the match;
        // range is now relative to this text
        text = join(pairsB(subList(runs2, i, j)));
        print("Found match: " + substring(text, range));
        
        // replace all matched runs with one or two runs at i
        removeSubList(runs2, i+1, j);
        Run run = runs2.get(i).a;
        S found = substring(text, range);
        LS groups = regexpFirstGroups(regularExpression, found);
        S replacement = getReplacement.get(found, groups);
        print("Replacing with: " + replacement);
        // text before the match plus the replacement goes into a new entry at i
        S text1 = takeFirst(text, range.start) + replacement;
        if (nempty(text1)) {
          runs2.add(i, pair(run, text1));
          ++i;
        }
        // entry i now holds the old text; keep only what followed the match
        S rest = substring(text, range.end);
        if (empty(rest))
          runs2.remove(i);
        else
          runs2.get(i).b = rest;
        --i; // process again
        // i is -1 when the match started at offset 0 of run 0 and the
        // replacement was empty; continuing the j loop would then call
        // runs2.get(-1) on the next match, so restart from the outer loop
        if (i < 0) break;
      }
    }
  }
  
  // Returns the concatenated new text of all runs of one output paragraph.
  S fullText(L<OutRun> paragraph) {
    ret join(map(p -> p.newText, paragraph));
  }
}

Author comment

Began life as a copy of #1028365

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028390
Snippet name: WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing]
Eternal ID of this version: #1028390/12
Text MD5: e0553769fcd8fb049adf4035d967e147
Transpilation MD5: f96343ce0e6d91722dbf932b0be3d923
Author: stefan
Category: javax / io
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-15 13:49:13
Source code size: 3719 bytes / 119 lines
Pitched / IR pitched: No / No
Views / Downloads: 248 / 589
Version history: 11 change(s)
Referenced in: #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674)