Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

119
LINES

< > BotCompany Repo | #1028390 // WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 16250K of libraries. Click here for Pure Java version (3325L/21K).

1  
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
2  
3  
// Rebuilds a Word (.docx) document while giving callers a chance to rewrite
// the text of its runs.
//
// run() loads inFile, copies the body's section properties and the runs of
// every paragraph returned by doc.getParagraphs() into a fresh XWPFDocument,
// and saves the result to outFile. Two hooks can be swapped in:
//   - processParagraph: called once per paragraph with its (run, text)
//     pairs; it may edit that list in place (e.g. via regexpReplacement).
//   - postprocess: called once after all paragraphs have been collected in
//     outParagraphs and before the output document is built.
//
// Only the run attributes copied in run() (color, font family, font size,
// bold, italic, underline) are carried over to the output document.
sclass WordDocumentTextReplacer2 {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.
  
  // input document to read / output document to write; both must be set before run()
  File inFile, outFile;

  // One run of the output document: the source run whose formatting is
  // copied, plus the (possibly replaced) text to write.
  srecord OutRun(Run run, S newText) {}
  
  // paragraphs of the input document (filled by run())
  L<Paragraph> paragraphs;
  // one list of OutRuns per input paragraph, in document order
  new LL<OutRun> outParagraphs;

  // Loads inFile, runs the processParagraph/postprocess hooks and writes the
  // rebuilt document to outFile.
  run {
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");
    
    // carry the section properties of the input body over to the new document
    new XWPFDocument docOut;
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);
    
    paragraphs = doc.getParagraphs();

    for (Paragraph para : paragraphs) {
      L<Run> runs = para.getRuns();
      //print(n2(runs, "run"));
      // pair every run with its text (a missing text becomes "")
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));
      
      // hook: may modify runs2 in place
      processParagraph(runs2);
      
      outParagraphs.add(map(runs2, p -> new OutRun(p.a, p.b)));
    }
    
    // hook: may modify outParagraphs before the output is generated
    postprocess();

    for (L<OutRun> runs : outParagraphs) {
      Paragraph paraOut = docOut.createParagraph();

      for (OutRun r : runs) {
        Run run = r.run;
        // createRun() already appends the new run to paraOut,
        // so no additional addRun call is made
        Run runOut = paraOut.createRun();
        runOut.setText(r.newText);
        
        // copy run attributes; attributes the source run does not define
        // are skipped instead of writing placeholder values to the output

        // getColor()/getFontFamily() can return null for runs that don't set them
        S color = run.getColor();
        if (color != null) runOut.setColor(color);
        S fontFamily = run.getFontFamily();
        if (fontFamily != null) runOut.setFontFamily(fontFamily);

        // getFontSize() reports -1 when the run has no explicit size;
        // passing that on would store a negative font size
        int fontSize = run.getFontSize();
        if (fontSize != -1) runOut.setFontSize(fontSize);

        runOut.setBold(run.isBold());
        runOut.setItalic(run.isItalic());
        runOut.setUnderline(run.getUnderline());
      }
    }
    
    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }
  
  // Hook called once after all paragraphs were collected; does nothing by default.
  swappable void postprocess() {}
  
  // Hook called once per input paragraph with its (run, text) pairs;
  // does nothing by default.
  swappable void processParagraph(LPair<Run, S> runs2) {}
  
  // Replaces matches of regularExpression in a paragraph, including matches
  // whose text is spread over several consecutive runs.
  //
  // runs2             - the paragraph's (run, text) pairs; edited in place
  // regularExpression - pattern searched with regexpFindRangeIC
  //                     (presumably case-insensitive, judging by the "IC" - confirm)
  // getReplacement    - receives the matched text and the groups obtained
  //                     from regexpFirstGroups; returns the replacement text
  //
  // For a match covering runs i..j-1, those runs are collapsed into at most
  // two pairs that both use run i's formatting: one holding the text before
  // the match plus the replacement, and one holding the text after the match.
  void regexpReplacement(LPair<Run, S> runs2, S regularExpression, IF2<S, LS, S> getReplacement) {
    // limits the number of outer loop iterations
    int safety = 100;
    for (int i = 0; safety-- > 0 && i < l(runs2); i++) {
      // grow the window [i, j) run by run until the joined text contains a match
      for (int j = i+1; j <= l(runs2); j++) {
        S text = join(pairsB(subList(runs2, i, j)));
        //print(+text);
        IntRange range = regexpFindRangeIC(regularExpression, text);
        if (range == null) continue; // no match
        //print("Match: " + substring(text, range));
        // we have a match, find out run indices
        
        // skip runs left of match
        while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
          range = shiftIntRange(range, -l(runs2.get(i).b));
          i++;
        }
        
        // text now starts at the run in which the match begins
        text = join(pairsB(subList(runs2, i, j)));
        print("Found match: " + substring(text, range));
        
        // replace all matched runs with one or two runs at i
        removeSubList(runs2, i+1, j);
        Run run = runs2.get(i).a;
        S found = substring(text, range);
        LS groups = regexpFirstGroups(regularExpression, found);
        S replacement = getReplacement.get(found, groups);
        print("Replacing with: " + replacement);
        // text before the match + replacement goes into a new pair inserted at i
        S text1 = takeFirst(text, range.start) + replacement;
        if (nempty(text1)) {
          runs2.add(i, pair(run, text1));
          ++i;
        }
        // the pair at i now keeps only the text after the match (or is dropped)
        S rest = substring(text, range.end);
        if (empty(rest))
          runs2.remove(i);
        else
          runs2.get(i).b = rest;
        // NOTE(review): there is no break here, so the inner loop keeps going
        // with the decremented i and the next j. If i was 0 and no prefix pair
        // was inserted, i becomes -1; a further match in this inner loop would
        // then reach runs2.get(-1). Whether that can happen depends on how
        // subList treats a negative start index - confirm, and consider
        // leaving the inner loop after a replacement.
        --i; // process again
      }
    }
  }
  
  // Returns the concatenated newText of all runs of one output paragraph.
  S fullText(L<OutRun> paragraph) {
    ret join(map(p -> p.newText, paragraph));
  }
}

Author comment

Began life as a copy of #1028365

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028390
Snippet name: WordDocumentTextReplacer2 [allows multiple replacement patterns & post processing]
Eternal ID of this version: #1028390/12
Text MD5: e0553769fcd8fb049adf4035d967e147
Transpilation MD5: f96343ce0e6d91722dbf932b0be3d923
Author: stefan
Category: javax / io
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-15 13:49:13
Source code size: 3719 bytes / 119 lines
Pitched / IR pitched: No / No
Views / Downloads: 184 / 502
Version history: 11 change(s)
Referenced in: [show references]