Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

99
LINES

< > BotCompany Repo | #1028365 // WordDocumentTextReplacer

JavaX fragment (include) [tags: use-pretranspiled]

Uses 16250K of libraries. Click here for Pure Java version (3159L/20K).

1  
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
2  
3  
/**
 * Reads a .docx file, replaces every match of a regular expression in the
 * text of its paragraphs, and writes the result to a new .docx file.
 *
 * A match may span several runs of a paragraph; the matched runs are then
 * collapsed into the first run of the match (whose formatting is reused).
 *
 * Limitations visible in this code: the output document is built from
 * scratch. Only the body's section properties, the paragraphs returned by
 * getParagraphs(), and per run the text plus color, font family, font size,
 * bold, italic and underline are carried over. Paragraph-level formatting
 * and anything not reachable through getParagraphs() is not copied.
 */
sclass WordDocumentTextReplacer {
  replace Run with XWPFRun.
  replace Paragraph with XWPFParagraph.

  // Source document to read and target document to write.
  // Both must be set before run() is called.
  File inFile, outFile;

  // Pattern searched for in the paragraph text; must be non-empty.
  S regularExpression;

  // Hook that decides what a match is replaced with.
  // text   = the matched text
  // groups = result of regexpFirstGroups() applied to the matched text
  // Default: the constant "[REPLACED]".
  swappable S getReplacement(S text, LS groups) { ret "[REPLACED]"; }

  // Performs the whole load / replace / save cycle.
  run {
    assertNempty(+regularExpression);
    assertNotNull(+inFile);
    assertNotNull(+outFile);

    XWPFDocument doc = loadDocx(print("Loading", inFile));
    print("Document loaded");

    // Fresh output document that reuses the input's section properties
    new XWPFDocument docOut;
    CTBody body = doc.getDocument().getBody();
    CTSectPr sectPr = body.getSectPr();
    CTBody bodyOut = docOut.getDocument().getBody();
    bodyOut.setSectPr(sectPr);

    L<Paragraph> paragraphs = doc.getParagraphs();

    for (Paragraph para : paragraphs) {
      Paragraph paraOut = docOut.createParagraph();
      L<Run> runs = para.getRuns();

      // Working list: (source run providing the formatting, text to emit).
      // Several entries may share the same source run after a replacement.
      new LPair<Run, S> runs2;
      for (Run r : runs)
        addPair(runs2, r, unnull(r.getText(0)));

      // Try every window of consecutive entries [i, j) so that matches
      // spanning multiple runs are found. Both i and runs2 are modified
      // inside the loop body when a match is replaced.
      for (int i = 0; i < l(runs2); i++) {
        for (int j = i+1; j <= l(runs2); j++) {
          S text = join(pairsB(subList(runs2, i, j)));
          print(+text);
          IntRange range = regexpFindRange(regularExpression, text);
          if (range == null) continue; // no match
          // we have a match, find out run indices

          // skip runs left of match (range is made relative to entry i)
          while (i < l(runs2) && range.start >= l(runs2.get(i).b)) {
            range = shiftIntRange(range, -l(runs2.get(i).b));
            i++;
          }

          text = join(pairsB(subList(runs2, i, j)));
          print("Found match: " + substring(text, range));

          // replace all matched runs with one or two runs at i
          removeSubList(runs2, i+1, j);
          Run run = runs2.get(i).a;
          S found = substring(text, range);
          LS groups = regexpFirstGroups(regularExpression, found);
          S replacement = getReplacement(found, groups);
          print("Replacing with: " + replacement);

          // entry 1: text before the match plus the replacement
          S text1 = takeFirst(text, range.start) + replacement;
          if (nempty(text1)) {
            runs2.add(i, pair(run, text1));
            ++i;
          }

          // entry 2: text after the match (entry dropped if there is none)
          // NOTE(review): if text1 and rest are both empty while i == 0
          // (empty replacement covering whole runs), i becomes -1 here and
          // a later runs2.get(i) looks out of range - confirm.
          S rest = substring(text, range.end);
          if (empty(rest))
            runs2.remove(i--);
          else
            runs2.get(i).b = rest;
        }
      }

      // Emit one output run per working-list entry
      for (Pair<Run, S> p : runs2) {
        Run run = p.a;
        // createRun() already appends the new run to paraOut
        Run runOut = paraOut.createRun();
        runOut.setText(p.b);

        // copy run attributes; color, font family and font size are only
        // copied when the source run defines them (getFontSize() reports
        // -1 for "not set", the other two getters report null)
        S color = run.getColor();
        if (color != null) runOut.setColor(color);
        S fontFamily = run.getFontFamily();
        if (fontFamily != null) runOut.setFontFamily(fontFamily);
        int fontSize = run.getFontSize();
        if (fontSize > 0) runOut.setFontSize(fontSize);
        runOut.setBold(run.isBold());
        runOut.setItalic(run.isItalic());
        runOut.setUnderline(run.getUnderline());
      }
    }

    saveDocx(docOut, outFile);
    printFileInfo(outFile);
  }
}

Author comment

Began life as a copy of #1028318

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028365
Snippet name: WordDocumentTextReplacer
Eternal ID of this version: #1028365/5
Text MD5: 4d0f037aa386ef25f9aca226b779ee28
Transpilation MD5: bed581516cdac5033b4aa6994150cacf
Author: stefan
Category: javax / io
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-13 17:21:53
Source code size: 3377 bytes / 99 lines
Pitched / IR pitched: No / No
Views / Downloads: 150 / 430
Version history: 4 change(s)
Referenced in: [show references]