Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

91
LINES

< > BotCompany Repo | #1000793 // htmldecode

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (241L/3K).

1  
static String htmldecode(final String input) {
2  
  if (input == null) ret null;
3  
  
4  
  final int MIN_ESCAPE = 2;
5  
  final int MAX_ESCAPE = 6;
6  
7  
  StringWriter writer = null;
8  
  int len = input.length();
9  
  int i = 1;
10  
  int st = 0;
11  
  while (true) {
12  
      // look for '&'
13  
      while (i < len && input.charAt(i-1) != '&')
14  
          i++;
15  
      if (i >= len)
16  
          break;
17  
18  
      // found '&', look for ';'
19  
      int j = i;
20  
      while (j < len && j < i + MAX_ESCAPE + 1 && input.charAt(j) != ';')
21  
          j++;
22  
      if (j == len || j < i + MIN_ESCAPE || j == i + MAX_ESCAPE + 1) {
23  
          i++;
24  
          continue;
25  
      }
26  
27  
      // found escape 
28  
      if (input.charAt(i) == '#') {
29  
          // numeric escape
30  
          int k = i + 1;
31  
          int radix = 10;
32  
33  
          final char firstChar = input.charAt(k);
34  
          if (firstChar == 'x' || firstChar == 'X') {
35  
              k++;
36  
              radix = 16;
37  
          }
38  
39  
          try {
40  
              int entityValue = Integer.parseInt(input.substring(k, j), radix);
41  
42  
              if (writer == null) 
43  
                  writer = new StringWriter(input.length());
44  
              writer.append(input.substring(st, i - 1));
45  
46  
              if (entityValue > 0xFFFF) {
47  
                  final char[] chrs = Character.toChars(entityValue);
48  
                  writer.write(chrs[0]);
49  
                  writer.write(chrs[1]);
50  
              } else {
51  
                  writer.write(entityValue);
52  
              }
53  
54  
          } catch (NumberFormatException ex) { 
55  
              i++;
56  
              continue;
57  
          }
58  
      }
59  
      else {
60  
          // named escape
61  
          CharSequence value = htmldecode_lookupMap().get(input.substring(i, j));
62  
          if (value == null) {
63  
              i++;
64  
              continue;
65  
          }
66  
67  
          if (writer == null) 
68  
              writer = new StringWriter(input.length());
69  
          writer.append(input.substring(st, i - 1));
70  
71  
          writer.append(value);
72  
      }
73  
74  
      // skip escape
75  
      st = j + 1;
76  
      i = st;
77  
  }
78  
79  
  if (writer != null) {
80  
      writer.append(input.substring(st, len));
81  
      return writer.toString();
82  
  }
83  
  return input;
84  
}
85  
86  
static simplyCached HashMap<String, CharSequence> htmldecode_lookupMap() {
87  
  var map = new HashMap<String, CharSequence>();
88  
  for (CharSequence[] seq : htmldecode_escapes()) 
89  
    map.put(seq[1].toString(), seq[0]);
90  
  ret map;
91  
}

Author comment

See http://unicode.e-workers.de/entities.php

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1000793
Snippet name: htmldecode
Eternal ID of this version: #1000793/9
Text MD5: 1808854b076bfa92236ceff9f113f8ba
Transpilation MD5: 33eeb12667a48ee36b7c26df93e58d19
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-07-23 18:12:06
Source code size: 2453 bytes / 91 lines
Pitched / IR pitched: No / No
Views / Downloads: 774 / 1916
Version history: 8 change(s)
Referenced in: [show references]