Libraryless. Click here for Pure Java version (241L/3K).
1 | static String htmldecode(final String input) { |
2 | if (input == null) ret null; |
3 | |
4 | final int MIN_ESCAPE = 2; |
5 | final int MAX_ESCAPE = 6; |
6 | |
7 | StringWriter writer = null; |
8 | int len = input.length(); |
9 | int i = 1; |
10 | int st = 0; |
11 | while (true) { |
12 | // look for '&' |
13 | while (i < len && input.charAt(i-1) != '&') |
14 | i++; |
15 | if (i >= len) |
16 | break; |
17 | |
18 | // found '&', look for ';' |
19 | int j = i; |
20 | while (j < len && j < i + MAX_ESCAPE + 1 && input.charAt(j) != ';') |
21 | j++; |
22 | if (j == len || j < i + MIN_ESCAPE || j == i + MAX_ESCAPE + 1) { |
23 | i++; |
24 | continue; |
25 | } |
26 | |
27 | // found escape |
28 | if (input.charAt(i) == '#') { |
29 | // numeric escape |
30 | int k = i + 1; |
31 | int radix = 10; |
32 | |
33 | final char firstChar = input.charAt(k); |
34 | if (firstChar == 'x' || firstChar == 'X') { |
35 | k++; |
36 | radix = 16; |
37 | } |
38 | |
39 | try { |
40 | int entityValue = Integer.parseInt(input.substring(k, j), radix); |
41 | |
42 | if (writer == null) |
43 | writer = new StringWriter(input.length()); |
44 | writer.append(input.substring(st, i - 1)); |
45 | |
46 | if (entityValue > 0xFFFF) { |
47 | final char[] chrs = Character.toChars(entityValue); |
48 | writer.write(chrs[0]); |
49 | writer.write(chrs[1]); |
50 | } else { |
51 | writer.write(entityValue); |
52 | } |
53 | |
54 | } catch (NumberFormatException ex) { |
55 | i++; |
56 | continue; |
57 | } |
58 | } |
59 | else { |
60 | // named escape |
61 | CharSequence value = htmldecode_lookupMap().get(input.substring(i, j)); |
62 | if (value == null) { |
63 | i++; |
64 | continue; |
65 | } |
66 | |
67 | if (writer == null) |
68 | writer = new StringWriter(input.length()); |
69 | writer.append(input.substring(st, i - 1)); |
70 | |
71 | writer.append(value); |
72 | } |
73 | |
74 | // skip escape |
75 | st = j + 1; |
76 | i = st; |
77 | } |
78 | |
79 | if (writer != null) { |
80 | writer.append(input.substring(st, len)); |
81 | return writer.toString(); |
82 | } |
83 | return input; |
84 | } |
85 | |
86 | static simplyCached HashMap<String, CharSequence> htmldecode_lookupMap() { |
87 | var map = new HashMap<String, CharSequence>(); |
88 | for (CharSequence[] seq : htmldecode_escapes()) |
89 | map.put(seq[1].toString(), seq[0]); |
90 | ret map; |
91 | } |
See http://unicode.e-workers.de/entities.php
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000793 |
Snippet name: | htmldecode |
Eternal ID of this version: | #1000793/9 |
Text MD5: | 1808854b076bfa92236ceff9f113f8ba |
Transpilation MD5: | 33eeb12667a48ee36b7c26df93e58d19 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-07-23 18:12:06 |
Source code size: | 2453 bytes / 91 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 829 / 1992 |
Version history: | 8 change(s) |
Referenced in: | [show references] |