Libraryless. Click here for Pure Java version (241L/3K).
1 | static String htmldecode(final String input) {
|
2 | if (input == null) ret null; |
3 | |
4 | final int MIN_ESCAPE = 2; |
5 | final int MAX_ESCAPE = 6; |
6 | |
7 | StringWriter writer = null; |
8 | int len = input.length(); |
9 | int i = 1; |
10 | int st = 0; |
11 | while (true) {
|
12 | // look for '&' |
13 | while (i < len && input.charAt(i-1) != '&') |
14 | i++; |
15 | if (i >= len) |
16 | break; |
17 | |
18 | // found '&', look for ';' |
19 | int j = i; |
20 | while (j < len && j < i + MAX_ESCAPE + 1 && input.charAt(j) != ';') |
21 | j++; |
22 | if (j == len || j < i + MIN_ESCAPE || j == i + MAX_ESCAPE + 1) {
|
23 | i++; |
24 | continue; |
25 | } |
26 | |
27 | // found escape |
28 | if (input.charAt(i) == '#') {
|
29 | // numeric escape |
30 | int k = i + 1; |
31 | int radix = 10; |
32 | |
33 | final char firstChar = input.charAt(k); |
34 | if (firstChar == 'x' || firstChar == 'X') {
|
35 | k++; |
36 | radix = 16; |
37 | } |
38 | |
39 | try {
|
40 | int entityValue = Integer.parseInt(input.substring(k, j), radix); |
41 | |
42 | if (writer == null) |
43 | writer = new StringWriter(input.length()); |
44 | writer.append(input.substring(st, i - 1)); |
45 | |
46 | if (entityValue > 0xFFFF) {
|
47 | final char[] chrs = Character.toChars(entityValue); |
48 | writer.write(chrs[0]); |
49 | writer.write(chrs[1]); |
50 | } else {
|
51 | writer.write(entityValue); |
52 | } |
53 | |
54 | } catch (NumberFormatException ex) {
|
55 | i++; |
56 | continue; |
57 | } |
58 | } |
59 | else {
|
60 | // named escape |
61 | CharSequence value = htmldecode_lookupMap().get(input.substring(i, j)); |
62 | if (value == null) {
|
63 | i++; |
64 | continue; |
65 | } |
66 | |
67 | if (writer == null) |
68 | writer = new StringWriter(input.length()); |
69 | writer.append(input.substring(st, i - 1)); |
70 | |
71 | writer.append(value); |
72 | } |
73 | |
74 | // skip escape |
75 | st = j + 1; |
76 | i = st; |
77 | } |
78 | |
79 | if (writer != null) {
|
80 | writer.append(input.substring(st, len)); |
81 | return writer.toString(); |
82 | } |
83 | return input; |
84 | } |
85 | |
86 | static simplyCached HashMap<String, CharSequence> htmldecode_lookupMap() {
|
87 | var map = new HashMap<String, CharSequence>(); |
88 | for (CharSequence[] seq : htmldecode_escapes()) |
89 | map.put(seq[1].toString(), seq[0]); |
90 | ret map; |
91 | } |
See http://unicode.e-workers.de/entities.php
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1000793 |
| Snippet name: | htmldecode |
| Eternal ID of this version: | #1000793/9 |
| Text MD5: | 1808854b076bfa92236ceff9f113f8ba |
| Transpilation MD5: | 33eeb12667a48ee36b7c26df93e58d19 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2021-07-23 18:12:06 |
| Source code size: | 2453 bytes / 91 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 1124 / 2334 |
| Version history: | 8 change(s) |
| Referenced in: | [show references] |