1 | static S fastu_fromUtf8(byte[] data) { |
2 | ret fastu_fromUtf8(data, 0, data.length); |
3 | } |
4 | |
5 | static S fastu_fromUtf8(byte[] data, int i, int n) { |
6 | char[] chars = new char[n]; |
7 | int len = 0, offset = i; |
8 | n += i; |
9 | while (offset < n) { |
10 | if (data[offset] >= 0) { |
11 | // 0xxxxxxx - it is an ASCII char, so copy it exactly as it is |
12 | chars[len] = (char) data[offset]; |
13 | len++; |
14 | offset++; |
15 | } else { |
16 | int uc = 0; |
17 | if ((data[offset] & 0xE0) == 0xC0) { |
18 | uc = (int) (data[offset] & 0x1F); |
19 | offset++; |
20 | uc <<= 6; |
21 | uc |= (int) (data[offset] & 0x3F); |
22 | offset++; |
23 | } else if ((data[offset] & 0xF0) == 0xE0) { |
24 | uc = (int) (data[offset] & 0x0F); |
25 | offset++; |
26 | uc <<= 6; |
27 | uc |= (int) (data[offset] & 0x3F); |
28 | offset++; |
29 | uc <<= 6; |
30 | uc |= (int) (data[offset] & 0x3F); |
31 | offset++; |
32 | } else if ((data[offset] & 0xF8) == 0xF0) { |
33 | uc = (int) (data[offset] & 0x07); |
34 | offset++; |
35 | uc <<= 6; |
36 | uc |= (int) (data[offset] & 0x3F); |
37 | offset++; |
38 | uc <<= 6; |
39 | uc |= (int) (data[offset] & 0x3F); |
40 | offset++; |
41 | uc <<= 6; |
42 | uc |= (int) (data[offset] & 0x3F); |
43 | offset++; |
44 | } else if ((data[offset] & 0xFC) == 0xF8) { |
45 | uc = (int) (data[offset] & 0x03); |
46 | offset++; |
47 | uc <<= 6; |
48 | uc |= (int) (data[offset] & 0x3F); |
49 | offset++; |
50 | uc <<= 6; |
51 | uc |= (int) (data[offset] & 0x3F); |
52 | offset++; |
53 | uc <<= 6; |
54 | uc |= (int) (data[offset] & 0x3F); |
55 | offset++; |
56 | uc <<= 6; |
57 | uc |= (int) (data[offset] & 0x3F); |
58 | offset++; |
59 | } else if ((data[offset] & 0xFE) == 0xFC) { |
60 | uc = (int) (data[offset] & 0x01); |
61 | offset++; |
62 | uc <<= 6; |
63 | uc |= (int) (data[offset] & 0x3F); |
64 | offset++; |
65 | uc <<= 6; |
66 | uc |= (int) (data[offset] & 0x3F); |
67 | offset++; |
68 | uc <<= 6; |
69 | uc |= (int) (data[offset] & 0x3F); |
70 | offset++; |
71 | uc <<= 6; |
72 | uc |= (int) (data[offset] & 0x3F); |
73 | offset++; |
74 | uc <<= 6; |
75 | uc |= (int) (data[offset] & 0x3F); |
76 | offset++; |
77 | } |
78 | |
79 | len = fastu_fromUtf8_toChars(uc, chars, len); |
80 | } |
81 | } |
82 | ret new S(chars, 0, len); |
83 | } |
84 | |
85 | static int fastu_fromUtf8_toChars(int codePoint, char[] dst, int index) { |
86 | if (codePoint < 0 || codePoint > Character.MAX_CODE_POINT) |
87 | throw new IllegalArgumentException; |
88 | if (codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
89 | dst[index] = (char) codePoint; |
90 | ret ++index; |
91 | } |
92 | int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT; |
93 | dst[index + 1] = (char) ((offset & 0x3ff) + Character.MIN_LOW_SURROGATE); |
94 | dst[index] = (char) ((offset >>> 10) + Character.MIN_HIGH_SURROGATE); |
95 | ret index + 2; |
96 | } |
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1009203 |
Snippet name: | fastu_fromUtf8 - faster version of fromUtf8 |
Eternal ID of this version: | #1009203/5 |
Text MD5: | da9466d48c803c1fa9f3ab982ff26fae |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2017-10-30 01:45:27 |
Source code size: | 2876 bytes / 96 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 537 / 578 |
Version history: | 4 change(s) |
Referenced in: | [show references] |