Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

96
LINES

< > BotCompany Repo | #1009203 // fastu_fromUtf8 - faster version of fromUtf8

JavaX fragment (include)

// Convenience overload: decodes the whole byte array as UTF-8 by
// delegating to the (data, start, count) variant with start 0.
static S fastu_fromUtf8(byte[] data) {
  int byteCount = data.length;
  ret fastu_fromUtf8(data, 0, byteCount);
}

// Decodes n bytes of UTF-8, starting at data[i], into a string.
//
// data - source bytes
// i    - index of the first byte to decode
// n    - number of bytes to decode
//
// Decoding is lenient: continuation bytes are not validated and overlong
// forms are accepted. A byte that cannot start a sequence (a stray
// continuation byte, 0xFE or 0xFF), or a lead byte whose sequence would run
// past the end of the requested range, is replaced by U+FFFD and decoding
// resumes at the next byte. A sequence that decodes to a value above
// Character.MAX_CODE_POINT is passed to fastu_fromUtf8_toChars, which
// rejects it with IllegalArgumentException.
static S fastu_fromUtf8(byte[] data, int i, int n) {
  // Every char written consumes at least one input byte (only sequences of
  // four or more bytes can produce a surrogate pair), so n chars suffice.
  char[] chars = new char[n];
  int len = 0, offset = i;
  int end = i + n;
  while (offset < end) {
    int b = data[offset];
    if (b >= 0) {
      // 0xxxxxxx - ASCII, copied through unchanged
      chars[len++] = (char) b;
      offset++;
      continue;
    }

    // Classify the lead byte: payload bits it carries and the number of
    // continuation bytes that must follow it.
    int uc, extra;
    if ((b & 0xE0) == 0xC0) { uc = b & 0x1F; extra = 1; }
    else if ((b & 0xF0) == 0xE0) { uc = b & 0x0F; extra = 2; }
    else if ((b & 0xF8) == 0xF0) { uc = b & 0x07; extra = 3; }
    else if ((b & 0xFC) == 0xF8) { uc = b & 0x03; extra = 4; }
    else if ((b & 0xFE) == 0xFC) { uc = b & 0x01; extra = 5; }
    else { uc = 0; extra = -1; } // not a lead byte

    if (extra < 0 || offset + extra >= end) {
      // Invalid lead byte, or the sequence is cut off by the end of the
      // range. Substitute U+FFFD and always advance, so the loop makes
      // progress and never reads outside [i, i + n).
      chars[len++] = (char) 0xFFFD;
      offset++;
      continue;
    }

    offset++;
    for (int k = 0; k < extra; k++)
      uc = (uc << 6) | (data[offset++] & 0x3F);

    len = fastu_fromUtf8_toChars(uc, chars, len);
  }
  ret new S(chars, 0, len);
}

// Stores the UTF-16 form of codePoint into dst beginning at index and
// returns the index just past the last char written (index + 1 for a
// single char, index + 2 for a surrogate pair).
// Throws IllegalArgumentException when codePoint is negative or greater
// than Character.MAX_CODE_POINT.
static int fastu_fromUtf8_toChars(int codePoint, char[] dst, int index) {
  boolean inRange = codePoint >= 0 && codePoint <= Character.MAX_CODE_POINT;
  if (!inRange)
    throw new IllegalArgumentException();

  boolean needsPair = codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT;
  if (needsPair) {
    int rel = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
    char low = (char) (Character.MIN_LOW_SURROGATE + (rel & 0x3ff));
    char high = (char) (Character.MIN_HIGH_SURROGATE + (rel >>> 10));
    // The low surrogate is stored first, so a too-short dst fails on that
    // store before the high surrogate slot is touched.
    dst[index + 1] = low;
    dst[index] = high;
    ret index + 2;
  }

  dst[index] = (char) codePoint;
  ret index + 1;
}

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1009203
Snippet name: fastu_fromUtf8 - faster version of fromUtf8
Eternal ID of this version: #1009203/5
Text MD5: da9466d48c803c1fa9f3ab982ff26fae
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-10-30 01:45:27
Source code size: 2876 bytes / 96 lines
Pitched / IR pitched: No / No
Views / Downloads: 399 / 438
Version history: 4 change(s)
Referenced in: [show references]