| /* |
| * Copyright (C) 2007 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <objmng/drm_i18n.h> |
| |
/*
 * Byte-range predicates named after the GB2312, GBK and Big5 double-byte
 * encodings, plus a 7-bit ASCII test. Every macro expands its argument more
 * than once, so only pass expressions without side effects.
 */
#define IS_GB2312_HIGH_BYTE(c)  (0xA1 <= (c) && (c) <= 0xF7)
#define IS_GB2312_LOW_BYTE(c)   (0xA1 <= (c) && (c) <= 0xFE)
#define IS_GBK_HIGH_BYTE(c)     (0x81 <= (c) && (c) <= 0xFE)
#define IS_GBK_LOW_BYTE(c)      (0x40 <= (c) && (c) <= 0xFE && 0x7F != (c))
#define IS_BIG5_HIGH_BYTE(c)    (0xA1 <= (c) && (c) <= 0xF9)
#define IS_BIG5_LOW_BYTE(c)     ((0x40 <= (c) && (c) <= 0x7E) \
                                 || (0xA1 <= (c) && (c) <= 0xFE))
#define IS_ASCII(c)             ((c) <= 0x7F)

/* U+FFFD REPLACEMENT CHARACTER: stored when input cannot be decoded. */
#define INVALID_UNICODE 0xFFFD

/* Compile-time switches selecting which converters this file builds. */
#define I18N_LATIN1_SUPPORT
#define I18N_UTF8_UTF16_SUPPORT
| |
| |
/*
 * Forward declarations of the file-local converters.
 *
 * The xxxToWcs() decoders share one shape: read up to mbsLen bytes from mbs,
 * store at most bufSizeInWideChar 16-bit units into wcsBuf, optionally report
 * the number of input bytes used through bytesConsumed, and return the number
 * of wide characters produced. A NULL wcsBuf asks only for the count.
 *
 * The wcToXxx() encoders take a single 16-bit unit and return the number of
 * bytes its encoding needs; the bytes are stored only when mbs is non-NULL
 * and bufSize is large enough.
 */

/** ISO 8859-1 (Latin-1) bytes -> wide characters, one unit per byte. */
static int32_t latin1ToWcs(const uint8_t *mbs, int32_t mbsLen,
                           uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                           int32_t *bytesConsumed);

/** One wide character -> one ISO 8859-1 (Latin-1) byte. */
static int32_t wcToLatin1(uint16_t wc, uint8_t *mbs, int32_t bufSize);

/** UTF-8 bytes -> wide characters. */
static int32_t utf8ToWcs(const uint8_t *mbs, int32_t mbsLen,
                         uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                         int32_t *bytesConsumed);

/** One wide character -> UTF-8 bytes (1 to 3 of them). */
static int32_t wcToUtf8(uint16_t wc, uint8_t *mbs, int32_t bufSize);

/** UTF-16 big-endian bytes -> wide characters. */
static int32_t utf16beToWcs(const uint8_t *mbs, int32_t mbsLen,
                            uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                            int32_t *bytesConsumed);

/** One wide character -> two UTF-16 big-endian bytes. */
static int32_t wcToUtf16be(uint16_t wc, uint8_t *mbs, int32_t bufSize);

/** UTF-16 little-endian bytes -> wide characters. */
static int32_t utf16leToWcs(const uint8_t *mbs, int32_t mbsLen,
                            uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                            int32_t *bytesConsumed);

/** One wide character -> two UTF-16 little-endian bytes. */
static int32_t wcToUtf16le(uint16_t wc, uint8_t *mbs, int32_t bufSize);
| |
| /* |
| * see drm_i18n.h |
| */ |
| int32_t DRM_i18n_mbsToWcs(DRM_Charset_t charset, |
| const uint8_t *mbs, int32_t mbsLen, |
| uint16_t *wcsBuf, int32_t bufSizeInWideChar, |
| int32_t *bytesConsumed) |
| { |
| switch (charset) |
| { |
| #ifdef I18N_GB2312_SUPPORT |
| case DRM_CHARSET_GB2312: |
| return gb2312ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| #ifdef I18N_GBK_SUPPORT |
| case DRM_CHARSET_GBK: |
| return gbkToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| #ifdef I18N_BIG5_SUPPORT |
| case DRM_CHARSET_BIG5: |
| return big5ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| #ifdef I18N_LATIN1_SUPPORT |
| case DRM_CHARSET_LATIN1: |
| return latin1ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| #ifdef I18N_ISO8859X_SUPPORT |
| case DRM_CHARSET_LATIN2: |
| case DRM_CHARSET_LATIN3: |
| case DRM_CHARSET_LATIN4: |
| case DRM_CHARSET_CYRILLIC: |
| case DRM_CHARSET_ARABIC: |
| case DRM_CHARSET_GREEK: |
| case DRM_CHARSET_HEBREW: |
| case DRM_CHARSET_LATIN5: |
| case DRM_CHARSET_LATIN6: |
| case DRM_CHARSET_THAI: |
| case DRM_CHARSET_LATIN7: |
| case DRM_CHARSET_LATIN8: |
| case DRM_CHARSET_LATIN9: |
| case DRM_CHARSET_LATIN10: |
| return iso8859xToWcs(charset, mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| #ifdef I18N_UTF8_UTF16_SUPPORT |
| case DRM_CHARSET_UTF8: |
| return utf8ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| case DRM_CHARSET_UTF16BE: |
| return utf16beToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| case DRM_CHARSET_UTF16LE: |
| return utf16leToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed); |
| #endif |
| default: |
| return -1; |
| } |
| } |
| |
| /* |
| * see drm_i18n.h |
| */ |
| int32_t DRM_i18n_wcsToMbs(DRM_Charset_t charset, |
| const uint16_t *wcs, int32_t wcsLen, |
| uint8_t *mbsBuf, int32_t bufSizeInByte) |
| { |
| int32_t (* wcToMbFunc)(uint16_t, uint8_t *, int32_t); |
| int32_t charIndex = 0; |
| int32_t numMultiBytes = 0; |
| |
| switch (charset) |
| { |
| #ifdef I18N_LATIN1_SUPPORT |
| case DRM_CHARSET_LATIN1: |
| wcToMbFunc = wcToLatin1; |
| break; |
| #endif |
| #ifdef I18N_UTF8_UTF16_SUPPORT |
| case DRM_CHARSET_UTF8: |
| wcToMbFunc = wcToUtf8; |
| break; |
| case DRM_CHARSET_UTF16BE: |
| wcToMbFunc = wcToUtf16be; |
| break; |
| case DRM_CHARSET_UTF16LE: |
| wcToMbFunc = wcToUtf16le; |
| break; |
| #endif |
| #ifdef I18N_ISO8859X_SUPPORT |
| case DRM_CHARSET_LATIN2: |
| case DRM_CHARSET_LATIN3: |
| case DRM_CHARSET_LATIN4: |
| case DRM_CHARSET_CYRILLIC: |
| case DRM_CHARSET_ARABIC: |
| case DRM_CHARSET_GREEK: |
| case DRM_CHARSET_HEBREW: |
| case DRM_CHARSET_LATIN5: |
| case DRM_CHARSET_LATIN6: |
| case DRM_CHARSET_THAI: |
| case DRM_CHARSET_LATIN7: |
| case DRM_CHARSET_LATIN8: |
| case DRM_CHARSET_LATIN9: |
| case DRM_CHARSET_LATIN10: |
| return wcsToIso8859x(charset, wcs, wcsLen, mbsBuf, bufSizeInByte); |
| #endif |
| default: |
| return -1; |
| } |
| |
| if (mbsBuf) { |
| while (numMultiBytes < bufSizeInByte && charIndex < wcsLen) { |
| /* TODO: handle surrogate pair values here */ |
| int32_t mbLen = wcToMbFunc(wcs[charIndex], |
| &mbsBuf[numMultiBytes], bufSizeInByte - numMultiBytes); |
| |
| if (numMultiBytes + mbLen > bufSizeInByte) { |
| /* Insufficient buffer. Don't update numMultiBytes */ |
| break; |
| } |
| charIndex++; |
| numMultiBytes += mbLen; |
| } |
| } else { |
| while (charIndex < wcsLen) { |
| /* TODO: handle surrogate pair values here */ |
| numMultiBytes += wcToMbFunc(wcs[charIndex], NULL, 0); |
| charIndex++; |
| } |
| } |
| |
| return numMultiBytes; |
| } |
| |
| |
| #ifdef I18N_LATIN1_SUPPORT |
| |
/**
 * Convert ISO 8859-1 (Latin-1) bytes to 16-bit wide characters.
 *
 * Each input byte becomes exactly one wide character with the same value.
 *
 * @param mbs               Input bytes.
 * @param mbsLen            Number of bytes available in mbs. A negative
 *                          value is treated as an empty input.
 * @param wcsBuf            Output buffer, or NULL to only ask how many wide
 *                          characters the input would need.
 * @param bufSizeInWideChar Capacity of wcsBuf in wide characters (ignored
 *                          when wcsBuf is NULL). A negative value is treated
 *                          as zero capacity.
 * @param bytesConsumed     Optional. Receives the number of input bytes
 *                          accounted for by the return value; it is written
 *                          on every path, including the size query.
 *
 * @return Number of wide characters stored, or required when wcsBuf is NULL.
 */
int32_t latin1ToWcs(const uint8_t *mbs, int32_t mbsLen,
                    uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                    int32_t *bytesConsumed)
{
    int32_t count = mbsLen;
    int32_t i;

    /* The output capacity only limits the count when we really store. */
    if (wcsBuf != NULL && count > bufSizeInWideChar) {
        count = bufSizeInWideChar;
    }
    /* Never report a negative count: callers use -1 as an error value. */
    if (count < 0) {
        count = 0;
    }

    if (wcsBuf != NULL) {
        for (i = 0; i < count; i++) {
            wcsBuf[i] = mbs[i];
        }
    }

    if (bytesConsumed) {
        *bytesConsumed = count;
    }

    return count;
}
| |
/**
 * Encode one wide character as a single ISO 8859-1 (Latin-1) byte.
 *
 * Values above 0xFF have no Latin-1 form and are written as '?'.
 * The byte is stored only when mbs is non-NULL and bufSize is positive.
 *
 * @return Always 1, the encoded length in bytes.
 */
int32_t wcToLatin1(uint16_t wc, uint8_t * mbs, int32_t bufSize)
{
    if (mbs != NULL && bufSize > 0) {
        *mbs = (wc > 0xff) ? (uint8_t)'?' : (uint8_t)(wc & 0xff);
    }
    return 1;
}
| |
| #endif /* I18N_LATIN1_SUPPORT */ |
| |
| #ifdef I18N_UTF8_UTF16_SUPPORT |
| |
/**
 * Convert UTF-8 bytes to 16-bit wide characters.
 *
 * - 1, 2 and 3 byte sequences are decoded to one wide character each.
 * - 4 byte sequences (U+10000 - U+10FFFF) are NOT supported: a well-formed
 *   one is consumed and produces a single U+FFFD.
 * - A malformed sequence in the middle of the input is skipped and produces
 *   a single U+FFFD.
 * - A multi-byte sequence that is cut off by the end of the input is left
 *   unconsumed so the caller can retry once more bytes are available.
 * - A byte that can never start a sequence (a stray continuation byte or
 *   0xF8 - 0xFF) is always consumed as U+FFFD, even as the last byte.
 *
 * @param mbs               Input bytes.
 * @param mbsLen            Number of bytes available in mbs.
 * @param wcsBuf            Output buffer, or NULL to only count the wide
 *                          characters (bytesConsumed is still computed).
 * @param bufSizeInWideChar Capacity of wcsBuf in wide characters (ignored
 *                          when wcsBuf is NULL).
 * @param bytesConsumed     Optional. Receives the number of input bytes used.
 *
 * @return Number of wide characters stored, or counted when wcsBuf is NULL.
 */
int32_t utf8ToWcs(const uint8_t *mbs, int32_t mbsLen,
                  uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                  int32_t *bytesConsumed)
{
    int32_t charsConverted = 0;
    int32_t i = 0;

    if (wcsBuf == NULL) {
        /* Counting only. Every wide character uses at least one input byte,
         * so mbsLen itself is a sufficient bound; no doubling is needed,
         * which avoids signed overflow for very large mbsLen. */
        bufSizeInWideChar = mbsLen;
    }

    while (i < mbsLen && charsConverted < bufSizeInWideChar) {
        const int32_t seqStart = i;
        const int32_t remaining = mbsLen - i;   /* >= 1 here; cannot overflow */
        const uint8_t ch = mbs[i];
        int32_t wideChar = -1;                  /* -1: nothing decoded */
        int mayBeTruncated = 1;                 /* ch announces a multi-byte sequence */

        if (IS_ASCII(ch)) {
            wideChar = ch;
            i++;
        } else if ((ch & 0xe0) == 0xc0) {
            /* 2 byte sequence */
            if (remaining > 1 && (mbs[i + 1] & 0xc0) == 0x80) {
                wideChar = ((ch & 0x1f) << 6) | (mbs[i + 1] & 0x3f);
                i += 2;
            } else {
                /* skip incomplete sequence */
                i++;
            }
        } else if ((ch & 0xf0) == 0xe0) {
            /* 3 byte sequence */
            if (remaining > 2
                    && (mbs[i + 1] & 0xc0) == 0x80
                    && (mbs[i + 2] & 0xc0) == 0x80) {
                wideChar = ((ch & 0x0f) << 12)
                         | ((mbs[i + 1] & 0x3f) << 6)
                         | (mbs[i + 2] & 0x3f);
                i += 3;
            } else {
                /* skip incomplete sequence (up to 2 bytes) */
                i++;
                if (i < mbsLen && (mbs[i] & 0xc0) == 0x80) {
                    i++;
                }
            }
        } else if ((ch & 0xf8) == 0xf0) {
            /* 4 byte sequence */
            if (remaining > 3
                    && (mbs[i + 1] & 0xc0) == 0x80
                    && (mbs[i + 2] & 0xc0) == 0x80
                    && (mbs[i + 3] & 0xc0) == 0x80) {
                /* FIXME: we do NOT support U+10000 - U+10FFFF for now.
                 * leave it as 0xFFFD. */
                wideChar = INVALID_UNICODE;
                i += 4;
            } else {
                /* skip incomplete sequence (up to 3 bytes) */
                i++;
                if (i < mbsLen && (mbs[i] & 0xc0) == 0x80) {
                    i++;
                    if (i < mbsLen && (mbs[i] & 0xc0) == 0x80) {
                        i++;
                    }
                }
            }
        } else {
            /* Stray continuation byte (0x80 - 0xBF) or 0xF8 - 0xFF: more
             * input can never make this byte valid, so it is not a
             * candidate for the "truncated at end" roll-back below. */
            mayBeTruncated = 0;
            i++;
        }

        if (mayBeTruncated && wideChar == -1 && i >= mbsLen) {
            /* Possible incomplete UTF-8 sequence at the end of mbs.
             * Leave it to the caller. */
            i = seqStart;
            break;
        }

        if (wcsBuf) {
            wcsBuf[charsConverted] =
                (uint16_t)(wideChar == -1 ? INVALID_UNICODE : wideChar);
        }
        charsConverted++;
    }

    if (bytesConsumed) {
        *bytesConsumed = i;
    }

    return charsConverted;
}
| |
/**
 * Encode one wide character as UTF-8.
 *
 * The encoded form takes 1 byte up to 0x7F, 2 bytes up to 0x7FF and 3 bytes
 * otherwise. The bytes are stored only when mbs is non-NULL and bufSize can
 * hold the complete sequence; the required length is returned either way.
 *
 * @return Number of bytes in the UTF-8 encoding of wc (1, 2 or 3).
 */
int32_t wcToUtf8(uint16_t wc, uint8_t * mbs, int32_t bufSize)
{
    const int32_t needed = (wc < 0x80) ? 1 : ((wc < 0x800) ? 2 : 3);

    if (mbs == NULL || bufSize < needed) {
        return needed;
    }

    switch (needed) {
    case 1:
        mbs[0] = (uint8_t)wc;
        break;
    case 2:
        mbs[0] = (uint8_t)(0xc0 | (wc >> 6));
        mbs[1] = (uint8_t)(0x80 | (wc & 0x3f));
        break;
    default:
        mbs[0] = (uint8_t)(0xe0 | (wc >> 12));
        mbs[1] = (uint8_t)(0x80 | ((wc >> 6) & 0x3f));
        mbs[2] = (uint8_t)(0x80 | (wc & 0x3f));
        break;
    }

    return needed;
}
| |
/**
 * Convert UTF-16 big-endian bytes to 16-bit wide characters.
 *
 * Every pair of input bytes (high byte first) becomes one wide character.
 * A trailing odd byte is ignored and not counted as consumed.
 *
 * @param mbs               Input bytes.
 * @param mbsLen            Number of bytes available in mbs. A negative
 *                          value is treated as an empty input.
 * @param wcsBuf            Output buffer, or NULL to only ask how many wide
 *                          characters the input would need.
 * @param bufSizeInWideChar Capacity of wcsBuf in wide characters (ignored
 *                          when wcsBuf is NULL). A negative value is treated
 *                          as zero capacity.
 * @param bytesConsumed     Optional. Receives the number of input bytes
 *                          accounted for by the return value; it is written
 *                          on every path, including the size query.
 *
 * @return Number of wide characters stored, or required when wcsBuf is NULL.
 */
int32_t utf16beToWcs(const uint8_t *mbs, int32_t mbsLen,
                     uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                     int32_t *bytesConsumed)
{
    int32_t count = mbsLen / 2;
    int32_t i;

    /* The output capacity only limits the count when we really store. */
    if (wcsBuf != NULL && count > bufSizeInWideChar) {
        count = bufSizeInWideChar;
    }
    /* A negative count would make a count-down loop run away and write
     * far outside wcsBuf; clamp it to an empty conversion instead. */
    if (count < 0) {
        count = 0;
    }

    if (wcsBuf != NULL) {
        for (i = 0; i < count; i++) {
            /* TODO: handle surrogate pair values */
            wcsBuf[i] = (uint16_t)((mbs[2 * i] << 8) | mbs[2 * i + 1]);
        }
    }

    if (bytesConsumed) {
        *bytesConsumed = count * 2;
    }

    return count;
}
| |
/**
 * Encode one wide character as two UTF-16 big-endian bytes (high byte
 * first). The bytes are stored only when mbs is non-NULL and bufSize is at
 * least 2.
 *
 * @return Always 2, the encoded length in bytes.
 */
int32_t wcToUtf16be(uint16_t wc, uint8_t * mbs, int32_t bufSize)
{
    enum { UNIT_BYTES = 2 };

    if (mbs == NULL || bufSize < UNIT_BYTES) {
        return UNIT_BYTES;
    }

    /* TODO: handle surrogate pair values */
    mbs[0] = (uint8_t)(wc >> 8);
    mbs[1] = (uint8_t)(wc & 0xff);
    return UNIT_BYTES;
}
| |
/**
 * Convert UTF-16 little-endian bytes to 16-bit wide characters.
 *
 * Every pair of input bytes (low byte first) becomes one wide character.
 * A trailing odd byte is ignored and not counted as consumed.
 *
 * @param mbs               Input bytes.
 * @param mbsLen            Number of bytes available in mbs. A negative
 *                          value is treated as an empty input.
 * @param wcsBuf            Output buffer, or NULL to only ask how many wide
 *                          characters the input would need.
 * @param bufSizeInWideChar Capacity of wcsBuf in wide characters (ignored
 *                          when wcsBuf is NULL). A negative value is treated
 *                          as zero capacity.
 * @param bytesConsumed     Optional. Receives the number of input bytes
 *                          accounted for by the return value; it is written
 *                          on every path, including the size query.
 *
 * @return Number of wide characters stored, or required when wcsBuf is NULL.
 */
int32_t utf16leToWcs(const uint8_t *mbs, int32_t mbsLen,
                     uint16_t *wcsBuf, int32_t bufSizeInWideChar,
                     int32_t *bytesConsumed)
{
    int32_t count = mbsLen / 2;
    int32_t i;

    /* The output capacity only limits the count when we really store. */
    if (wcsBuf != NULL && count > bufSizeInWideChar) {
        count = bufSizeInWideChar;
    }
    /* A negative count would make a count-down loop run away and write
     * far outside wcsBuf; clamp it to an empty conversion instead. */
    if (count < 0) {
        count = 0;
    }

    if (wcsBuf != NULL) {
        for (i = 0; i < count; i++) {
            /* TODO: handle surrogate pair values */
            wcsBuf[i] = (uint16_t)(mbs[2 * i] | (mbs[2 * i + 1] << 8));
        }
    }

    if (bytesConsumed) {
        *bytesConsumed = count * 2;
    }

    return count;
}
| |
/**
 * Encode one wide character as two UTF-16 little-endian bytes (low byte
 * first). The bytes are stored only when mbs is non-NULL and bufSize is at
 * least 2.
 *
 * @return Always 2, the encoded length in bytes.
 */
int32_t wcToUtf16le(uint16_t wc, uint8_t * mbs, int32_t bufSize)
{
    enum { UNIT_BYTES = 2 };

    if (mbs == NULL || bufSize < UNIT_BYTES) {
        return UNIT_BYTES;
    }

    /* TODO: handle surrogate pair values */
    mbs[0] = (uint8_t)(wc & 0xff);
    mbs[1] = (uint8_t)(wc >> 8);
    return UNIT_BYTES;
}
| |
| #endif /* I18N_UTF8_UTF16_SUPPORT */ |
| |