| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <cstdlib> |
| #include <string> |
| #include <vector> |
| |
| #include <cutils/log.h> |
| #include <unicode/utf.h> |
| #include <unicode/utf8.h> |
| |
| #include "minikin/U16StringPiece.h" |
| |
| namespace minikin { |
| |
| // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null. |
| // Size is returned in an out parameter because gtest needs a void return for ASSERT to work. |
| void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size, |
| size_t* offset) { |
| size_t input_ix = 0; |
| size_t output_ix = 0; |
| bool seen_offset = false; |
| |
| while (src[input_ix] != 0) { |
| switch (src[input_ix]) { |
| case '\'': |
| // single ASCII char |
| LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80); |
| input_ix++; |
| LOG_ALWAYS_FATAL_IF(src[input_ix] == 0); |
| LOG_ALWAYS_FATAL_IF(output_ix >= buf_size); |
| buf[output_ix++] = (uint16_t)src[input_ix++]; |
| LOG_ALWAYS_FATAL_IF(src[input_ix] != '\''); |
| input_ix++; |
| break; |
| case 'u': |
| case 'U': { |
| // Unicode codepoint in hex syntax |
| input_ix++; |
| LOG_ALWAYS_FATAL_IF(src[input_ix] != '+'); |
| input_ix++; |
| char* endptr = (char*)src + input_ix; |
| unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16); |
| size_t num_hex_digits = endptr - (src + input_ix); |
| |
| // also triggers on invalid number syntax, digits = 0 |
| LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u); |
| LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u); |
| LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu); |
| input_ix += num_hex_digits; |
| if (U16_LENGTH(codepoint) == 1) { |
| LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size); |
| buf[output_ix++] = codepoint; |
| } else { |
| // UTF-16 encoding |
| LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size); |
| buf[output_ix++] = U16_LEAD(codepoint); |
| buf[output_ix++] = U16_TRAIL(codepoint); |
| } |
| break; |
| } |
| case ' ': |
| input_ix++; |
| break; |
| case '|': |
| LOG_ALWAYS_FATAL_IF(seen_offset); |
| LOG_ALWAYS_FATAL_IF(offset == nullptr); |
| *offset = output_ix; |
| seen_offset = true; |
| input_ix++; |
| break; |
| default: |
| LOG_ALWAYS_FATAL("Unexpected Character"); |
| } |
| } |
| LOG_ALWAYS_FATAL_IF(result_size == nullptr); |
| *result_size = output_ix; |
| LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr); |
| } |
| |
| std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) { |
| std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]); |
| size_t result_size = 0; |
| ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset); |
| return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size); |
| } |
| |
| std::vector<uint16_t> parseUnicodeString(const std::string& in) { |
| return parseUnicodeStringWithOffset(in, nullptr); |
| } |
| |
| std::vector<uint16_t> utf8ToUtf16(const std::string& text) { |
| std::vector<uint16_t> result; |
| int32_t i = 0; |
| const int32_t textLength = static_cast<int32_t>(text.size()); |
| uint32_t c = 0; |
| while (i < textLength) { |
| U8_NEXT(text.c_str(), i, textLength, c); |
| if (U16_LENGTH(c) == 1) { |
| result.push_back(c); |
| } else { |
| result.push_back(U16_LEAD(c)); |
| result.push_back(U16_TRAIL(c)); |
| } |
| } |
| return result; |
| } |
| |
| std::string utf16ToUtf8(const U16StringPiece& u16String) { |
| const uint32_t textLength = u16String.size(); |
| uint32_t i = 0; |
| uint32_t c = 0; |
| |
| std::string out; |
| out.reserve(textLength * 4); |
| |
| while (i < textLength) { |
| U16_NEXT(u16String.data(), i, textLength, c); |
| |
| char buf[U8_MAX_LENGTH] = {}; |
| uint32_t outIndex = 0; |
| U8_APPEND_UNSAFE(buf, outIndex, c); |
| out.append(buf, outIndex); |
| } |
| return out; |
| } |
| |
| } // namespace minikin |