tests/UnicodeUtils.cpp - platform/frameworks/minikin - Git at Google

 /*
  * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <gtest/gtest.h>
 #include <unicode/utf.h>
 #include <cstdlib>

 // Parses a compact test notation into UTF-16 code units.
 //
 // src is of the form "U+1F431 | 'h' 'i'". Recognized tokens:
 //   U+XXXX or u+XXXX  a code point written with 4 to 6 hex digits; values above U+FFFF are
 //                     stored as a surrogate pair (two code units)
 //   'c'               a single ASCII character
 //   |                 offset marker; the number of code units written so far is saved to
 //                     *offset
 //   space             separator, skipped
 // Any other character fails the test.
 //
 // buf, buf_size: output buffer and its capacity in uint16_t units.
 // result_size:   must be non-null; receives the number of code units written.
 // offset:        if non-null, src must contain exactly one "|"; if null, src must contain none.
 //
 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
 // An ASSERT that fails while parsing returns immediately, before *result_size is written.
 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
         size_t* offset) {
     size_t input_ix = 0;
     size_t output_ix = 0;
     bool seen_offset = false;

     while (src[input_ix] != 0) {
         switch (src[input_ix]) {
         case '\'': {
             // single ASCII char, written as 'c'
             input_ix++;  // skip the opening quote
             // Inspect the quoted character as an unsigned value: plain char may be signed, in
             // which case a non-ASCII byte is negative, would pass a "< 0x80" comparison and
             // would be sign-extended when stored into buf.
             const unsigned int ch = static_cast<unsigned char>(src[input_ix]);
             ASSERT_NE(ch, 0u);
             ASSERT_LT(ch, 0x80u);
             ASSERT_LT(output_ix, buf_size);
             buf[output_ix++] = static_cast<uint16_t>(ch);
             input_ix++;
             ASSERT_EQ(src[input_ix], '\'');
             input_ix++;
             break;
         }
         case 'u':
         case 'U': {
             // Unicode codepoint in hex syntax
             input_ix++;
             ASSERT_EQ(src[input_ix], '+');
             input_ix++;
             const char* digits = src + input_ix;
             char* endptr = nullptr;
             // Note: strtoul also accepts leading whitespace, a sign and a "0x" prefix; any such
             // characters are counted in num_hex_digits below.
             unsigned long int codepoint = strtoul(digits, &endptr, 16);
             size_t num_hex_digits = endptr - digits;
             ASSERT_GE(num_hex_digits, 4u);  // also triggers on invalid number syntax, digits = 0
             ASSERT_LE(num_hex_digits, 6u);
             ASSERT_LE(codepoint, 0x10FFFFu);
             input_ix += num_hex_digits;
             if (U16_LENGTH(codepoint) == 1) {
                 ASSERT_LE(output_ix + 1, buf_size);
                 buf[output_ix++] = static_cast<uint16_t>(codepoint);
             } else {
                 // UTF-16 encoding: lead surrogate followed by trail surrogate
                 ASSERT_LE(output_ix + 2, buf_size);
                 buf[output_ix++] = U16_LEAD(codepoint);
                 buf[output_ix++] = U16_TRAIL(codepoint);
             }
             break;
         }
         case ' ':
             input_ix++;
             break;
         case '|':
             // At most one marker, and only when the caller asked for the offset.
             ASSERT_FALSE(seen_offset);
             ASSERT_NE(offset, nullptr);
             *offset = output_ix;
             seen_offset = true;
             input_ix++;
             break;
         default:
             FAIL();  // unexpected character
         }
     }
     ASSERT_NE(result_size, nullptr);
     *result_size = output_ix;
     // A caller that supplied an offset pointer must have provided a "|" marker.
     ASSERT_TRUE(seen_offset || offset == nullptr);
 }

 // Exercises ParseUnicode on a BMP code point, a supplementary code point (expected as a
 // surrogate pair), the "|" offset marker and a quoted ASCII character.
 TEST(UnicodeUtils, parse) {
     const size_t BUF_SIZE = 256;
     uint16_t buf[BUF_SIZE] = {};
     size_t offset = 0;
     size_t size = 0;
     // ParseUnicode reports errors through fatal gtest assertions, which only return from
     // ParseUnicode itself. Stop here as well so the outputs are not inspected after a failed
     // parse.
     ASSERT_NO_FATAL_FAILURE(
             ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset));
     EXPECT_EQ(size, 4u);
     EXPECT_EQ(offset, 3u);  // three code units precede the "|"
     EXPECT_EQ(buf[0], 0x000D);
     EXPECT_EQ(buf[1], 0xD83D);  // lead surrogate of U+1F431
     EXPECT_EQ(buf[2], 0xDC31);  // trail surrogate of U+1F431
     EXPECT_EQ(buf[3], 'a');
 }
	/*
	* Copyright (C) 2015 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <gtest/gtest.h>
	#include <unicode/utf.h>
	#include <cstdlib>

	// Parses a compact test notation into UTF-16 code units.
	//
	// src is of the form "U+1F431 | 'h' 'i'". Recognized tokens:
	//   U+XXXX or u+XXXX  a code point written with 4 to 6 hex digits; values above U+FFFF are
	//                     stored as a surrogate pair (two code units)
	//   'c'               a single ASCII character
	//   |                 offset marker; the number of code units written so far is saved to
	//                     *offset
	//   space             separator, skipped
	// Any other character fails the test.
	//
	// buf, buf_size: output buffer and its capacity in uint16_t units.
	// result_size:   must be non-null; receives the number of code units written.
	// offset:        if non-null, src must contain exactly one "|"; if null, src must contain none.
	//
	// Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
	// An ASSERT that fails while parsing returns immediately, before *result_size is written.
	void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
	        size_t* offset) {
	    size_t input_ix = 0;
	    size_t output_ix = 0;
	    bool seen_offset = false;

	    while (src[input_ix] != 0) {
	        switch (src[input_ix]) {
	        case '\'': {
	            // single ASCII char, written as 'c'
	            input_ix++;  // skip the opening quote
	            // Inspect the quoted character as an unsigned value: plain char may be signed, in
	            // which case a non-ASCII byte is negative, would pass a "< 0x80" comparison and
	            // would be sign-extended when stored into buf.
	            const unsigned int ch = static_cast<unsigned char>(src[input_ix]);
	            ASSERT_NE(ch, 0u);
	            ASSERT_LT(ch, 0x80u);
	            ASSERT_LT(output_ix, buf_size);
	            buf[output_ix++] = static_cast<uint16_t>(ch);
	            input_ix++;
	            ASSERT_EQ(src[input_ix], '\'');
	            input_ix++;
	            break;
	        }
	        case 'u':
	        case 'U': {
	            // Unicode codepoint in hex syntax
	            input_ix++;
	            ASSERT_EQ(src[input_ix], '+');
	            input_ix++;
	            const char* digits = src + input_ix;
	            char* endptr = nullptr;
	            // Note: strtoul also accepts leading whitespace, a sign and a "0x" prefix; any such
	            // characters are counted in num_hex_digits below.
	            unsigned long int codepoint = strtoul(digits, &endptr, 16);
	            size_t num_hex_digits = endptr - digits;
	            ASSERT_GE(num_hex_digits, 4u);  // also triggers on invalid number syntax, digits = 0
	            ASSERT_LE(num_hex_digits, 6u);
	            ASSERT_LE(codepoint, 0x10FFFFu);
	            input_ix += num_hex_digits;
	            if (U16_LENGTH(codepoint) == 1) {
	                ASSERT_LE(output_ix + 1, buf_size);
	                buf[output_ix++] = static_cast<uint16_t>(codepoint);
	            } else {
	                // UTF-16 encoding: lead surrogate followed by trail surrogate
	                ASSERT_LE(output_ix + 2, buf_size);
	                buf[output_ix++] = U16_LEAD(codepoint);
	                buf[output_ix++] = U16_TRAIL(codepoint);
	            }
	            break;
	        }
	        case ' ':
	            input_ix++;
	            break;
	        case '|':
	            // At most one marker, and only when the caller asked for the offset.
	            ASSERT_FALSE(seen_offset);
	            ASSERT_NE(offset, nullptr);
	            *offset = output_ix;
	            seen_offset = true;
	            input_ix++;
	            break;
	        default:
	            FAIL();  // unexpected character
	        }
	    }
	    ASSERT_NE(result_size, nullptr);
	    *result_size = output_ix;
	    // A caller that supplied an offset pointer must have provided a "|" marker.
	    ASSERT_TRUE(seen_offset || offset == nullptr);
	}

	// Exercises ParseUnicode on a BMP code point, a supplementary code point (expected as a
	// surrogate pair), the "|" offset marker and a quoted ASCII character.
	TEST(UnicodeUtils, parse) {
	    const size_t BUF_SIZE = 256;
	    uint16_t buf[BUF_SIZE] = {};
	    size_t offset = 0;
	    size_t size = 0;
	    // ParseUnicode reports errors through fatal gtest assertions, which only return from
	    // ParseUnicode itself. Stop here as well so the outputs are not inspected after a failed
	    // parse.
	    ASSERT_NO_FATAL_FAILURE(
	            ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset));
	    EXPECT_EQ(size, 4u);
	    EXPECT_EQ(offset, 3u);  // three code units precede the "|"
	    EXPECT_EQ(buf[0], 0x000D);
	    EXPECT_EQ(buf[1], 0xD83D);  // lead surrogate of U+1F431
	    EXPECT_EQ(buf[2], 0xDC31);  // trail surrogate of U+1F431
	    EXPECT_EQ(buf[3], 'a');
	}