am b4e9e379: Merge "fix build problem, when #define ENABLE_ANDROID_LOG 1"
* commit 'b4e9e379e360464d3d6bde3084e5b286238a9329':
fix build problem, when #define ENABLE_ANDROID_LOG 1
diff --git a/android/Android.mk b/android/Android.mk
index 151a5cb..9284903 100644
--- a/android/Android.mk
+++ b/android/Android.mk
@@ -2,9 +2,7 @@
libsqlite3_android_local_src_files := \
PhoneNumberUtils.cpp \
- PhoneticStringUtils.cpp \
OldPhoneNumberUtils.cpp \
- PhonebookIndex.cpp \
sqlite3_android.cpp
libsqlite3_android_c_includes := \
@@ -26,24 +24,6 @@
include $(BUILD_HOST_STATIC_LIBRARY)
endif
-# Test for PhoneticStringUtils
-include $(CLEAR_VARS)
-
-LOCAL_MODULE:= libsqlite3_phonetic_string_utils_test
-
-LOCAL_CFLAGS += -Wall -Werror
-
-LOCAL_SRC_FILES := \
- PhoneticStringUtils.cpp \
- PhoneticStringUtilsTest.cpp
-
-LOCAL_MODULE_TAGS := optional
-
-LOCAL_SHARED_LIBRARIES := \
- libutils
-
-include $(BUILD_EXECUTABLE)
-
# Test for PhoneNumberUtils
#
# You can also test this in Unix, like this:
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp
deleted file mode 100644
index 5cc26e5..0000000
--- a/android/PhonebookIndex.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright 2010, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ctype.h>
-#include <string.h>
-
-#include <unicode/ucol.h>
-#include <unicode/uiter.h>
-#include <unicode/ustring.h>
-#include <unicode/utypes.h>
-
-#include "PhonebookIndex.h"
-#include "PhoneticStringUtils.h"
-
-#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)
-
-namespace android {
-
-// IMPORTANT! Keep the codes below SORTED. We are doing a binary search on the array
-static UChar DEFAULT_CHAR_MAP[] = {
- 0x00C6, 'A', // AE
- 0x00DF, 'S', // Etzett
- 0x1100, 0x3131, // HANGUL LETTER KIYEOK
- 0x1101, 0x3132, // HANGUL LETTER SSANGKIYEOK
- 0x1102, 0x3134, // HANGUL LETTER NIEUN
- 0x1103, 0x3137, // HANGUL LETTER TIKEUT
- 0x1104, 0x3138, // HANGUL LETTER SSANGTIKEUT
- 0x1105, 0x3139, // HANGUL LETTER RIEUL
- 0x1106, 0x3141, // HANGUL LETTER MIEUM
- 0x1107, 0x3142, // HANGUL LETTER PIEUP
- 0x1108, 0x3143, // HANGUL LETTER SSANGPIEUP
- 0x1109, 0x3145, // HANGUL LETTER SIOS
- 0x110A, 0x3146, // HANGUL LETTER SSANGSIOS
- 0x110B, 0x3147, // HANGUL LETTER IEUNG
- 0x110C, 0x3148, // HANGUL LETTER CIEUC
- 0x110D, 0x3149, // HANGUL LETTER SSANGCIEUC
- 0x110E, 0x314A, // HANGUL LETTER CHIEUCH
- 0x110F, 0x314B, // HANGUL LETTER KHIEUKH
- 0x1110, 0x314C, // HANGUL LETTER THIEUTH
- 0x1111, 0x314D, // HANGUL LETTER PHIEUPH
- 0x1112, 0x314E, // HANGUL LETTER HIEUH
- 0x111A, 0x3140, // HANGUL LETTER RIEUL-HIEUH
- 0x1121, 0x3144, // HANGUL LETTER PIEUP-SIOS
- 0x1161, 0x314F, // HANGUL LETTER A
- 0x1162, 0x3150, // HANGUL LETTER AE
- 0x1163, 0x3151, // HANGUL LETTER YA
- 0x1164, 0x3152, // HANGUL LETTER YAE
- 0x1165, 0x3153, // HANGUL LETTER EO
- 0x1166, 0x3154, // HANGUL LETTER E
- 0x1167, 0x3155, // HANGUL LETTER YEO
- 0x1168, 0x3156, // HANGUL LETTER YE
- 0x1169, 0x3157, // HANGUL LETTER O
- 0x116A, 0x3158, // HANGUL LETTER WA
- 0x116B, 0x3159, // HANGUL LETTER WAE
- 0x116C, 0x315A, // HANGUL LETTER OE
- 0x116D, 0x315B, // HANGUL LETTER YO
- 0x116E, 0x315C, // HANGUL LETTER U
- 0x116F, 0x315D, // HANGUL LETTER WEO
- 0x1170, 0x315E, // HANGUL LETTER WE
- 0x1171, 0x315F, // HANGUL LETTER WI
- 0x1172, 0x3160, // HANGUL LETTER YU
- 0x1173, 0x3161, // HANGUL LETTER EU
- 0x1174, 0x3162, // HANGUL LETTER YI
- 0x1175, 0x3163, // HANGUL LETTER I
- 0x11AA, 0x3133, // HANGUL LETTER KIYEOK-SIOS
- 0x11AC, 0x3135, // HANGUL LETTER NIEUN-CIEUC
- 0x11AD, 0x3136, // HANGUL LETTER NIEUN-HIEUH
- 0x11B0, 0x313A, // HANGUL LETTER RIEUL-KIYEOK
- 0x11B1, 0x313B, // HANGUL LETTER RIEUL-MIEUM
- 0x11B3, 0x313D, // HANGUL LETTER RIEUL-SIOS
- 0x11B4, 0x313E, // HANGUL LETTER RIEUL-THIEUTH
- 0x11B5, 0x313F, // HANGUL LETTER RIEUL-PHIEUPH
-};
-
-/**
- * Binary search to map an individual character to the corresponding phone book index.
- */
-static UChar map_character(UChar c, UChar * char_map, int32_t length) {
- int from = 0, to = length;
- while (from < to) {
- int m = ((to + from) >> 1) & ~0x1; // Only consider even positions
- UChar cm = char_map[m];
- if (cm == c) {
- return char_map[m + 1];
- } else if (cm < c) {
- from = m + 2;
- } else {
- to = m;
- }
- }
- return 0;
-}
-
-/**
- * Returns TRUE if the character belongs to a Hanzi unicode block
- */
-static bool is_CJK(UChar c) {
- return
- (0x4e00 <= c && c <= 0x9fff) // CJK_UNIFIED_IDEOGRAPHS
- || (0x3400 <= c && c <= 0x4dbf) // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
- || (0x3000 <= c && c <= 0x303f) // CJK_SYMBOLS_AND_PUNCTUATION
- || (0x2e80 <= c && c <= 0x2eff) // CJK_RADICALS_SUPPLEMENT
- || (0x3300 <= c && c <= 0x33ff) // CJK_COMPATIBILITY
- || (0xfe30 <= c && c <= 0xfe4f) // CJK_COMPATIBILITY_FORMS
- || (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS
-}
-
-int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
- UBool * isError)
-{
- if (size < MIN_OUTPUT_SIZE) {
- *isError = TRUE;
- return 0;
- }
-
- *isError = FALSE;
-
- // Normalize the first character to remove accents using the NFD normalization
- UErrorCode errorCode = U_ZERO_ERROR;
- int32_t len = unorm_next(iter, out, size, UNORM_NFD,
- 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
- if (U_FAILURE(errorCode)) {
- *isError = TRUE;
- return 0;
- }
-
- if (len == 0) { // Empty input string
- return 0;
- }
-
- UChar c = out[0];
-
- if (!u_isalpha(c)) {
- // Digits go into a # section. Everything else goes into the empty section
- // The unicode function u_isdigit would also identify other characters as digits (arabic),
- // but if we caught them here we'd risk having the same section before and after alpha-letters
- // which might break the assumption that each section exists only once
- if (c >= '0' && c <= '9') {
- out[0] = '#';
- return 1;
- }
- return 0;
- }
-
- c = u_toupper(c);
-
- // Check for explicitly mapped characters
- UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
- if (c_mapped != 0) {
- out[0] = c_mapped;
- return 1;
- }
-
- // Convert Kanas to Hiragana
- UChar next = len > 2 ? out[1] : 0;
- c = android::GetNormalizedCodePoint(c, next, NULL);
-
- // Traditional grouping of Hiragana characters
- if (0x3041 <= c && c <= 0x309F) {
- if (c < 0x304B) c = 0x3042; // a
- else if (c < 0x3055) c = 0x304B; // ka
- else if (c < 0x305F) c = 0x3055; // sa
- else if (c < 0x306A) c = 0x305F; // ta
- else if (c < 0x306F) c = 0x306A; // na
- else if (c < 0x307E) c = 0x306F; // ha
- else if (c < 0x3083) c = 0x307E; // ma
- else if (c < 0x3089) c = 0x3084; // ya
- else if (c < 0x308E) c = 0x3089; // ra
- else if (c < 0x3094) c = 0x308F; // wa
- else return 0; // Others are not readable
- out[0] = c;
- return 1;
- } else if (0x30A0 <= c && c <= 0x30FF) {
- // Dot, onbiki, iteration marks are not readable
- return 0;
- }
-
- if (is_CJK(c)) {
- if (strncmp(locale, "ja", 2) == 0) {
- // Japanese word meaning "misc" or "other"
- out[0] = 0x4ED6;
- return 1;
- } else {
- return 0;
- }
- }
-
- out[0] = c;
- return 1;
-}
-
-} // namespace android
diff --git a/android/PhonebookIndex.h b/android/PhonebookIndex.h
deleted file mode 100644
index 5bf14f3..0000000
--- a/android/PhonebookIndex.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-**
-** Copyright 2010, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
-
-#ifndef _ANDROID_PHONEBOOK_INDEX_H
-#define _ANDROID_PHONEBOOK_INDEX_H
-
-#include <unicode/uiter.h>
-#include <unicode/utypes.h>
-
-namespace android {
-
-/**
- * A character converter that takes a UNICODE character and produces the
- * phone book index for it in the specified locale. For example, "a" becomes "A"
- * and so does A with accents. Conversion rules differ from locale
- * locale, which is why this function takes locale as an argument.
- *
- * @param iter iterator if input characters
- * @param locale the string representation of the current locale, e.g. "ja"
- * @param out output buffer
- * @param size size of the output buffer in bytes. The buffer should be large enough
- * to hold the longest phone book index (e.g. a three-char word in Japan).
- * @param isError will be set to TRUE if an error occurs
- *
- * @return number of characters returned
- */
-int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
- UBool * isError);
-
-} // namespace android
-
-#endif
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
deleted file mode 100644
index 796eaa2..0000000
--- a/android/PhoneticStringUtils.cpp
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "PhoneticStringUtils.h"
-#include <utils/Unicode.h>
-
-// We'd like 0 length string last of sorted list. So when input string is NULL
-// or 0 length string, we use these instead.
-#define CODEPOINT_FOR_NULL_STR 0xFFFD
-#define STR_FOR_NULL_STR "\xEF\xBF\xBD"
-
-// We assume that users will not notice strings not sorted properly when the
-// first 128 characters are the same.
-#define MAX_CODEPOINTS 128
-
-namespace android {
-
-// Get hiragana from halfwidth katakana.
-static int GetHiraganaFromHalfwidthKatakana(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- if (codepoint < 0xFF66 || 0xFF9F < codepoint) {
- return codepoint;
- }
-
- switch (codepoint) {
- case 0xFF66: // wo
- return 0x3092;
- case 0xFF67: // xa
- return 0x3041;
- case 0xFF68: // xi
- return 0x3043;
- case 0xFF69: // xu
- return 0x3045;
- case 0xFF6A: // xe
- return 0x3047;
- case 0xFF6B: // xo
- return 0x3049;
- case 0xFF6C: // xya
- return 0x3083;
- case 0xFF6D: // xyu
- return 0x3085;
- case 0xFF6E: // xyo
- return 0x3087;
- case 0xFF6F: // xtsu
- return 0x3063;
- case 0xFF70: // -
- return 0x30FC;
- case 0xFF9C: // wa
- return 0x308F;
- case 0xFF9D: // n
- return 0x3093;
- break;
- default: {
- if (0xFF71 <= codepoint && codepoint <= 0xFF75) {
- // a, i, u, e, o
- if (codepoint == 0xFF73 && next_codepoint == 0xFF9E) {
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x3094; // vu
- } else {
- return 0x3042 + (codepoint - 0xFF71) * 2;
- }
- } else if (0xFF76 <= codepoint && codepoint <= 0xFF81) {
- // ka - chi
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x304B + (codepoint - 0xFF76) * 2 + 1;
- } else {
- return 0x304B + (codepoint - 0xFF76) * 2;
- }
- } else if (0xFF82 <= codepoint && codepoint <= 0xFF84) {
- // tsu, te, to (skip xtsu)
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x3064 + (codepoint - 0xFF82) * 2 + 1;
- } else {
- return 0x3064 + (codepoint - 0xFF82) * 2;
- }
- } else if (0xFF85 <= codepoint && codepoint <= 0xFF89) {
- // na, ni, nu, ne, no
- return 0x306A + (codepoint - 0xFF85);
- } else if (0xFF8A <= codepoint && codepoint <= 0xFF8E) {
- // ha, hi, hu, he, ho
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x306F + (codepoint - 0xFF8A) * 3 + 1;
- } else if (next_codepoint == 0xFF9F) {
- // "han-dakuten" (half voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x306F + (codepoint - 0xFF8A) * 3 + 2;
- } else {
- return 0x306F + (codepoint - 0xFF8A) * 3;
- }
- } else if (0xFF8F <= codepoint && codepoint <= 0xFF93) {
- // ma, mi, mu, me, mo
- return 0x307E + (codepoint - 0xFF8F);
- } else if (0xFF94 <= codepoint && codepoint <= 0xFF96) {
- // ya, yu, yo
- return 0x3084 + (codepoint - 0xFF94) * 2;
- } else if (0xFF97 <= codepoint && codepoint <= 0xFF9B) {
- // ra, ri, ru, re, ro
- return 0x3089 + (codepoint - 0xFF97);
- }
- // Note: 0xFF9C, 0xFF9D are handled above
- } // end of default
- }
-
- return codepoint;
-}
-
-// Assuming input is hiragana, convert the hiragana to "normalized" hiragana.
-static int GetNormalizedHiragana(int codepoint) {
- if (codepoint < 0x3040 || 0x309F < codepoint) {
- return codepoint;
- }
-
- // TODO: should care (semi-)voiced mark (0x3099, 0x309A).
-
- // Trivial kana conversions.
- // e.g. xa => a
- switch (codepoint) {
- case 0x3041:
- case 0x3043:
- case 0x3045:
- case 0x3047:
- case 0x3049:
- case 0x3063:
- case 0x3083:
- case 0x3085:
- case 0x3087:
- case 0x308E: // xwa
- return codepoint + 1;
- case 0x3095: // xka
- return 0x304B;
- case 0x3096: // xke
- return 0x3051;
- case 0x31F0: // xku
- return 0x304F;
- case 0x31F1: // xsi
- return 0x3057;
- case 0x31F2: // xsu
- return 0x3059;
- case 0x31F3: // xto
- return 0x3068;
- case 0x31F4: // xnu
- return 0x306C;
- case 0x31F5: // xha
- return 0x306F;
- case 0x31F6: // xhi
- return 0x3072;
- case 0x31F7: // xhu
- return 0x3075;
- case 0x31F8: // xhe
- return 0x3078;
- case 0x31F9: // xho
- return 0x307B;
- case 0x31FA: // xmu
- return 0x3080;
- case 0x31FB: // xra
- case 0x31FC: // xri
- case 0x31FD: // xru
- case 0x31FE: // xre
- case 0x31FF: // xro
- // ra: 0x3089
- return 0x3089 + (codepoint - 0x31FB);
- default:
- return codepoint;
- }
-}
-
-static int GetNormalizedKana(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- // First, convert fullwidth katakana and halfwidth katakana to hiragana.
- if (0x30A1 <= codepoint && codepoint <= 0x30F6) {
- // Make fullwidth katakana same as hiragana.
- // 96 == 0x30A1 - 0x3041c
- codepoint = codepoint - 96;
- } else if (codepoint == 0x309F) {
- // Digraph YORI; Yo
- codepoint = 0x3088;
- } else if (codepoint == 0x30FF) {
- // Digraph KOTO; Ko
- codepoint = 0x3053;
- } else {
- codepoint = GetHiraganaFromHalfwidthKatakana(
- codepoint, next_codepoint, next_is_consumed);
- }
-
- // Normalize Hiragana.
- return GetNormalizedHiragana(codepoint);
-}
-
-int GetNormalizedCodePoint(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- if (next_is_consumed != NULL) {
- *next_is_consumed = false;
- }
-
- if (codepoint <= 0x0020 || codepoint == 0x3000) {
- // Whitespaces. Keep it as is.
- return codepoint;
- } else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
- (0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
- // Ascii and fullwidth ascii. Keep it as is
- return codepoint;
- } else if (codepoint == 0x02DC || codepoint == 0x223C) {
- // tilde
- return 0xFF5E;
- } else if (codepoint <= 0x3040 ||
- (0x3100 <= codepoint && codepoint < 0xFF00) ||
- codepoint == CODEPOINT_FOR_NULL_STR) {
- // Keep it as is.
- return codepoint;
- }
-
- // Below is Kana-related handling.
-
- return GetNormalizedKana(codepoint, next_codepoint, next_is_consumed);
-}
-
-static bool GetExpectedString(
- const char *src, char **dst, size_t *dst_len,
- int (*get_codepoint_function)(char32_t, char32_t, bool*)) {
- if (dst == NULL || dst_len == NULL) {
- return false;
- }
-
- if (src == NULL || *src == '\0') {
- src = STR_FOR_NULL_STR;
- }
-
- char32_t codepoints[MAX_CODEPOINTS]; // if array size is changed the for loop needs to be changed
-
- ssize_t src_len = utf8_length(src);
- if (src_len <= 0) {
- return false;
- }
-
- bool next_is_consumed;
- size_t j = 0;
- for (size_t i = 0; i < (size_t)src_len && j < MAX_CODEPOINTS;) {
- int32_t ret = utf32_from_utf8_at(src, src_len, i, &i);
- if (ret < 0) {
- // failed to parse UTF-8
- return false;
- }
- ret = get_codepoint_function(
- static_cast<char32_t>(ret),
- i + 1 < (size_t)src_len ? src[i + 1] : 0,
- &next_is_consumed);
- if (ret > 0) {
- codepoints[j] = static_cast<char32_t>(ret);
- j++;
- }
- if (next_is_consumed) {
- i++;
- }
- }
- size_t length = j;
-
- if (length == 0) {
- // If all of codepoints are invalid, we place the string at the end of
- // the list.
- codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
- length = 1;
- }
-
- ssize_t new_len = utf32_to_utf8_length(codepoints, length);
- if (new_len < 0) {
- return false;
- }
-
- *dst = static_cast<char *>(malloc(new_len + 1));
- if (*dst == NULL) {
- return false;
- }
-
- utf32_to_utf8(codepoints, length, *dst);
-
- *dst_len = new_len;
- return true;
-}
-
-bool GetNormalizedString(const char *src, char **dst, size_t *len) {
- return GetExpectedString(src, dst, len, GetNormalizedCodePoint);
-}
-
-} // namespace android
diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h
deleted file mode 100644
index a567a27..0000000
--- a/android/PhoneticStringUtils.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ANDROID_PHONETIC_STRING_UTILS_H
-#define _ANDROID_PHONETIC_STRING_UTILS_H
-
-#include <string.h> // For size_t.
-#include <utils/String8.h>
-
-namespace android {
-
-// Returns codepoint which is "normalized", whose definition depends on each
-// Locale. Note that currently this function normalizes only Japanese; the
-// other characters are remained as is.
-// The variable "next_is_consumed" is set to true if "next_codepoint"
-// is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed
-// when previous "codepoint" is appropriate, like half-width "ka").
-//
-// In Japanese, "normalized" means that half-width and full-width katakana is
-// appropriately converted to hiragana.
-int GetNormalizedCodePoint(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed);
-
-// Pushes Utf8 expression of "codepoint" to "dst". Returns true when successful.
-// If input is invalid or the length of the destination is not enough,
-// returns false.
-bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index);
-
-// Creates a "phonetically sortable" Utf8 string and push it into "dst".
-// *dst must be freed after being used outside.
-// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
-//
-// Note that currently this function considers only Japanese.
-bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len);
-
-// Creates a "normalized" Utf8 string and push it into "dst". *dst must be
-// freed after being used outside.
-// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
-//
-// Note that currently this function considers only Japanese.
-bool GetNormalizedString(const char *src, char **dst, size_t *len);
-
-} // namespace android
-
-#endif
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp
deleted file mode 100644
index 9885823..0000000
--- a/android/PhoneticStringUtilsTest.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PhoneticStringUtils.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <utils/String8.h>
-
-using namespace android;
-
-class TestExecutor {
- public:
- TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}
- bool DoAllTests();
- private:
- void DoOneTest(void (TestExecutor::*test)());
-
- void testUtf32At();
- void testGetUtf8FromUtf32();
- void testGetNormalizedString();
- void testLongString();
-
- // Note: When adding a test, do not forget to add it to DoOneTest().
-
- int m_total_count;
- int m_success_count;
-
- bool m_success;
-};
-
-#define ASSERT_EQ_VALUE(input, expected) \
- ({ \
- if ((expected) != (input)) { \
- printf("0x%X(result) != 0x%X(expected)\n", input, expected); \
- m_success = false; \
- return; \
- } \
- })
-
-#define EXPECT_EQ_VALUE(input, expected) \
- ({ \
- if ((expected) != (input)) { \
- printf("0x%X(result) != 0x%X(expected)\n", input, expected); \
- m_success = false; \
- } \
- })
-
-
-bool TestExecutor::DoAllTests() {
- DoOneTest(&TestExecutor::testUtf32At);
- DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
- DoOneTest(&TestExecutor::testGetNormalizedString);
- DoOneTest(&TestExecutor::testLongString);
-
- printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
- m_total_count, m_success_count, m_total_count - m_success_count);
-
- bool success = m_total_count == m_success_count;
- printf("\n%s\n", success ? "Success" : "Failure");
-
- return success;
-}
-
-void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
- m_success = true;
-
- (this->*test)();
-
- ++m_total_count;
- m_success_count += m_success ? 1 : 0;
-}
-
-#define TEST_GET_UTF32AT(src, index, expected_next, expected_value) \
- ({ \
- size_t next; \
- int32_t ret = utf32_from_utf8_at(src, strlen(src), index, &next); \
- if (ret < 0) { \
- printf("getUtf32At() returned negative value (src: %s, index: %d)\n", \
- (src), (index)); \
- m_success = false; \
- } else if (next != (expected_next)) { \
- printf("next is unexpected value (src: %s, actual: %u, expected: %u)\n", \
- (src), next, (expected_next)); \
- } else { \
- EXPECT_EQ_VALUE(ret, (expected_value)); \
- } \
- })
-
-void TestExecutor::testUtf32At() {
- printf("testUtf32At()\n");
-
- TEST_GET_UTF32AT("a", 0, 1, 97);
- // Japanese hiragana "a"
- TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042);
- // Japanese fullwidth katakana "a" with ascii a
- TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2);
-
- // 2 PUA
- TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000);
- TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
-}
-
-
-#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
- ({ \
- char32_t codepoints[1] = {codepoint}; \
- status_t ret = string8.setTo(codepoints, 1); \
- if (ret != NO_ERROR) { \
- printf("GetUtf8FromCodePoint() returned false at 0x%04X\n", codepoint); \
- m_success = false; \
- } else { \
- const char* string = string8.string(); \
- if (strcmp(string, expected) != 0) { \
- printf("Failed at codepoint 0x%04X\n", codepoint); \
- for (const char *ch = string; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("!= "); \
- for (const char *ch = expected; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("\n"); \
- m_success = false; \
- } \
- } \
- })
-
-void TestExecutor::testGetUtf8FromUtf32() {
- printf("testGetUtf8FromUtf32()\n");
- String8 string8;
-
- EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
- // Armenian capital letter AYB (2 bytes in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
- // Japanese 'a' (3 bytes in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
- // Kanji
- EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
- // PUA (4 byets in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
- EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
-}
-
-#define EXPECT_EQ_UTF8_UTF8(src, expected) \
- ({ \
- if (!GetNormalizedString(src, &dst, &len)) { \
- printf("GetNormalizedSortableString() returned false.\n"); \
- m_success = false; \
- } else { \
- if (strcmp(dst, expected) != 0) { \
- for (const char *ch = dst; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("!= "); \
- for (const char *ch = expected; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("\n"); \
- m_success = false; \
- } \
- free(dst); \
- } \
- })
-
-void TestExecutor::testGetNormalizedString() {
- printf("testGetNormalizedString()\n");
- char *dst;
- size_t len;
-
- // halfwidth alphabets/symbols -> keep it as is.
- EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
- EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
- "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
-
- // halfwidth/fullwidth-katakana -> hiragana
- EXPECT_EQ_UTF8_UTF8(
- "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
- "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
-
- // whitespace -> keep it as is.
- EXPECT_EQ_UTF8_UTF8(" \t", " \t");
-}
-
-void TestExecutor::testLongString() {
- printf("testLongString()\n");
- char * dst;
- size_t len;
- EXPECT_EQ_UTF8_UTF8("Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttgggggggggggggggggggggggggggggggggggggggbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
- "Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttggggggggggggggggggggggggggggggggggg");
-}
-
-
-int main() {
- TestExecutor executor;
- if(executor.DoAllTests()) {
- return 0;
- } else {
- return 1;
- }
-}
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 1d51b60..b836952 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -29,7 +29,6 @@
#include "sqlite3_android.h"
#include "PhoneNumberUtils.h"
-#include "PhonebookIndex.h"
#define ENABLE_ANDROID_LOG 0
#define SMALL_BUFFER_SIZE 10
@@ -74,53 +73,6 @@
}
}
-/**
- * Obtains the first UNICODE letter from the supplied string, normalizes and returns it.
- */
-static void get_phonebook_index(
- sqlite3_context * context, int argc, sqlite3_value ** argv)
-{
- if (argc != 2) {
- sqlite3_result_null(context);
- return;
- }
-
- char const * src = (char const *)sqlite3_value_text(argv[0]);
- char const * locale = (char const *)sqlite3_value_text(argv[1]);
- if (src == NULL || src[0] == 0 || locale == NULL) {
- sqlite3_result_null(context);
- return;
- }
-
- UCharIterator iter;
- uiter_setUTF8(&iter, src, -1);
-
- UBool isError = FALSE;
- UChar index[SMALL_BUFFER_SIZE];
- uint32_t len = android::GetPhonebookIndex(&iter, locale, index, sizeof(index), &isError);
- if (isError) {
- sqlite3_result_null(context);
- return;
- }
-
- uint32_t outlen = 0;
- uint8_t out[SMALL_BUFFER_SIZE];
- for (uint32_t i = 0; i < len; i++) {
- U8_APPEND(out, outlen, sizeof(out), index[i], isError);
- if (isError) {
- sqlite3_result_null(context);
- return;
- }
- }
-
- if (outlen == 0) {
- sqlite3_result_null(context);
- return;
- }
-
- sqlite3_result_text(context, (const char*)out, outlen, SQLITE_TRANSIENT);
-}
-
static void phone_numbers_equal(sqlite3_context * context, int argc, sqlite3_value ** argv)
{
if (argc != 2 && argc != 3) {
@@ -509,14 +461,8 @@
//// PHONEBOOK_COLLATOR
- // The collator may be removed in the near future. Do not depend on it.
- // TODO: it might be better to have another function for registering phonebook collator.
status = U_ZERO_ERROR;
- if (strcmp(systemLocale, "ja") == 0 || strcmp(systemLocale, "ja_JP") == 0) {
- collator = ucol_open("ja@collation=phonebook", &status);
- } else {
- collator = ucol_open(systemLocale, &status);
- }
+ collator = ucol_open(systemLocale, &status);
if (U_FAILURE(status)) {
return -1;
}
@@ -605,16 +551,6 @@
}
#endif
- // Register the GET_PHONEBOOK_INDEX function
- err = sqlite3_create_function(handle,
- "GET_PHONEBOOK_INDEX",
- 2, SQLITE_UTF8, NULL,
- get_phonebook_index,
- NULL, NULL);
- if (err != SQLITE_OK) {
- return err;
- }
-
// Register the _PHONE_NUMBER_STRIPPED_REVERSED function, which imitates
// PhoneNumberUtils.getStrippedReversed. This function is not public API,
// it is only used for compatibility with Android 1.6 and earlier.