| // icu.h |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: roubert@google.com (Fredrik Roubert) |
| |
| // Wrapper class for UErrorCode, with conversion operators for direct use in |
| // ICU C and C++ APIs. |
| // |
| // Features: |
| // - The constructor initializes the internal UErrorCode to U_ZERO_ERROR, |
| // removing one common source of errors. |
| // - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking |
| // UErrorCode& (reference), via conversion operators. |
| // - Automatic checking for success when it goes out of scope. On failure, |
| // the destructor will FSTERROR() an error message. |
| // |
| // Most of ICU will handle errors gracefully and provide sensible fallbacks. |
| // Using IcuErrorCode, it is therefore possible to write very compact code |
| // that does sensible things on failure and provides logging for debugging. |
| // |
| // Example: |
| // |
| // IcuErrorCode icuerrorcode; |
| // return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL; |
| |
| #ifndef FST_LIB_ICU_H_ |
| #define FST_LIB_ICU_H_ |
| |
| #include <unicode/errorcode.h> |
| #include <unicode/unistr.h> |
| #include <unicode/ustring.h> |
| #include <unicode/utf8.h> |
| |
| class IcuErrorCode : public icu::ErrorCode { |
| public: |
| IcuErrorCode() {} |
| virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); } |
| |
| // Redefine 'errorName()' in order to be compatible with ICU version 4.2 |
| const char* errorName() const { |
| return u_errorName(errorCode); |
| } |
| |
| protected: |
| virtual void handleFailure() const { |
| FSTERROR() << errorName(); |
| } |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(IcuErrorCode); |
| }; |
| |
| namespace fst { |
| |
| template <class Label> |
| bool UTF8StringToLabels(const string &str, vector<Label> *labels) { |
| const char *c_str = str.c_str(); |
| int32_t length = str.size(); |
| UChar32 c; |
| for (int32_t i = 0; i < length; /* no update */) { |
| U8_NEXT(c_str, i, length, c); |
| if (c < 0) { |
| LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c; |
| return false; |
| } |
| labels->push_back(c); |
| } |
| return true; |
| } |
| |
| template <class Label> |
| bool LabelsToUTF8String(const vector<Label> &labels, string *str) { |
| icu::UnicodeString u_str; |
| char c_str[5]; |
| for (size_t i = 0; i < labels.size(); ++i) { |
| u_str.setTo(labels[i]); |
| IcuErrorCode error; |
| u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error); |
| if (error.isFailure()) { |
| LOG(ERROR) << "LabelsToUTF8String: Bad encoding: " |
| << error.errorName(); |
| return false; |
| } |
| *str += c_str; |
| } |
| return true; |
| } |
| |
| } // namespace fst |
| |
| #endif // FST_LIB_ICU_H_ |