| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ |
| #define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ |
| |
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>

#include "base.h"
#include "util/base/logging.h"
| |
| namespace libtextclassifier { |
| namespace nlp_core { |
| |
// Reverses, in place, the order of the sizeof(T) bytes of the object that addr
// points to.  For a 2-byte T this turns (addr[0], addr[1]) into
// (addr[1], addr[0]).  This is the basic building block for converting between
// little-endian and big-endian representations.
template <class T>
void SwapBytes(T *addr) {
  // View the object as a plain sequence of bytes: [first_byte, past_last_byte).
  char *const first_byte = reinterpret_cast<char *>(addr);
  char *const past_last_byte = first_byte + sizeof(T);
  std::reverse(first_byte, past_last_byte);
}
| |
| // Assuming addr points to a piece of data of type T, with its bytes in the |
| // little/big endian order specific to the machine this code runs on, this |
| // method will re-arrange the bytes (in place) in little-endian order. |
| template <class T> |
| void HostToLittleEndian(T *addr) { |
| if (LittleEndian::IsLittleEndian()) { |
| // Do nothing: current machine is little-endian. |
| } else { |
| SwapBytes(addr); |
| } |
| } |
| |
| // Reverse of HostToLittleEndian. |
| template <class T> |
| void LittleEndianToHost(T *addr) { |
| // It turns out it's the same function: on little-endian machines, do nothing |
| // (source and target formats are identical). Otherwise, swap bytes. |
| HostToLittleEndian(addr); |
| } |
| |
// Returns the string obtained by concatenating the bytes of the elements of
// vector v (in order: v[0], v[1], etc).  If the type T requires more than one
// byte, the bytes of each element are first converted to little-endian order.
//
// The returned string has exactly v.size() * sizeof(T) bytes.
template <typename T>
std::string GetDataBytesInLittleEndianOrder(const std::vector<T> &v) {
  std::string data_bytes;
  // The final size is known up front: allocate once instead of growing the
  // string repeatedly while appending.
  data_bytes.reserve(v.size() * sizeof(T));
  // Iterate by value on purpose: each element is converted in place in this
  // local copy, leaving the caller's vector untouched.
  for (T element : v) {
    HostToLittleEndian(&element);
    data_bytes.append(reinterpret_cast<const char *>(&element), sizeof(T));
  }
  return data_bytes;
}
| |
| // Performs reverse of GetDataBytesInLittleEndianOrder. |
| // |
| // I.e., decodes the data bytes from parameter bytes into num_elements Ts, and |
| // places them in the vector v (previous content of that vector is erased). |
| // |
| // We expect bytes to contain the concatenation of the bytes for exactly |
| // num_elements elements of type T. If the type T requires more than one byte, |
| // those bytes should be arranged in little-endian form. |
| // |
| // Returns true on success and false otherwise (e.g., bytes has the wrong size). |
| // Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore, |
| // have asked us not to do so). Instead, we report the error and let the client |
| // decide what to do. On error, we also fill the vector with zeros, such that |
| // at least the dimension of v matches expectations. |
| template<typename T> |
| bool FillVectorFromDataBytesInLittleEndian( |
| const std::string &bytes, int num_elements, std::vector<T> *v) { |
| if (bytes.size() != num_elements * sizeof(T)) { |
| TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size() |
| << " vs expected " << num_elements |
| << " elements of sizeof(element) = " << sizeof(T) |
| << " bytes each ; will fill vector with zeros"; |
| v->assign(num_elements, static_cast<T>(0)); |
| return false; |
| } |
| v->clear(); |
| v->reserve(num_elements); |
| const T *start = reinterpret_cast<const T *>(bytes.data()); |
| if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) { |
| // Fast in the common case ([almost] all hardware today is little-endian): |
| // if same endianness (or type T requires a single byte and endianness |
| // irrelevant), just use the bytes. |
| v->assign(start, start + num_elements); |
| } else { |
| // Slower (but very rare case): this code runs on a big endian machine and |
| // the type T requires more than one byte. Hence, some conversion is |
| // necessary. |
| for (int i = 0; i < num_elements; ++i) { |
| T temp = start[i]; |
| SwapBytes(&temp); |
| v->push_back(temp); |
| } |
| } |
| return true; |
| } |
| |
| } // namespace nlp_core |
| } // namespace libtextclassifier |
| |
| #endif // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ |