/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// PrefixVarint is an integer encoding method that has the exact same
// compression size as Varint, but is faster to decode because all of the
// length information is encoded in the first byte.
// On a Warp 19 it can parse up to 42% faster than Varint, for the distributions
// tested below.
// On an Ilium it can parse up to 37% faster than Varint.
//
// But there are a few caveats:
// - This is fastest if both the encoder and decoder are little endian.
// Somewhat slower versions are provided for encoding and decoding on big
// endian machines.
// - This doesn't support backwards decoding.
//
// The PrefixVarint encoding uses a unary code in the high bits of the first
// byte to encode the total number of bytes, as follows:
// - 32bit encoding:
// 1 byte: "0" + 7 value bits
// 2 bytes: "10" + 6 value bits
// 3 bytes: "110" + 5 value bits
// 4 bytes: "1110" + 4 value bits
// 5 bytes: "1111" + no value bits (value is in the next 4 bytes)
//
// - 64bit encoding:
// 1 byte: "0" + 7 value bits
// 2 bytes: "10" + 6 value bits
// 3 bytes: "110" + 5 value bits
// 4 bytes: "1110" + 4 value bits
// 5 bytes: "11110" + 3 value bits
// 6 bytes: "111110" + 2 value bits
// 7 bytes: "1111110" + 1 value bit
// 8 bytes: "11111110" + no value bits (value is in the next 7 bytes)
// 9 bytes: "11111111" + no value bits (value is in the next 8 bytes)
//
// Note that the 32bit and 64bit PrefixVarint encodings are identical for values
// between 0 and (1<<28)-1 (i.e., values that encode to at most 4 bytes).
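//
// For example, the value 300 (binary 1 0010 1100) needs 9 value bits, so the
// encoders below use the 2-byte form: the first byte holds the "10" tag plus
// the low 6 value bits (101100), i.e. 0xac, and the second byte holds the
// remaining bits (0000 0100), i.e. 0x04, giving the encoding {0xac, 0x04}.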
//
// The following are benchmark results (in cycles per operation, so lower is
// better) on randomly generated sequences of values whose encodings have the
// given distribution of byte lengths. The cycle counts include some overhead
// (1-2 cycles) for the testing loop operation.
//
// UNIFORM 2^14 means the values are randomly generated in the range [0-2^14),
// so the majority will require 2 bytes to encode. MIXED 60:20:10:6:4, on the
// other hand, means 60% of the values encode to 1 byte, 20% to 2 bytes, and
// so on. The MIXED 15:71:13:1.2:0.1 distribution simulates a power law with a
// median value of 1024.
//
// VI is Varint, PVI is PrefixVarint. In both cases, Parse32Inline was used.
//
// Warp 19 (Opteron):
//                              Encode        Parse          Skip
//   Byte Len Dist            VI   PVI      VI   PVI      VI   PVI
//   UNIFORM 2^7            12.2   9.9     3.4   3.3     3.2   3.2
//   UNIFORM 2^14           18.2  14.0     8.8   6.0     5.4   6.4
//   UNIFORM 2^21           18.1  15.1    13.0   9.7     6.7   9.5
//   UNIFORM 2^28           18.9  14.9    15.4  12.1     9.8  10.7
//   UNIFORM 2^31           23.6  19.3    20.1  14.9    12.7  10.7
//   MIXED 50:50:0:0:0      19.4  19.8    15.0  12.7    11.8  12.6
//   MIXED 20:20:20:20:20   28.2  27.3    24.9  21.8    20.7  18.8
//   MIXED 60:20:10:6:4     23.5  23.3    29.7  17.3    16.7  16.3
//   MIXED 80:12:5:2:1      16.5  16.3    11.6   9.9     9.7   9.6
//   MIXED 90:7:2:1:0       12.9  12.9     8.2   6.2     6.1   6.1
//   MIXED 15:71:13:1.2:0.1 18.9  19.2    13.8  11.2    11.0  11.8
//
// Ilium:
//                              Encode        Parse          Skip
//   Byte Len Dist            VI   PVI      VI   PVI      VI   PVI
//   UNIFORM 2^7            10.2   8.7     3.1   3.1     2.9   2.1
//   UNIFORM 2^14           15.8  13.2     7.1   4.5     4.2   3.4
//   UNIFORM 2^21           15.6  14.1    10.1   6.6     5.4   5.7
//   UNIFORM 2^28           18.1  15.2    12.7   8.8     7.3   8.3
//   UNIFORM 2^31           21.8  16.5    17.9  13.3    13.9   8.1
//   MIXED 50:50:0:0:0      19.8  20.7    14.2  13.0    12.4  12.2
//   MIXED 20:20:20:20:20   29.8  30.1    27.7  24.3    22.7  20.2
//   MIXED 60:20:10:6:4     24.2  24.9    20.1  18.9    18.7  17.2
//   MIXED 80:12:5:2:1      16.3  16.6    12.0  11.6    11.3  10.7
//   MIXED 90:7:2:1:0       12.1  12.3     7.2   7.0     6.8   6.5
//   MIXED 15:71:13:1.2:0.1 19.2  20.1    14.2  13.1    12.5  12.0
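//
// A minimal usage sketch (illustrative only; the buffer and variable names
// below are invented for this example):
//
//   char buf[PrefixVarint::kMax32];
//   char* end = PrefixVarint::SafeEncode32(buf, 300);  // writes 2 bytes; end == buf + 2
//   uint32 decoded;
//   PrefixVarint::Parse32(buf, &decoded);              // decoded == 300
//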
//
#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
#define LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
#include <string>
#include "utils/base/casts.h"
#include "utils/base/endian.h"
#include "utils/base/integral_types.h"
#include "utils/base/unaligned_access.h"
namespace libtextclassifier3 {
class PrefixVarint {
public:
// The max number of bytes used to encode a uint32 and a uint64, respectively:
static constexpr int kMax32 = 5;
static constexpr int kMax64 = 9;
// This decoder does not read past the encoded buffer.
static constexpr int kSlopBytes = 0;
// Returns the number of bytes used to encode the given value:
static int Length32(uint32 val);
static int Length64(uint64 val);
// The Encode functions may write up to the following number of bytes past the
// last encoded byte. Use the slower SafeEncode equivalents if the encode must
// not touch any slop bytes.
static constexpr int kEncode32SlopBytes = 1;
static constexpr int kEncode64SlopBytes = 3;
// The safer versions of the Encode functions, which do not need any slop bytes.
static char* SafeEncode32(char* ptr, uint32 val);
static char* SafeEncode64(char* ptr, uint64 val);
// Inlined versions:
static char* SafeEncode32Inline(char* ptr, uint32 val);
static char* SafeEncode64Inline(char* ptr, uint64 val);
// Appends the encoded value to *s.
static void Append32(std::string* s, uint32 value);
static void Append64(std::string* s, uint64 value);
// Parses the next value in the ptr buffer and returns the pointer advanced
// past the end of the encoded value.
static const char* Parse32(const char* ptr, uint32* val);
static const char* Parse64(const char* ptr, uint64* val);
// Use these in time-critical code:
static const char* Parse32Inline(const char* ptr, uint32* val);
static const char* Parse64Inline(const char* ptr, uint64* val);
private:
static constexpr int kMin2Bytes = (1 << 7);
static constexpr int kMin3Bytes = (1 << 14);
static constexpr int kMin4Bytes = (1 << 21);
static constexpr int kMin5Bytes = (1 << 28);
static constexpr int64 kMin6Bytes = (1LL << 35);
static constexpr int64 kMin7Bytes = (1LL << 42);
static constexpr int64 kMin8Bytes = (1LL << 49);
static constexpr int64 kMin9Bytes = (1LL << 56);
static void Append32Slow(std::string* s, uint32 value);
static void Append64Slow(std::string* s, uint64 value);
static const char* Parse32Fallback(uint32 code, const char* ptr, uint32* val);
static const char* Parse64Fallback(uint64 code, const char* ptr, uint64* val);
static const char* Parse32FallbackInline(uint32 code, const char* ptr,
uint32* val);
static const char* Parse64FallbackInline(uint64 code, const char* ptr,
uint64* val);
// Casting helpers to aid in making this code signed-char-clean.
static uint8* MakeUnsigned(char* p) { return bit_cast<uint8*>(p); }
static const uint8* MakeUnsigned(const char* p) {
return bit_cast<const uint8*>(p);
}
};
inline int PrefixVarint::Length32(uint32 val) {
if (val < kMin2Bytes) return 1;
if (val < kMin3Bytes) return 2;
if (val < kMin4Bytes) return 3;
if (val < kMin5Bytes) return 4;
return 5;
}
inline int PrefixVarint::Length64(uint64 val) {
if (val < kMin2Bytes) return 1;
if (val < kMin3Bytes) return 2;
if (val < kMin4Bytes) return 3;
if (val < kMin5Bytes) return 4;
if (val < kMin6Bytes) return 5;
if (val < kMin7Bytes) return 6;
if (val < kMin8Bytes) return 7;
if (val < kMin9Bytes) return 8;
return 9;
}
inline char* PrefixVarint::SafeEncode32Inline(char* p, uint32 val) {
uint8* const ptr = MakeUnsigned(p);
if (val < kMin2Bytes) {
ptr[0] = val;
return p + 1;
} else if (val < kMin3Bytes) {
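    // 2-byte form: shift the value left by the 2 tag bits, store its low 6
    // bits under the "10" tag in byte 0 and the next 8 bits in byte 1.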
val <<= 2;
uint8 low = val;
ptr[0] = (low >> 2) | 128;
ptr[1] = val >> 8;
return p + 2;
} else if (val < kMin4Bytes) {
val <<= 3;
uint8 low = val;
ptr[0] = (low >> 3) | 192;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
return p + 3;
} else if (val < kMin5Bytes) {
val <<= 4;
uint8 low = val;
ptr[0] = (low >> 4) | 224;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
ptr[3] = val >> 24;
return p + 4;
} else {
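    // 5-byte form: the 0xff tag byte carries no value bits; the full 32-bit
    // value goes in the next 4 bytes.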
ptr[0] = 0xff;
ptr[1] = val;
ptr[2] = val >> 8;
ptr[3] = val >> 16;
ptr[4] = val >> 24;
return p + 5;
}
}
inline char* PrefixVarint::SafeEncode64Inline(char* p, uint64 val) {
uint8* const ptr = MakeUnsigned(p);
if (val < kMin2Bytes) {
ptr[0] = val;
return p + 1;
} else if (val < kMin3Bytes) {
val <<= 2;
uint8 low = val;
ptr[0] = (low >> 2) | 128;
ptr[1] = val >> 8;
return p + 2;
} else if (val < kMin4Bytes) {
val <<= 3;
uint8 low = val;
ptr[0] = (low >> 3) | 192;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
return p + 3;
} else if (val < kMin5Bytes) {
val <<= 4;
uint8 low = val;
ptr[0] = (low >> 4) | 224;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
ptr[3] = val >> 24;
return p + 4;
} else if (val < kMin6Bytes) {
val <<= 5;
uint8 low = val;
ptr[0] = (low >> 5) | 240;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
ptr[3] = val >> 24;
ptr[4] = val >> 32;
return p + 5;
} else if (val < kMin7Bytes) {
val <<= 6;
uint8 low = val;
ptr[0] = (low >> 6) | 248;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
ptr[3] = val >> 24;
ptr[4] = val >> 32;
ptr[5] = val >> 40;
return p + 6;
} else if (val < kMin8Bytes) {
val <<= 7;
uint8 low = val;
ptr[0] = (low >> 7) | 252;
ptr[1] = val >> 8;
ptr[2] = val >> 16;
ptr[3] = val >> 24;
ptr[4] = val >> 32;
ptr[5] = val >> 40;
ptr[6] = val >> 48;
return p + 7;
} else if (val < kMin9Bytes) {
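    // 8-byte form: the 0xfe tag byte carries no value bits; the value (which
    // is < 2^56 here) goes in the next 7 bytes.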
ptr[0] = 254;
ptr[1] = val;
ptr[2] = val >> 8;
ptr[3] = val >> 16;
ptr[4] = val >> 24;
ptr[5] = val >> 32;
ptr[6] = val >> 40;
ptr[7] = val >> 48;
return p + 8;
} else {
ptr[0] = 255;
ptr[1] = val;
ptr[2] = val >> 8;
ptr[3] = val >> 16;
ptr[4] = val >> 24;
ptr[5] = val >> 32;
ptr[6] = val >> 40;
ptr[7] = val >> 48;
ptr[8] = val >> 56;
return p + 9;
}
}
inline void PrefixVarint::Append32(std::string* s, uint32 value) {
// Inline the fast-path for single-character output, but fall back to the .cc
// file for the full version. The size<capacity check is so the compiler can
// optimize out the string resize code.
if (value < kMin2Bytes && s->size() < s->capacity()) {
s->push_back(static_cast<unsigned char>(value));
} else {
Append32Slow(s, value);
}
}
inline void PrefixVarint::Append64(std::string* s, uint64 value) {
// Inline the fast-path for single-character output, but fall back to the .cc
// file for the full version. The size<capacity check is so the compiler can
// optimize out the string resize code.
if (value < kMin2Bytes && s->size() < s->capacity()) {
s->push_back(static_cast<unsigned char>(value));
} else {
Append64Slow(s, value);
}
}
#ifdef IS_LITTLE_ENDIAN
inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
uint32 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint32 v = ptr[1];
*val = (code & 0x3f) | (v << 6);
return p + 2;
} else {
return Parse32Fallback(code, p, val);
}
}
inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
uint64 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint64 v = ptr[1];
*val = (code & 0x3fLLU) | (v << 6);
return p + 2;
} else {
return Parse64Fallback(code, p, val);
}
}
inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
uint32 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint32 v = ptr[1];
*val = (code & 0x3f) | (v << 6);
return p + 2;
} else {
return Parse32FallbackInline(code, p, val);
}
}
inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
uint64 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint64 v = ptr[1];
    *val = (code & 0x3fLLU) | (v << 6);
return p + 2;
} else {
return Parse64FallbackInline(code, p, val);
}
}
// Only handles cases with 3-5 bytes
inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
const char* p,
uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
if (code < 224) {
uint32 v = TC3_UNALIGNED_LOAD16(ptr + 1);
*val = (code & 0x1f) | (v << 5);
return p + 3;
} else if (code < 240) {
uint32 v = ptr[3];
v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
*val = (code & 0xf) | (v << 4);
return p + 4;
} else {
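    // 5-byte form: the tag byte carries no value bits, so the value is simply
    // the next 4 bytes loaded directly.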
*val = TC3_UNALIGNED_LOAD32(ptr + 1);
return p + 5;
}
}
// Only handles cases with 3-9 bytes
inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
const char* p,
uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
if (code < 224) {
uint64 v = TC3_UNALIGNED_LOAD16(ptr + 1);
*val = (code & 0x1fLLU) | (v << 5);
return p + 3;
} else if (code < 240) {
uint64 v = ptr[3];
v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
*val = (code & 0xfLLU) | (v << 4);
return p + 4;
} else if (code < 248) {
uint64 v = TC3_UNALIGNED_LOAD32(ptr + 1);
*val = (code & 0x7LLU) | (v << 3);
return p + 5;
} else if (code < 252) {
uint64 v = ptr[5];
v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
*val = (code & 0x3LLU) | (v << 2);
return p + 6;
} else if (code < 254) {
uint64 v = TC3_UNALIGNED_LOAD16(ptr + 5);
v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
*val = (code & 0x1LLU) | (v << 1);
return p + 7;
} else if (code < 255) {
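    // 8-byte form (tag 0xfe): load all 8 bytes of the encoding, including the
    // tag, then shift the tag byte out of the low end.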
uint64 v = TC3_UNALIGNED_LOAD64(ptr);
*val = v >> 8;
return p + 8;
} else {
*val = TC3_UNALIGNED_LOAD64(ptr + 1);
return p + 9;
}
}
#else // IS_BIG_ENDIAN
// This works on big-endian machines. Performance is 1-16% slower, depending
// on the data.
inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
uint32 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint32 v = ptr[1];
*val = (code & 0x3f) | (v << 6);
return p + 2;
} else {
return Parse32Fallback(code, p, val);
}
}
inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
uint64 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint64 v = ptr[1];
*val = (code & 0x3fLLU) | (v << 6);
return p + 2;
} else {
return Parse64Fallback(code, p, val);
}
}
inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
uint32 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint32 v = ptr[1];
*val = (code & 0x3f) | (v << 6);
return p + 2;
} else {
return Parse32FallbackInline(code, p, val);
}
}
inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
uint64 code = *ptr;
if (code < 128) {
*val = code;
return p + 1;
} else if (code < 192) {
uint64 v = ptr[1];
*val = (code & 0x3fLLU) | (v << 6);
return p + 2;
} else {
return Parse64FallbackInline(code, p, val);
}
}
// Only handles cases with 3-5 bytes
inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
const char* p,
uint32* val) {
const uint8* const ptr = MakeUnsigned(p);
if (code < 224) {
uint32 v = ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0x1f) | (v << 5);
return p + 3;
} else if (code < 240) {
uint32 v = ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0xf) | (v << 4);
return p + 4;
} else {
uint32 v = ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = v;
return p + 5;
}
}
// Only handles cases with 3-9 bytes
inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
const char* p,
uint64* val) {
const uint8* const ptr = MakeUnsigned(p);
if (code < 224) {
uint64 v = ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0x1f) | (v << 5);
return p + 3;
} else if (code < 240) {
uint64 v = ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0xf) | (v << 4);
return p + 4;
} else if (code < 248) {
uint64 v = ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0x7) | (v << 3);
return p + 5;
} else if (code < 252) {
uint64 v = ptr[5];
v = (v << 8) | ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0x3) | (v << 2);
return p + 6;
} else if (code < 254) {
uint64 v = ptr[6];
v = (v << 8) | ptr[5];
v = (v << 8) | ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = (code & 0x1) | (v << 1);
return p + 7;
} else if (code < 255) {
uint64 v = ptr[7];
v = (v << 8) | ptr[6];
v = (v << 8) | ptr[5];
v = (v << 8) | ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = v;
return p + 8;
} else {
uint64 v = ptr[8];
v = (v << 8) | ptr[7];
v = (v << 8) | ptr[6];
v = (v << 8) | ptr[5];
v = (v << 8) | ptr[4];
v = (v << 8) | ptr[3];
v = (v << 8) | ptr[2];
v = (v << 8) | ptr[1];
*val = v;
return p + 9;
}
}
#endif // IS_LITTLE_ENDIAN
} // namespace libtextclassifier3
#endif // LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_