// libs/utils/String8.cpp - platform/frameworks/base - Git at Google

 /*
  * Copyright (C) 2005 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <utils/String8.h>

 #include <utils/Log.h>
 #include <utils/String16.h>
 #include <utils/TextOutput.h>
 #include <utils/threads.h>

 #include <private/utils/Static.h>

 #include <ctype.h>

 /*
  * Functions outside the android namespace are defined below it, since they
  * use functions and constants from the android namespace.
  */

 // ---------------------------------------------------------------------------

 namespace android {

 // Continuation-byte helpers: OR-ing a value with kByteMark sets bit 7 and
 // AND-ing with kByteMask clears bit 6, yielding a byte of the form 10xxxxxx.
 static const char32_t kByteMask = 0x000000BF;
 static const char32_t kByteMark = 0x00000080;

 // Surrogates aren't valid for UTF-32 characters, so define some
 // constants that will let us screen them out.
 static const char32_t kUnicodeSurrogateHighStart  = 0x0000D800;
 static const char32_t kUnicodeSurrogateHighEnd    = 0x0000DBFF;
 static const char32_t kUnicodeSurrogateLowStart   = 0x0000DC00;
 static const char32_t kUnicodeSurrogateLowEnd     = 0x0000DFFF;
 static const char32_t kUnicodeSurrogateStart      = kUnicodeSurrogateHighStart;
 static const char32_t kUnicodeSurrogateEnd        = kUnicodeSurrogateLowEnd;
 static const char32_t kUnicodeMaxCodepoint        = 0x0010FFFF;

 // Mask used to set appropriate bits in first byte of UTF-8 sequence,
 // indexed by number of bytes in the sequence.
 // 0xxxxxxx
 // -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000
 // 110yyyyx 10xxxxxx
 // -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0
 // 1110yyyy 10yxxxxx 10xxxxxx
 // -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0
 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
 // -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0
 static const char32_t kFirstByteMark[] = {
     0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0
 };

 // Separator used by resource paths. This is not platform dependent contrary
 // to OS_PATH_SEPARATOR.
 #define RES_PATH_SEPARATOR '/'

 // Compute how many UTF-8 bytes are needed to encode one UTF-32 code point.
 // Returns 0 for values that cannot be encoded: code points in the surrogate
 // range and anything above kUnicodeMaxCodepoint.
 static size_t utf32_to_utf8_bytes(char32_t srcChar)
 {
     // One byte covers the 7-bit range.
     if (srcChar < 0x00000080) {
         return 1;
     }
     // Two bytes carry up to 11 bits.
     if (srcChar < 0x00000800) {
         return 2;
     }
     // Three bytes carry up to 16 bits, except for the surrogate range.
     if (srcChar < 0x00010000) {
         const bool isSurrogate = (srcChar >= kUnicodeSurrogateStart)
                 && (srcChar <= kUnicodeSurrogateEnd);
         return isSurrogate ? 0 : 3;
     }
     // Four bytes cover everything else up to the Unicode maximum.
     return (srcChar <= kUnicodeMaxCodepoint) ? 4 : 0;
 }

 // Encode srcChar as UTF-8 into the buffer at dstP. 'bytes' is the encoded
 // length (1 to 4); any other value writes nothing. Trailing bytes are filled
 // in from last to first, then the lead byte receives the length marker.
 static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
 {
     if (bytes < 1 || bytes > 4) {
         return;
     }

     uint8_t* out = dstP + bytes;
     // Continuation bytes: the low six bits tagged as 10xxxxxx.
     for (size_t remaining = bytes; remaining > 1; --remaining) {
         *--out = (uint8_t)((srcChar | kByteMark) & kByteMask);
         srcChar >>= 6;
     }
     // Lead byte: the bits that are left plus the marker for this length.
     *--out = (uint8_t)(srcChar | kFirstByteMark[bytes]);
 }

 // ---------------------------------------------------------------------------

 // Buffer and character data of the shared empty string. Both are set by
 // initialize_string8() and reset to NULL by terminate_string8().
 static SharedBuffer* gEmptyStringBuf = NULL;
 static char* gEmptyString = NULL;

 // Dummy symbols used only to create a link-time dependency; the assignment
 // between them happens in initialize_string8().
 extern int gDarwinCantLoadAllObjects;
 int gDarwinIsReallyAnnoying;

 // Returns the shared empty-string data after calling acquire() on the
 // SharedBuffer that holds it.
 // NOTE(review): gEmptyStringBuf is dereferenced unconditionally, so this
 // presumably requires initialize_string8() to have run first -- confirm.
 static inline char* getEmptyString()
 {
     gEmptyStringBuf->acquire();
     return gEmptyString;
 }

 void initialize_string8()
 {
     // HACK: This dummy dependency forces linking libutils Static.cpp,
     // which is needed to initialize String8/String16 classes.
     // These variables are named for Darwin, but are needed elsewhere too,
     // including static linking on any platform.
     gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;

     SharedBuffer* buf = SharedBuffer::alloc(1);
     char* str = (char*)buf->data();
     *str = 0;
     gEmptyStringBuf = buf;
     gEmptyString = str;
 }

 // Counterpart of initialize_string8(): calls release() on the buffer behind
 // gEmptyString and resets both globals to NULL.
 void terminate_string8()
 {
     SharedBuffer::bufferFromData(gEmptyString)->release();
     gEmptyStringBuf = NULL;
     gEmptyString = NULL;
 }

 // ---------------------------------------------------------------------------

 static char* allocFromUTF8(const char* in, size_t len)
 {
     if (len > 0) {
         SharedBuffer* buf = SharedBuffer::alloc(len+1);
         LOG_ASSERT(buf, "Unable to allocate shared buffer");
         if (buf) {
             char* str = (char*)buf->data();
             memcpy(str, in, len);
             str[len] = 0;
             return str;
         }
         return NULL;
     }

     return getEmptyString();
 }

 template<typename T, typename L>
 static char* allocFromUTF16OrUTF32(const T* in, L len)
 {
     if (len == 0) return getEmptyString();

     size_t bytes = 0;
     const T* end = in+len;
     const T* p = in;

     while (p < end) {
         bytes += utf32_to_utf8_bytes(*p);
         p++;
     }

     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
     LOG_ASSERT(buf, "Unable to allocate shared buffer");
     if (buf) {
         p = in;
         char* str = (char*)buf->data();
         char* d = str;
         while (p < end) {
             const T c = *p++;
             size_t len = utf32_to_utf8_bytes(c);
             utf32_to_utf8((uint8_t*)d, c, len);
             d += len;
         }
         *d = 0;

         return str;
     }

     return getEmptyString();
 }

 static char* allocFromUTF16(const char16_t* in, size_t len)
 {
     if (len == 0) return getEmptyString();

     const size_t bytes = utf8_length_from_utf16(in, len);

     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
     LOG_ASSERT(buf, "Unable to allocate shared buffer");
     if (buf) {
         char* str = (char*)buf->data();

         utf16_to_utf8(in, len, str, bytes+1);

         return str;
     }

     return getEmptyString();
 }

 // UTF-32 entry point: forwards to the shared template with T = char32_t.
 static char* allocFromUTF32(const char32_t* in, size_t len)
 {
     return allocFromUTF16OrUTF32<char32_t, size_t>(in, len);
 }

 // ---------------------------------------------------------------------------

 // Default constructor: starts out referring to the shared empty string.
 String8::String8()
     : mString(getEmptyString())
 {
 }

 // Copy constructor: points at o's character data and calls acquire() on the
 // SharedBuffer that holds it; no bytes are copied here.
 String8::String8(const String8& o)
     : mString(o.mString)
 {
     SharedBuffer::bufferFromData(mString)->acquire();
 }

 // Builds from a NUL-terminated byte string. strlen(o) is evaluated
 // unconditionally, so o must not be NULL.
 String8::String8(const char* o)
     : mString(allocFromUTF8(o, strlen(o)))
 {
     // allocFromUTF8 returns NULL when allocation fails; fall back to empty.
     if (mString == NULL) {
         mString = getEmptyString();
     }
 }

 // Builds from the first len bytes at o.
 String8::String8(const char* o, size_t len)
     : mString(allocFromUTF8(o, len))
 {
     // allocFromUTF8 returns NULL when allocation fails; fall back to empty.
     if (mString == NULL) {
         mString = getEmptyString();
     }
 }

 // Builds a UTF-8 copy of a String16.
 String8::String8(const String16& o)
     : mString(allocFromUTF16(o.string(), o.size()))
 {
 }

 // Builds from UTF-16 data whose length is measured with strlen16().
 String8::String8(const char16_t* o)
     : mString(allocFromUTF16(o, strlen16(o)))
 {
 }

 // Builds from len UTF-16 units at o.
 String8::String8(const char16_t* o, size_t len)
     : mString(allocFromUTF16(o, len))
 {
 }

 // Builds from UTF-32 data whose length is measured with strlen32().
 String8::String8(const char32_t* o)
     : mString(allocFromUTF32(o, strlen32(o)))
 {
 }

 // Builds from len UTF-32 code points at o.
 String8::String8(const char32_t* o, size_t len)
     : mString(allocFromUTF32(o, len))
 {
 }

 // Destructor: calls release() on the SharedBuffer that holds mString.
 String8::~String8()
 {
     SharedBuffer::bufferFromData(mString)->release();
 }

 // Makes this string refer to the same buffer as 'other'.
 void String8::setTo(const String8& other)
 {
     // acquire() comes before release() so that assigning a string to itself
     // never drops the buffer while it is still needed.
     SharedBuffer::bufferFromData(other.mString)->acquire();
     SharedBuffer::bufferFromData(mString)->release();
     mString = other.mString;
 }

 // Replaces the contents with a copy of the NUL-terminated string 'other'.
 // Returns NO_ERROR on success; on allocation failure the string becomes
 // empty and NO_MEMORY is returned.
 status_t String8::setTo(const char* other)
 {
     // Build the replacement before dropping the current buffer: 'other' may
     // point into this string's own storage (e.g. s.setTo(s.string())), and
     // releasing first would leave strlen/memcpy reading a released buffer.
     char* const replacement = allocFromUTF8(other, strlen(other));
     SharedBuffer::bufferFromData(mString)->release();
     mString = replacement;
     if (mString) return NO_ERROR;

     mString = getEmptyString();
     return NO_MEMORY;
 }

 // Replaces the contents with a copy of the first 'len' bytes at 'other'.
 // Returns NO_ERROR on success; on allocation failure the string becomes
 // empty and NO_MEMORY is returned.
 status_t String8::setTo(const char* other, size_t len)
 {
     // Copy first, release second, so a source that aliases the current
     // buffer is still valid while it is being read.
     char* const replacement = allocFromUTF8(other, len);
     SharedBuffer::bufferFromData(mString)->release();
     mString = replacement;
     if (mString) return NO_ERROR;

     mString = getEmptyString();
     return NO_MEMORY;
 }

 // Replaces the contents with the UTF-8 conversion of 'len' UTF-16 units.
 // Returns NO_ERROR when a non-NULL string was produced, NO_MEMORY otherwise.
 status_t String8::setTo(const char16_t* other, size_t len)
 {
     // Convert before releasing, matching the const char* overloads, so the
     // source is never read after the old buffer has been dropped.
     char* const replacement = allocFromUTF16(other, len);
     SharedBuffer::bufferFromData(mString)->release();
     mString = replacement;
     if (mString) return NO_ERROR;

     mString = getEmptyString();
     return NO_MEMORY;
 }

 // Replaces the contents with the UTF-8 conversion of 'len' UTF-32 code
 // points. Returns NO_ERROR when a non-NULL string was produced, NO_MEMORY
 // otherwise.
 status_t String8::setTo(const char32_t* other, size_t len)
 {
     // Convert before releasing, matching the const char* overloads, so the
     // source is never read after the old buffer has been dropped.
     char* const replacement = allocFromUTF32(other, len);
     SharedBuffer::bufferFromData(mString)->release();
     mString = replacement;
     if (mString) return NO_ERROR;

     mString = getEmptyString();
     return NO_MEMORY;
 }

 // Appends another String8. When this string is empty it simply takes over
 // other's buffer via setTo(); appending an empty string is a no-op.
 status_t String8::append(const String8& other)
 {
     const size_t extra = other.bytes();

     if (bytes() == 0) {
         setTo(other);
         return NO_ERROR;
     }
     if (extra == 0) {
         return NO_ERROR;
     }

     return real_append(other.string(), extra);
 }

 // Appends a NUL-terminated string; the length is measured with strlen(), so
 // 'other' must not be NULL.
 status_t String8::append(const char* other)
 {
     return append(other, strlen(other));
 }

 // Appends 'otherLen' bytes from 'other'. An empty receiver is handled as an
 // assignment through setTo(); appending zero bytes is a no-op.
 status_t String8::append(const char* other, size_t otherLen)
 {
     if (bytes() == 0) {
         return setTo(other, otherLen);
     }
     if (otherLen == 0) {
         return NO_ERROR;
     }

     return real_append(other, otherLen);
 }

 // Grows the buffer to hold the current bytes plus 'otherLen' more and a
 // terminator, then copies 'other' behind the existing contents.
 // Returns NO_MEMORY (leaving mString untouched) when editResize() fails.
 // NOTE(review): 'other' is read after the resize; if it points into this
 // string's own buffer it may no longer be valid at that point -- confirm
 // against SharedBuffer::editResize and the callers.
 status_t String8::real_append(const char* other, size_t otherLen)
 {
     const size_t oldLen = bytes();

     SharedBuffer* const grown = SharedBuffer::bufferFromData(mString)
         ->editResize(oldLen+otherLen+1);
     if (grown == NULL) {
         return NO_MEMORY;
     }

     char* const base = (char*)grown->data();
     mString = base;

     char* const tail = base + oldLen;
     memcpy(tail, other, otherLen);
     tail[otherLen] = '\0';
     return NO_ERROR;
 }

 char* String8::lockBuffer(size_t size)
 {
     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
         ->editResize(size+1);
     if (buf) {
         char* str = (char*)buf->data();
         mString = str;
         return str;
     }
     return NULL;
 }

 // Ends a lockBuffer() session, taking the new length from the first NUL
 // found in the buffer. The status of the sized overload is discarded.
 void String8::unlockBuffer()
 {
     unlockBuffer(strlen(mString));
 }

 // Ends a lockBuffer() session and sets the string length to 'size'.
 // When 'size' differs from the current size the buffer is resized to
 // size+1 bytes and a terminator is written at index 'size'.
 // Returns NO_ERROR on success -- including when no resize is needed -- and
 // NO_MEMORY only when editResize() fails.
 status_t String8::unlockBuffer(size_t size)
 {
     if (size != this->size()) {
         SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
             ->editResize(size+1);
         if (buf == NULL) {
             return NO_MEMORY;
         }

         char* str = (char*)buf->data();
         str[size] = 0;
         mString = str;
     }

     // An unchanged size is not a failure; previously this path fell through
     // to the NO_MEMORY return.
     return NO_ERROR;
 }

 // Searches for 'other' with strstr(), starting at byte offset 'start'.
 // Returns the offset of the match from the beginning of the string, or -1
 // when 'start' is not inside the string or nothing is found.
 ssize_t String8::find(const char* other, size_t start) const
 {
     if (start >= size()) {
         return -1;
     }

     const char* const hit = strstr(mString+start, other);
     if (hit == NULL) {
         return -1;
     }
     return hit-mString;
 }

 // Lower-cases the whole string by delegating to the ranged overload.
 void String8::toLower()
 {
     toLower(0, size());
 }

 // Applies tolower() to 'length' bytes starting at byte offset 'start'.
 // A range that runs past the end is clamped; a 'start' at or beyond the end
 // does nothing. The string is left unchanged if the buffer cannot be locked.
 void String8::toLower(size_t start, size_t length)
 {
     const size_t len = size();
     if (start >= len) {
         return;
     }
     // Clamp without forming start+length, which can wrap around for very
     // large 'length' values and slip past the bounds check.
     if (length > len-start) {
         length = len-start;
     }
     char* buf = lockBuffer(len);
     if (buf == NULL) {
         // lockBuffer() reports failure with NULL; nothing to modify.
         return;
     }
     buf += start;
     while (length > 0) {
         // tolower() requires a value representable as unsigned char (or
         // EOF); a plain char may be negative for bytes >= 0x80.
         *buf = (char)tolower((unsigned char)*buf);
         buf++;
         length--;
     }
     unlockBuffer(len);
 }

 // Upper-cases the whole string by delegating to the ranged overload.
 void String8::toUpper()
 {
     toUpper(0, size());
 }

 // Applies toupper() to 'length' bytes starting at byte offset 'start'.
 // A range that runs past the end is clamped; a 'start' at or beyond the end
 // does nothing. The string is left unchanged if the buffer cannot be locked.
 void String8::toUpper(size_t start, size_t length)
 {
     const size_t len = size();
     if (start >= len) {
         return;
     }
     // Clamp without forming start+length, which can wrap around for very
     // large 'length' values and slip past the bounds check.
     if (length > len-start) {
         length = len-start;
     }
     char* buf = lockBuffer(len);
     if (buf == NULL) {
         // lockBuffer() reports failure with NULL; nothing to modify.
         return;
     }
     buf += start;
     while (length > 0) {
         // toupper() requires a value representable as unsigned char (or
         // EOF); a plain char may be negative for bytes >= 0x80.
         *buf = (char)toupper((unsigned char)*buf);
         buf++;
         length--;
     }
     unlockBuffer(len);
 }

 // Forwards to utf32_length() with this string's data and length().
 size_t String8::getUtf32Length() const
 {
     return utf32_length(mString, length());
 }

 // Forwards to utf32_at() with this string's data and length(); 'index' and
 // 'next_index' are passed through unchanged.
 int32_t String8::getUtf32At(size_t index, size_t *next_index) const
 {
     return utf32_at(mString, length(), index, next_index);
 }

 // Forwards to utf8_to_utf32() to decode this string into 'dst', which has
 // room for 'dst_len' elements.
 size_t String8::getUtf32(char32_t* dst, size_t dst_len) const
 {
     return utf8_to_utf32(mString, length(), dst, dst_len);
 }

 // Streams the string's character data into a TextOutput and returns the
 // stream so calls can be chained.
 TextOutput& operator<<(TextOutput& to, const String8& val)
 {
     to << val.string();
     return to;
 }

 // ---------------------------------------------------------------------------
 // Path functions

 // Sets the path from a NUL-terminated string; the length is measured with
 // strlen(), so 'name' must not be NULL.
 void String8::setPathName(const char* name)
 {
     setPathName(name, strlen(name));
 }

 // Replaces the contents with the first 'len' bytes of 'name', dropping one
 // trailing OS_PATH_SEPARATOR if present. The string is left unchanged when
 // the buffer cannot be locked.
 void String8::setPathName(const char* name, size_t len)
 {
     char* buf = lockBuffer(len);
     if (buf == NULL) {
         // lockBuffer() reports failure with NULL; do not write through it.
         return;
     }

     memcpy(buf, name, len);

     // remove trailing path separator, if present
     if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
         len--;

     buf[len] = '\0';

     unlockBuffer(len);
 }

 // Returns everything after the last OS_PATH_SEPARATOR, or a copy of the
 // whole string when there is no separator.
 String8 String8::getPathLeaf(void) const
 {
     const char* const lastSep = strrchr(mString, OS_PATH_SEPARATOR);
     if (lastSep != NULL) {
         return String8(lastSep+1);
     }
     return String8(*this);
 }

 // Returns everything before the last OS_PATH_SEPARATOR, or an empty string
 // when there is no separator.
 String8 String8::getPathDir(void) const
 {
     const char* const path = mString;
     const char* const lastSep = strrchr(path, OS_PATH_SEPARATOR);
     if (lastSep == NULL) {
         return String8("");
     }
     return String8(path, lastSep - path);
 }

 // Splits off the first path component. A single leading separator is
 // skipped. The component is returned; when outRemains is non-NULL it
 // receives whatever follows the separator that ended the component, or an
 // empty string when there was no further separator.
 String8 String8::walkPath(String8* outRemains) const
 {
     const char* const whole = mString;
     const char* first = whole;

     const char* sep = strchr(first, OS_PATH_SEPARATOR);
     if (sep == first) {
         // don't include a leading '/'.
         first = first+1;
         sep = strchr(first, OS_PATH_SEPARATOR);
     }

     if (sep != NULL) {
         String8 head(first, sep-first);
         if (outRemains) *outRemains = String8(sep+1);
         return head;
     }

     // No further separator: the remaining text is the only component.
     String8 head = first != whole ? String8(first) : *this;
     if (outRemains) *outRemains = String8("");
     return head;
 }

 /*
  * Helper function for finding the start of an extension in a pathname.
  *
  * Only the part after the last OS_PATH_SEPARATOR is searched, so a dot in a
  * directory name is never treated as the start of an extension.
  *
  * Returns a pointer inside mString (at the last '.'), or NULL if no
  * extension was found.
  */
 char* String8::find_extension(void) const
 {
     const char* lastSlash;
     const char* lastDot;
     const char* const str = mString;

     // only look at the filename
     lastSlash = strrchr(str, OS_PATH_SEPARATOR);
     if (lastSlash == NULL)
         lastSlash = str;
     else
         lastSlash++;

     // find the last dot
     lastDot = strrchr(lastSlash, '.');
     if (lastDot == NULL)
         return NULL;

     // looks good, ship it
     return const_cast<char*>(lastDot);
 }

 // Returns the extension located by find_extension(), starting at the dot,
 // or an empty string when there is none.
 String8 String8::getPathExtension(void) const
 {
     const char* const dot = find_extension();
     if (dot == NULL) {
         return String8("");
     }
     return String8(dot);
 }

 // Returns the string with the extension located by find_extension()
 // removed, or a copy of the whole string when there is none.
 String8 String8::getBasePath(void) const
 {
     const char* const path = mString;
     const char* const dot = find_extension();
     if (dot != NULL) {
         return String8(path, dot - path);
     }
     return String8(*this);
 }

 // Appends 'name' as a path component and returns *this.
 //  - A name starting with OS_PATH_SEPARATOR replaces the current path.
 //  - An empty name leaves the path unchanged.
 //  - Otherwise the name is added after the current path, with a separator
 //    inserted when the path does not already end in one.
 // The path is left unchanged when the buffer cannot be locked.
 // NOTE(review): 'name' is read after lockBuffer(); if it points into this
 // string's own buffer it may no longer be valid then -- confirm callers.
 String8& String8::appendPath(const char* name)
 {
     // TODO: The test below will fail for Win32 paths. Fix later or ignore.
     if (name[0] == OS_PATH_SEPARATOR) {
         setPathName(name);
         return *this;
     }

     if (*name == '\0') {
         // nothing to do
         return *this;
     }

     size_t len = length();
     if (len == 0) {
         // no existing filename, just use the new one
         setPathName(name);
         return *this;
     }

     // make room for oldPath + '/' + newPath
     // (size_t, not int: strlen() returns size_t and the value feeds
     // size_t arithmetic below)
     const size_t newlen = strlen(name);

     char* buf = lockBuffer(len+1+newlen);
     if (buf == NULL) {
         // lockBuffer() reports failure with NULL; do not write through it.
         return *this;
     }

     // insert a '/' if needed
     if (buf[len-1] != OS_PATH_SEPARATOR)
         buf[len++] = OS_PATH_SEPARATOR;

     // newlen+1 also copies name's terminator
     memcpy(buf+len, name, newlen+1);
     len += newlen;

     unlockBuffer(len);

     return *this;
 }

 // Rewrites every OS_PATH_SEPARATOR in the string to RES_PATH_SEPARATOR and
 // returns *this. Compiles to a no-op when the two separators are equal.
 String8& String8::convertToResPath()
 {
 #if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
     const size_t total = length();
     if (total > 0) {
         char* const chars = lockBuffer(total);
         for (size_t i = 0; i < total; ++i) {
             if (chars[i] == OS_PATH_SEPARATOR) {
                 chars[i] = RES_PATH_SEPARATOR;
             }
         }
         unlockBuffer(total);
     }
 #endif
     return *this;
 }

 }; // namespace android

 // ---------------------------------------------------------------------------

 // Counts the char32_t elements that precede the terminating zero.
 size_t strlen32(const char32_t *s)
 {
     size_t count = 0;
     while (s[count] != 0) {
         ++count;
     }
     return count;
 }

 // Counts the char32_t elements that precede the terminating zero, looking
 // at no more than 'maxlen' elements.
 size_t strnlen32(const char32_t *s, size_t maxlen)
 {
     size_t count = 0;
     // The bound is tested first so s[count] is never read past maxlen.
     while (count < maxlen && s[count] != 0) {
         ++count;
     }
     return count;
 }

 // Validates a NUL-terminated UTF-8 string and returns the number of bytes
 // it occupies (terminator excluded). Returns 0 as soon as a problem is
 // found: a stray continuation byte, a lead byte announcing more than four
 // bytes, a missing continuation byte, or a decoded value above
 // kUnicodeMaxCodepoint.
 size_t utf8_length(const char *src)
 {
     const char *cur = src;
     size_t ret = 0;
     while (*cur != '\0') {
         // Read the lead byte as unsigned. With a plain (possibly signed)
         // char, bytes >= 0x80 sign-extend when promoted, the payload
         // extraction below picks up the extension bits, and every
         // multi-byte sequence then fails the maximum-code-point check.
         const uint8_t first_char = static_cast<uint8_t>(*cur++);
         if ((first_char & 0x80) == 0) { // ASCII
             ret += 1;
             continue;
         }
         // (UTF-8's character must not be like 10xxxxxx,
         //  but 110xxxxx, 1110xxxx, ... or 1111110x)
         if ((first_char & 0x40) == 0) {
             return 0;
         }

         int32_t mask, to_ignore_mask;
         size_t num_to_read = 0;
         char32_t utf32 = 0;
         // Each 1 bit after the top bit of the lead byte announces one
         // continuation byte; to_ignore_mask collects those marker bits.
         for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
              num_to_read < 5 && (first_char & mask);
              num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
             if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx
                 return 0;
             }
             // 0x3F == 00111111
             utf32 = (utf32 << 6) + (*cur++ & 0x3F);
         }
         // "first_char" must be (110xxxxx - 11110xxx)
         if (num_to_read == 5) {
             return 0;
         }
         // Also ignore the 0 bit that ends the marker, then place the lead
         // byte's payload bits above the continuation bits.
         to_ignore_mask |= mask;
         utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
         if (utf32 > android::kUnicodeMaxCodepoint) {
             return 0;
         }

         ret += num_to_read;
     }
     return ret;
 }

 // Counts the code points in the first 'src_len' bytes of a UTF-8 buffer by
 // hopping from lead byte to lead byte. The hop distance is taken from the
 // run of 1 bits at the top of each lead byte; continuation bytes are not
 // validated. Returns 0 for a NULL or empty buffer.
 size_t utf32_length(const char *src, size_t src_len)
 {
     if (src == NULL || src_len == 0) {
         return 0;
     }

     const char* const limit = src + src_len;
     const char* pos = src;
     size_t count = 0;
     while (pos < limit) {
         const char lead = *pos;
         size_t step = 1;
         if ((lead & 0x80) != 0) {
             // One extra byte for every 1 bit that follows the top bit.
             for (int32_t bit = 0x40; (lead & bit) != 0; bit >>= 1) {
                 ++step;
             }
         }
         pos += step;
         ++count;
     }
     return count;
 }

 // Sums the UTF-8 byte counts of 'src_len' UTF-32 code points. Code points
 // that utf32_to_utf8_bytes() rejects add nothing. Returns 0 for a NULL or
 // empty input.
 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
 {
     if (src == NULL || src_len == 0) {
         return 0;
     }

     size_t total = 0;
     for (size_t i = 0; i < src_len; ++i) {
         total += android::utf32_to_utf8_bytes(src[i]);
     }
     return total;
 }

 // Returns the number of UTF-8 bytes needed for 'src_len' UTF-16 units.
 // A high surrogate immediately followed by a low surrogate counts as one
 // four-byte sequence; every other unit is sized on its own by
 // utf32_to_utf8_bytes(), which gives 0 for an unpaired surrogate.
 // Returns 0 for a NULL or empty input.
 size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
 {
     if (src == NULL || src_len == 0) {
         return 0;
     }
     size_t ret = 0;
     const char16_t* const end = src + src_len;
     while (src < end) {
         // Peek at the next unit without advancing. Advancing inside the
         // condition moved past the high surrogate even when no pair was
         // found, so a following surrogate pair could be miscounted.
         const bool isPair = (*src & 0xFC00) == 0xD800 && (src + 1) < end
                 && (src[1] & 0xFC00) == 0xDC00;
         if (isPair) {
             // surrogate pairs are always 4 bytes.
             ret += 4;
             src += 2;
         } else {
             ret += android::utf32_to_utf8_bytes((char32_t) *src++);
         }
     }
     return ret;
 }

 // Decodes the UTF-8 sequence that starts at 'cur' into one code point and
 // stores the number of bytes consumed in *num_read.
 // NOTE(review): there is no length argument and continuation bytes are not
 // checked for the 10xxxxxx form, so a malformed or truncated sequence makes
 // this read beyond the bytes the caller intended -- callers appear to be
 // responsible for passing well-formed UTF-8; confirm.
 static int32_t utf32_at_internal(const char* cur, size_t *num_read)
 {
     const char first_char = *cur;
     if ((first_char & 0x80) == 0) { // ASCII
         *num_read = 1;
         return *cur;
     }
     cur++;
     char32_t mask, to_ignore_mask;
     size_t num_to_read = 0;
     // Seeded with the lead byte (length marker bits included); the marker
     // bits are masked off after the loop.
     char32_t utf32 = first_char;
     // Each 1 bit after the top bit of the lead byte means one more
     // continuation byte; to_ignore_mask accumulates those marker bits.
     for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80;
          (first_char & mask);
          num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
         // 0x3F == 00111111
         utf32 = (utf32 << 6) + (*cur++ & 0x3F);
     }
     // Include the 0 bit that ends the marker, then clear the marker bits at
     // the position the lead byte was shifted to.
     to_ignore_mask |= mask;
     utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1)));

     *num_read = num_to_read;
     return static_cast<int32_t>(utf32);
 }

 // Decodes the code point that starts at byte offset 'index' of 'src'.
 // Returns -1 when 'index' is not below 'src_len'. When the decoded value is
 // non-negative and 'next_index' is non-NULL, *next_index receives the
 // offset just past the decoded sequence.
 int32_t utf32_at(const char *src, size_t src_len,
                  size_t index, size_t *next_index)
 {
     if (index >= src_len) {
         return -1;
     }

     size_t consumed;
     const int32_t codePoint = utf32_at_internal(src + index, &consumed);
     if (codePoint >= 0 && next_index != NULL) {
         *next_index = index + consumed;
     }
     return codePoint;
 }

 // Decodes UTF-8 from 'src' into 'dst' until the input is used up or
 // 'dst_len' code points have been written. A terminating 0 is stored only
 // when there is still room for it. Returns the number of code points
 // written, or 0 when either buffer is NULL or empty.
 size_t utf8_to_utf32(const char* src, size_t src_len,
                      char32_t* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
         return 0;
     }

     const char* in = src;
     const char* const inEnd = src + src_len;
     size_t written = 0;
     while (written < dst_len && in < inEnd) {
         size_t consumed;
         dst[written++] =
                 static_cast<char32_t>(utf32_at_internal(in, &consumed));
         in += consumed;
     }
     if (written < dst_len) {
         dst[written] = 0;
     }
     return written;
 }

 // Encodes 'src_len' UTF-32 code points as UTF-8 into 'dst', which has room
 // for 'dst_len' bytes. Conversion stops at the first code point whose
 // encoding would not fit in the remaining space. A terminating '\0' is
 // stored only when there is still room for it. Returns the number of bytes
 // written (terminator excluded), or 0 when either buffer is NULL or empty.
 size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                      char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
         return 0;
     }
     const char32_t *cur_utf32 = src;
     const char32_t *end_utf32 = src + src_len;
     char *cur = dst;
     const char *end = dst + dst_len;
     while (cur_utf32 < end_utf32 && cur < end) {
         const size_t len = android::utf32_to_utf8_bytes(*cur_utf32);
         // "cur < end" only guarantees one free byte; an encoding can need
         // up to four, so check the whole sequence before writing it.
         if (len > static_cast<size_t>(end - cur)) {
             break;
         }
         android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len);
         cur += len;
     }
     if (cur < end) {
         *cur = '\0';
     }
     return cur - dst;
 }

 // Encodes 'src_len' UTF-16 units as UTF-8 into 'dst', which has room for
 // 'dst_len' bytes. A high surrogate followed by a low surrogate is combined
 // into one code point; any other unit is converted on its own (an unpaired
 // surrogate produces no output, as utf32_to_utf8_bytes() gives 0 for it).
 // Conversion stops at the first code point whose encoding would not fit.
 // A terminating '\0' is stored only when there is still room for it.
 // Returns the number of bytes written (terminator excluded), or 0 when
 // either buffer is NULL or empty.
 size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                      char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
         return 0;
     }
     const char16_t* cur_utf16 = src;
     const char16_t* const end_utf16 = src + src_len;
     char *cur = dst;
     const char* const end = dst + dst_len;
     while (cur_utf16 < end_utf16 && cur < end) {
         char32_t utf32;
         // surrogate pairs: only combine when the second unit really is a
         // low surrogate. Pairing a high surrogate with an arbitrary unit
         // yields a bogus code point whose encoded size differs from what
         // utf8_length_from_utf16() reports for the same input.
         if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16
                 && (*(cur_utf16 + 1) & 0xFC00) == 0xDC00) {
             utf32 = (*cur_utf16++ - 0xD800) << 10;
             utf32 |= *cur_utf16++ - 0xDC00;
             utf32 += 0x10000;
         } else {
             utf32 = (char32_t) *cur_utf16++;
         }
         const size_t len = android::utf32_to_utf8_bytes(utf32);
         // "cur < end" only guarantees one free byte; an encoding can need
         // up to four, so check the whole sequence before writing it.
         if (len > static_cast<size_t>(end - cur)) {
             break;
         }
         android::utf32_to_utf8((uint8_t*)cur, utf32, len);
         cur += len;
     }
     if (cur < end) {
         *cur = '\0';
     }
     return cur - dst;
 }
	/*
	* Copyright (C) 2005 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <utils/String8.h>

	#include <utils/Log.h>
	#include <utils/String16.h>
	#include <utils/TextOutput.h>
	#include <utils/threads.h>

	#include <private/utils/Static.h>

	#include <ctype.h>

	/*
	* Functions outside the android namespace are defined below it, since they
	* use functions and constants from the android namespace.
	*/

	// ---------------------------------------------------------------------------

	namespace android {

	// Continuation-byte helpers: OR-ing a value with kByteMark sets bit 7 and
	// AND-ing with kByteMask clears bit 6, yielding a byte of the form 10xxxxxx.
	static const char32_t kByteMask = 0x000000BF;
	static const char32_t kByteMark = 0x00000080;

	// Surrogates aren't valid for UTF-32 characters, so define some
	// constants that will let us screen them out.
	static const char32_t kUnicodeSurrogateHighStart = 0x0000D800;
	static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF;
	static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00;
	static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF;
	static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart;
	static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd;
	static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF;

	// Mask used to set appropriate bits in first byte of UTF-8 sequence,
	// indexed by number of bytes in the sequence.
	// 0xxxxxxx
	// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000
	// 110yyyyx 10xxxxxx
	// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0
	// 1110yyyy 10yxxxxx 10xxxxxx
	// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0
	// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
	// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0
	static const char32_t kFirstByteMark[] = {
	0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0
	};

	// Separator used by resource paths. This is not platform dependent contrary
	// to OS_PATH_SEPARATOR.
	#define RES_PATH_SEPARATOR '/'

	// Return the number of UTF-8 bytes required to encode the character.
	// Returns 0 for values that cannot be encoded: code points in the
	// surrogate range and anything above kUnicodeMaxCodepoint.
	static size_t utf32_to_utf8_bytes(char32_t srcChar)
	{
	    size_t bytesToWrite;

	    // Figure out how many bytes the result will require.
	    if (srcChar < 0x00000080)
	    {
	        bytesToWrite = 1;
	    }
	    else if (srcChar < 0x00000800)
	    {
	        bytesToWrite = 2;
	    }
	    else if (srcChar < 0x00010000)
	    {
	        // The logical OR here had been mangled into an escaped form
	        // that is not valid C++.
	        if ((srcChar < kUnicodeSurrogateStart)
	         || (srcChar > kUnicodeSurrogateEnd))
	        {
	            bytesToWrite = 3;
	        }
	        else
	        {
	            // Surrogates are invalid UTF-32 characters.
	            return 0;
	        }
	    }
	    // Max code point for Unicode is 0x0010FFFF.
	    else if (srcChar <= kUnicodeMaxCodepoint)
	    {
	        bytesToWrite = 4;
	    }
	    else
	    {
	        // Invalid UTF-32 character.
	        return 0;
	    }

	    return bytesToWrite;
	}

	// Write out the source character to <dstP> as UTF-8. 'bytes' is the
	// encoded length (1 to 4); any other value writes nothing. Trailing bytes
	// are filled in from last to first, then the lead byte receives the
	// length marker from kFirstByteMark.
	static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
	{
	    dstP += bytes;
	    // The bitwise ORs below had been mangled into an escaped form that is
	    // not valid C++.
	    switch (bytes)
	    {   /* note: everything falls through. */
	        case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
	        case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
	        case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
	        case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]);
	    }
	}

	// ---------------------------------------------------------------------------

	// Buffer and character data of the shared empty string. Both are set by
	// initialize_string8() and reset to NULL by terminate_string8().
	static SharedBuffer* gEmptyStringBuf = NULL;
	static char* gEmptyString = NULL;

	// Dummy symbols used only to create a link-time dependency; the assignment
	// between them happens in initialize_string8().
	extern int gDarwinCantLoadAllObjects;
	int gDarwinIsReallyAnnoying;

	// Returns the shared empty-string data after calling acquire() on the
	// SharedBuffer that holds it.
	// NOTE(review): gEmptyStringBuf is dereferenced unconditionally, so this
	// presumably requires initialize_string8() to have run first -- confirm.
	static inline char* getEmptyString()
	{
	gEmptyStringBuf->acquire();
	return gEmptyString;
	}

	void initialize_string8()
	{
	// HACK: This dummy dependency forces linking libutils Static.cpp,
	// which is needed to initialize String8/String16 classes.
	// These variables are named for Darwin, but are needed elsewhere too,
	// including static linking on any platform.
	gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;

	SharedBuffer* buf = SharedBuffer::alloc(1);
	char* str = (char*)buf->data();
	*str = 0;
	gEmptyStringBuf = buf;
	gEmptyString = str;
	}

	// Counterpart of initialize_string8(): calls release() on the buffer behind
	// gEmptyString and resets both globals to NULL.
	void terminate_string8()
	{
	SharedBuffer::bufferFromData(gEmptyString)->release();
	gEmptyStringBuf = NULL;
	gEmptyString = NULL;
	}

	// ---------------------------------------------------------------------------

	static char* allocFromUTF8(const char* in, size_t len)
	{
	if (len > 0) {
	SharedBuffer* buf = SharedBuffer::alloc(len+1);
	LOG_ASSERT(buf, "Unable to allocate shared buffer");
	if (buf) {
	char* str = (char*)buf->data();
	memcpy(str, in, len);
	str[len] = 0;
	return str;
	}
	return NULL;
	}

	return getEmptyString();
	}

	template<typename T, typename L>
	static char* allocFromUTF16OrUTF32(const T* in, L len)
	{
	if (len == 0) return getEmptyString();

	size_t bytes = 0;
	const T* end = in+len;
	const T* p = in;

	while (p < end) {
	bytes += utf32_to_utf8_bytes(*p);
	p++;
	}

	SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	LOG_ASSERT(buf, "Unable to allocate shared buffer");
	if (buf) {
	p = in;
	char* str = (char*)buf->data();
	char* d = str;
	while (p < end) {
	const T c = *p++;
	size_t len = utf32_to_utf8_bytes(c);
	utf32_to_utf8((uint8_t*)d, c, len);
	d += len;
	}
	*d = 0;

	return str;
	}

	return getEmptyString();
	}

	static char* allocFromUTF16(const char16_t* in, size_t len)
	{
	if (len == 0) return getEmptyString();

	const size_t bytes = utf8_length_from_utf16(in, len);

	SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	LOG_ASSERT(buf, "Unable to allocate shared buffer");
	if (buf) {
	char* str = (char*)buf->data();

	utf16_to_utf8(in, len, str, bytes+1);

	return str;
	}

	return getEmptyString();
	}

	// UTF-32 entry point: forwards to the shared template with T = char32_t.
	static char* allocFromUTF32(const char32_t* in, size_t len)
	{
	return allocFromUTF16OrUTF32<char32_t, size_t>(in, len);
	}

	// ---------------------------------------------------------------------------

	// Default constructor: starts out referring to the shared empty string.
	String8::String8()
	: mString(getEmptyString())
	{
	}

	// Copy constructor: points at o's character data and calls acquire() on the
	// SharedBuffer that holds it; no bytes are copied here.
	String8::String8(const String8& o)
	: mString(o.mString)
	{
	SharedBuffer::bufferFromData(mString)->acquire();
	}

	// Builds from a NUL-terminated byte string. strlen(o) is evaluated
	// unconditionally, so o must not be NULL.
	String8::String8(const char* o)
	: mString(allocFromUTF8(o, strlen(o)))
	{
	// allocFromUTF8 returns NULL when allocation fails; fall back to empty.
	if (mString == NULL) {
	mString = getEmptyString();
	}
	}

	// Builds from the first len bytes at o.
	String8::String8(const char* o, size_t len)
	: mString(allocFromUTF8(o, len))
	{
	// allocFromUTF8 returns NULL when allocation fails; fall back to empty.
	if (mString == NULL) {
	mString = getEmptyString();
	}
	}

	// Builds a UTF-8 copy of a String16.
	String8::String8(const String16& o)
	: mString(allocFromUTF16(o.string(), o.size()))
	{
	}

	// Builds from UTF-16 data whose length is measured with strlen16().
	String8::String8(const char16_t* o)
	: mString(allocFromUTF16(o, strlen16(o)))
	{
	}

	// Builds from len UTF-16 units at o.
	String8::String8(const char16_t* o, size_t len)
	: mString(allocFromUTF16(o, len))
	{
	}

	// Builds from UTF-32 data whose length is measured with strlen32().
	String8::String8(const char32_t* o)
	: mString(allocFromUTF32(o, strlen32(o)))
	{
	}

	// Builds from len UTF-32 code points at o.
	String8::String8(const char32_t* o, size_t len)
	: mString(allocFromUTF32(o, len))
	{
	}

	// Destructor: calls release() on the SharedBuffer that holds mString.
	String8::~String8()
	{
	SharedBuffer::bufferFromData(mString)->release();
	}

	// Makes this string refer to the same buffer as 'other'.
	void String8::setTo(const String8& other)
	{
	// acquire() comes before release() so that assigning a string to itself
	// never drops the buffer while it is still needed.
	SharedBuffer::bufferFromData(other.mString)->acquire();
	SharedBuffer::bufferFromData(mString)->release();
	mString = other.mString;
	}

	// Replaces the contents with a copy of the NUL-terminated string 'other'.
	// Returns NO_ERROR on success; on allocation failure the string becomes
	// empty and NO_MEMORY is returned.
	status_t String8::setTo(const char* other)
	{
	    // Build the replacement before dropping the current buffer: 'other' may
	    // point into this string's own storage (e.g. s.setTo(s.string())), and
	    // releasing first would leave strlen/memcpy reading a released buffer.
	    char* const replacement = allocFromUTF8(other, strlen(other));
	    SharedBuffer::bufferFromData(mString)->release();
	    mString = replacement;
	    if (mString) return NO_ERROR;

	    mString = getEmptyString();
	    return NO_MEMORY;
	}

	// Replaces the contents with a copy of the first 'len' bytes at 'other'.
	// Returns NO_ERROR on success; on allocation failure the string becomes
	// empty and NO_MEMORY is returned.
	status_t String8::setTo(const char* other, size_t len)
	{
	    // Copy first, release second, so a source that aliases the current
	    // buffer is still valid while it is being read.
	    char* const replacement = allocFromUTF8(other, len);
	    SharedBuffer::bufferFromData(mString)->release();
	    mString = replacement;
	    if (mString) return NO_ERROR;

	    mString = getEmptyString();
	    return NO_MEMORY;
	}

	// Replaces the contents with the UTF-8 conversion of 'len' UTF-16 units.
	// Returns NO_ERROR when a non-NULL string was produced, NO_MEMORY otherwise.
	status_t String8::setTo(const char16_t* other, size_t len)
	{
	    // Convert before releasing, matching the const char* overloads, so the
	    // source is never read after the old buffer has been dropped.
	    char* const replacement = allocFromUTF16(other, len);
	    SharedBuffer::bufferFromData(mString)->release();
	    mString = replacement;
	    if (mString) return NO_ERROR;

	    mString = getEmptyString();
	    return NO_MEMORY;
	}

	// Replaces the contents with the UTF-8 conversion of 'len' UTF-32 code
	// points. Returns NO_ERROR when a non-NULL string was produced, NO_MEMORY
	// otherwise.
	status_t String8::setTo(const char32_t* other, size_t len)
	{
	    // Convert before releasing, matching the const char* overloads, so the
	    // source is never read after the old buffer has been dropped.
	    char* const replacement = allocFromUTF32(other, len);
	    SharedBuffer::bufferFromData(mString)->release();
	    mString = replacement;
	    if (mString) return NO_ERROR;

	    mString = getEmptyString();
	    return NO_MEMORY;
	}

	// Appends another String8. When this string is empty it simply takes over
	// other's buffer via setTo(); appending an empty string is a no-op.
	status_t String8::append(const String8& other)
	{
	    const size_t extra = other.bytes();

	    if (bytes() == 0) {
	        setTo(other);
	        return NO_ERROR;
	    }
	    if (extra == 0) {
	        return NO_ERROR;
	    }

	    return real_append(other.string(), extra);
	}

	// Appends a NUL-terminated string; the length is measured with strlen(), so
	// 'other' must not be NULL.
	status_t String8::append(const char* other)
	{
	return append(other, strlen(other));
	}

	// Appends 'otherLen' bytes from 'other'. An empty receiver is handled as an
	// assignment through setTo(); appending zero bytes is a no-op.
	status_t String8::append(const char* other, size_t otherLen)
	{
	    if (bytes() == 0) {
	        return setTo(other, otherLen);
	    }
	    if (otherLen == 0) {
	        return NO_ERROR;
	    }

	    return real_append(other, otherLen);
	}

	// Grows the backing buffer and copies `otherLen` bytes from `other` after
	// the current contents, then NUL-terminates the result.
	//
	// Returns NO_ERROR on success, or NO_MEMORY when the requested size would
	// overflow size_t or editResize() returns NULL (the string is left as it
	// was in both cases).
	//
	// NOTE(review): if `other` points into this string's own buffer,
	// editResize() may move that buffer before the memcpy -- confirm callers
	// never pass an aliasing pointer.
	status_t String8::real_append(const char* other, size_t otherLen)
	{
	    const size_t myLen = bytes();

	    // The new size is myLen + otherLen + 1. Reject lengths for which that
	    // sum wraps around; a wrapped size would yield a tiny buffer that the
	    // memcpy below then overruns.
	    const size_t kMaxSize = static_cast<size_t>(-1);
	    if (otherLen >= kMaxSize - myLen) {
	        return NO_MEMORY;
	    }

	    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
	        ->editResize(myLen+otherLen+1);
	    if (!buf) {
	        return NO_MEMORY;
	    }

	    char* str = (char*)buf->data();
	    mString = str;
	    str += myLen;                 // first byte after the old contents
	    memcpy(str, other, otherLen);
	    str[otherLen] = '\0';
	    return NO_ERROR;
	}

	char* String8::lockBuffer(size_t size)
	{
	SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
	->editResize(size+1);
	if (buf) {
	char* str = (char*)buf->data();
	mString = str;
	return str;
	}
	return NULL;
	}

	void String8::unlockBuffer()
	{
	unlockBuffer(strlen(mString));
	}

	// Ends a lockBuffer() session and sets the string length to `size`.
	//
	// If `size` differs from the current size() the buffer is resized to
	// `size`+1 bytes and NUL-terminated at `size`. Returns NO_MEMORY only when
	// that resize fails; an unchanged size is a successful no-op (the previous
	// code fell through to NO_MEMORY in that case even though nothing failed).
	status_t String8::unlockBuffer(size_t size)
	{
	    if (size != this->size()) {
	        SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
	            ->editResize(size+1);
	        if (!buf) {
	            return NO_MEMORY;
	        }

	        char* str = (char*)buf->data();
	        str[size] = 0;
	        mString = str;
	    }

	    return NO_ERROR;
	}

	// Searches for `other` starting at byte offset `start`.
	// Returns the byte offset of the first match measured from the beginning
	// of the string, or -1 when `start` is at/after the end or strstr() finds
	// nothing.
	ssize_t String8::find(const char* other, size_t start) const
	{
	    if (start >= size()) {
	        return -1;
	    }

	    const char* const hit = strstr(mString + start, other);
	    if (hit == NULL) {
	        return -1;
	    }
	    return hit - mString;
	}

	void String8::toLower()
	{
	toLower(0, size());
	}

	// Lower-cases up to `length` bytes beginning at byte offset `start`, using
	// tolower() on each byte. Does nothing when `start` is at/after the end of
	// the string; a range that runs past the end is clamped to the end.
	void String8::toLower(size_t start, size_t length)
	{
	    const size_t len = size();
	    if (start >= len) {
	        return;
	    }
	    // Clamp without forming start+length, which wraps for very large
	    // `length` values and would then skip the clamp entirely.
	    if (length > len - start) {
	        length = len - start;
	    }
	    char* buf = lockBuffer(len);
	    if (buf == NULL) {
	        // lockBuffer() could not provide a writable buffer; leave as is.
	        return;
	    }
	    buf += start;
	    for (; length > 0; --length, ++buf) {
	        // tolower() requires a value representable as unsigned char.
	        *buf = static_cast<char>(tolower(static_cast<unsigned char>(*buf)));
	    }
	    unlockBuffer(len);
	}

	void String8::toUpper()
	{
	toUpper(0, size());
	}

	// Upper-cases up to `length` bytes beginning at byte offset `start`, using
	// toupper() on each byte. Does nothing when `start` is at/after the end of
	// the string; a range that runs past the end is clamped to the end.
	void String8::toUpper(size_t start, size_t length)
	{
	    const size_t len = size();
	    if (start >= len) {
	        return;
	    }
	    // Clamp without forming start+length, which wraps for very large
	    // `length` values and would then skip the clamp entirely.
	    if (length > len - start) {
	        length = len - start;
	    }
	    char* buf = lockBuffer(len);
	    if (buf == NULL) {
	        // lockBuffer() could not provide a writable buffer; leave as is.
	        return;
	    }
	    buf += start;
	    for (; length > 0; --length, ++buf) {
	        // toupper() requires a value representable as unsigned char.
	        *buf = static_cast<char>(toupper(static_cast<unsigned char>(*buf)));
	    }
	    unlockBuffer(len);
	}

	// Returns utf32_length() of this string's bytes.
	size_t String8::getUtf32Length() const
	{
	    const size_t byteCount = length();
	    return utf32_length(mString, byteCount);
	}

	// Forwards to utf32_at() for the byte offset `index` of this string;
	// `next_index` is passed through unchanged.
	int32_t String8::getUtf32At(size_t index, size_t *next_index) const
	{
	    const size_t byteCount = length();
	    return utf32_at(mString, byteCount, index, next_index);
	}

	// Converts this string into `dst` (capacity `dst_len`) via utf8_to_utf32()
	// and returns that function's result.
	size_t String8::getUtf32(char32_t* dst, size_t dst_len) const
	{
	    const size_t byteCount = length();
	    return utf8_to_utf32(mString, byteCount, dst, dst_len);
	}

	// Streams the C-string contents of `val` into `to` and returns `to`.
	TextOutput& operator<<(TextOutput& to, const String8& val)
	{
	    to << val.string();

	    return to;
	}

	// ---------------------------------------------------------------------------
	// Path functions

	// Sets the string to the NUL-terminated path `name`; the length is taken
	// with strlen() and the two-argument overload does the work.
	void String8::setPathName(const char* name)
	{
	    const size_t nameLen = strlen(name);
	    setPathName(name, nameLen);
	}

	// Sets the string to the first `len` bytes of `name`, dropping a single
	// trailing OS_PATH_SEPARATOR if there is one.
	//
	// If lockBuffer() cannot provide a buffer the string is left unchanged
	// (previously the NULL result was passed straight to memcpy()).
	void String8::setPathName(const char* name, size_t len)
	{
	    char* buf = lockBuffer(len);
	    if (buf == NULL) {
	        return;
	    }

	    memcpy(buf, name, len);

	    // remove trailing path separator, if present
	    if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
	        len--;

	    buf[len] = '\0';

	    unlockBuffer(len);
	}

	// Returns everything after the last OS_PATH_SEPARATOR, or a copy of the
	// whole string when it contains no separator.
	String8 String8::getPathLeaf(void) const
	{
	    const char* const lastSep = strrchr(mString, OS_PATH_SEPARATOR);
	    if (lastSep != NULL) {
	        return String8(lastSep + 1);
	    }
	    return String8(*this);
	}

	// Returns everything before the last OS_PATH_SEPARATOR (separator
	// excluded), or an empty string when there is no separator.
	String8 String8::getPathDir(void) const
	{
	    const char* const whole = mString;
	    const char* const lastSep = strrchr(whole, OS_PATH_SEPARATOR);
	    if (lastSep != NULL) {
	        return String8(whole, lastSep - whole);
	    }
	    return String8("");
	}

	// Splits off the first path component.
	//
	// A single leading OS_PATH_SEPARATOR is skipped. The returned string is the
	// text up to the next separator (or the rest of the path if none follows).
	// When `outRemains` is non-NULL it receives what follows that separator,
	// or an empty string if nothing follows.
	String8 String8::walkPath(String8* outRemains) const
	{
	    const char* const whole = mString;
	    const char* segment = whole;

	    const char* sep = strchr(segment, OS_PATH_SEPARATOR);
	    if (sep == segment) {
	        // don't include a leading '/'.
	        ++segment;
	        sep = strchr(segment, OS_PATH_SEPARATOR);
	    }

	    if (sep != NULL) {
	        // The result is built before *outRemains is written, exactly as
	        // the remaining text is read before the assignment takes place.
	        String8 first(segment, sep - segment);
	        if (outRemains) *outRemains = String8(sep + 1);
	        return first;
	    }

	    // No further separator: the (possibly shortened) path is the component.
	    String8 first = segment != whole ? String8(segment) : *this;
	    if (outRemains) *outRemains = String8("");
	    return first;
	}

	/*
	 * Helper function for finding the start of an extension in a pathname.
	 *
	 * Only the part after the last OS_PATH_SEPARATOR is examined, so a dot in
	 * a directory name is never reported.
	 *
	 * Returns a pointer to the last '.' inside mString, or NULL if no
	 * extension was found.
	 */
	char* String8::find_extension(void) const
	{
	    const char* const str = mString;

	    // only look at the filename
	    const char* lastSlash = strrchr(str, OS_PATH_SEPARATOR);
	    if (lastSlash == NULL)
	        lastSlash = str;
	    else
	        lastSlash++;

	    // find the last dot
	    const char* const lastDot = strrchr(lastSlash, '.');
	    if (lastDot == NULL)
	        return NULL;

	    // looks good, ship it
	    return const_cast<char*>(lastDot);
	}

	// Returns the text from the position reported by find_extension() to the
	// end of the string (so the '.' is included), or an empty string when
	// find_extension() reports no extension.
	String8 String8::getPathExtension(void) const
	{
	    const char* const ext = find_extension();
	    if (ext == NULL) {
	        return String8("");
	    }
	    return String8(ext);
	}

	// Returns the path with its extension removed: everything before the
	// position reported by find_extension(), or a copy of the whole string
	// when no extension is found.
	String8 String8::getBasePath(void) const
	{
	    const char* const whole = mString;
	    const char* const ext = find_extension();
	    if (ext != NULL) {
	        return String8(whole, ext - whole);
	    }
	    return String8(*this);
	}

	// Appends the path component `name` to this path and returns *this.
	//
	//  - `name` starting with OS_PATH_SEPARATOR replaces the current path.
	//  - An empty `name` leaves the path untouched.
	//  - An empty current path is simply set to `name`.
	//  - Otherwise `name` is appended, with one OS_PATH_SEPARATOR inserted if
	//    the current path does not already end in one.
	//
	// If lockBuffer() cannot provide a buffer the path is left unchanged.
	String8& String8::appendPath(const char* name)
	{
	    // TODO: The test below will fail for Win32 paths. Fix later or ignore.
	    if (name[0] == OS_PATH_SEPARATOR) {
	        setPathName(name);
	        return *this;
	    }

	    if (*name == '\0') {
	        // nothing to do
	        return *this;
	    }

	    size_t len = length();
	    if (len == 0) {
	        // no existing filename, just use the new one
	        setPathName(name);
	        return *this;
	    }

	    // make room for oldPath + '/' + newPath
	    // (size_t rather than int: strlen() returns size_t and the value is
	    // only ever used in size arithmetic)
	    const size_t newlen = strlen(name);

	    char* buf = lockBuffer(len+1+newlen);
	    if (buf == NULL) {
	        return *this;
	    }

	    // insert a '/' if needed
	    if (buf[len-1] != OS_PATH_SEPARATOR)
	        buf[len++] = OS_PATH_SEPARATOR;

	    // newlen+1 so that name's terminating NUL is copied as well
	    memcpy(buf+len, name, newlen+1);
	    len += newlen;

	    unlockBuffer(len);

	    return *this;
	}

	// Rewrites every OS_PATH_SEPARATOR in the string to RES_PATH_SEPARATOR and
	// returns *this. Compiles to a plain `return *this` when the two separator
	// macros are equal.
	String8& String8::convertToResPath()
	{
	#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
	    const size_t len = length();
	    if (len > 0) {
	        char* const base = lockBuffer(len);
	        for (size_t i = 0; i < len; ++i) {
	            if (base[i] == OS_PATH_SEPARATOR) {
	                base[i] = RES_PATH_SEPARATOR;
	            }
	        }
	        unlockBuffer(len);
	    }
	#endif
	    return *this;
	}

	}; // namespace android

	// ---------------------------------------------------------------------------

	// Returns the number of char32_t elements before the terminating zero.
	size_t strlen32(const char32_t *s)
	{
	    size_t count = 0;
	    while (s[count] != 0) {
	        ++count;
	    }
	    return count;
	}

	// Like strlen32(), but inspects at most `maxlen` elements: returns the
	// index of the first zero element, or `maxlen` if none occurs before it.
	size_t strnlen32(const char32_t *s, size_t maxlen)
	{
	    size_t count = 0;
	    // The bound is tested first so s[maxlen] is never read.
	    while (count < maxlen && s[count] != 0) {
	        ++count;
	    }
	    return count;
	}

	// Validates the NUL-terminated string `src` as UTF-8 and returns its
	// length in bytes (terminator excluded).
	//
	// Returns 0 when a lead byte is a stray continuation byte (10xxxxxx) or
	// announces more than four bytes, when a continuation byte is missing, or
	// when a decoded value exceeds android::kUnicodeMaxCodepoint. An empty
	// string also yields 0.
	size_t utf8_length(const char *src)
	{
	    const char *cur = src;
	    size_t ret = 0;
	    while (*cur != '\0') {
	        // Handle the lead byte as unsigned so that extracting its payload
	        // bits below does not depend on whether plain char is signed; a
	        // sign-extended lead byte would set high bits in the code point.
	        const unsigned char first_char = static_cast<unsigned char>(*cur++);
	        if ((first_char & 0x80) == 0) { // ASCII
	            ret += 1;
	            continue;
	        }
	        // (UTF-8's character must not be like 10xxxxxx,
	        //  but 110xxxxx, 1110xxxx, ... or 1111110x)
	        if ((first_char & 0x40) == 0) {
	            return 0;
	        }

	        char32_t mask, to_ignore_mask;
	        size_t num_to_read = 0;
	        char32_t utf32 = 0;
	        // One continuation byte is consumed for every leading 1 bit after
	        // the first; to_ignore_mask collects those marker bits.
	        for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
	             num_to_read < 5 && (first_char & mask);
	             num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
	            if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx
	                return 0;
	            }
	            // 0x3F == 00111111
	            utf32 = (utf32 << 6) + (*cur++ & 0x3F);
	        }
	        // "first_char" must be (110xxxxx - 11110xxx)
	        if (num_to_read == 5) {
	            return 0;
	        }
	        to_ignore_mask |= mask;
	        // Place the lead byte's payload bits above the continuation bits.
	        utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
	        if (utf32 > android::kUnicodeMaxCodepoint) {
	            return 0;
	        }

	        ret += num_to_read;
	    }
	    return ret;
	}

	// Counts the characters in the first `src_len` bytes of the UTF-8 buffer
	// `src` by looking only at lead bytes: an ASCII byte advances by one, any
	// other byte advances by one plus the number of consecutive 1 bits that
	// follow its top bit. The bytes skipped over are not validated.
	//
	// Returns 0 for a NULL or empty buffer. A sequence that is cut off by the
	// end of the buffer still counts as one character.
	size_t utf32_length(const char *src, size_t src_len)
	{
	    if (src == NULL || src_len == 0) {
	        return 0;
	    }
	    size_t ret = 0;
	    // Walk by index rather than by pointer so the cursor never has to be
	    // advanced beyond the end of the buffer.
	    size_t pos = 0;
	    while (pos < src_len) {
	        const char first_char = src[pos];
	        size_t num_to_skip = 1;
	        if ((first_char & 0x80) != 0) { // not ASCII
	            for (int32_t mask = 0x40; (first_char & mask); mask >>= 1) {
	                num_to_skip++;
	            }
	        }
	        pos += num_to_skip;
	        ret++;
	    }
	    return ret;
	}

	// Sums android::utf32_to_utf8_bytes() over the `src_len` code points at
	// `src`, i.e. the total it reports for the whole UTF-32 input.
	// Returns 0 for a NULL or empty input.
	size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
	{
	    if (src == NULL || src_len == 0) {
	        return 0;
	    }
	    size_t ret = 0;
	    for (size_t i = 0; i < src_len; ++i) {
	        ret += android::utf32_to_utf8_bytes(src[i]);
	    }
	    return ret;
	}

	// Computes a UTF-8 byte count for the `src_len` UTF-16 code units at
	// `src`. Returns 0 for a NULL or empty input.
	//
	// A high surrogate immediately followed by a low surrogate counts as four
	// bytes. Every other unit -- including an unpaired surrogate -- is counted
	// on its own with android::utf32_to_utf8_bytes().
	size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
	{
	    if (src == NULL || src_len == 0) {
	        return 0;
	    }
	    size_t ret = 0;
	    const char16_t* const end = src + src_len;
	    while (src < end) {
	        // Peek at the next unit without advancing: stepping forward inside
	        // the condition would silently drop a high surrogate whose
	        // successor turns out not to be a low surrogate.
	        if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
	                && (*(src + 1) & 0xFC00) == 0xDC00) {
	            // surrogate pairs are always 4 bytes.
	            ret += 4;
	            src += 2;
	        } else {
	            ret += android::utf32_to_utf8_bytes((char32_t) *src++);
	        }
	    }
	    return ret;
	}

	// Decodes the UTF-8 sequence that starts at `cur`.
	//
	// Returns the decoded value and stores the number of bytes consumed in
	// *num_read. The sequence length is taken from the lead byte alone; the
	// following bytes are neither validated nor bounds-checked, so the caller
	// must supply a complete sequence.
	static int32_t utf32_at_internal(const char* cur, size_t *num_read)
	{
	    const char first_char = *cur;
	    if ((first_char & 0x80) == 0) { // ASCII
	        *num_read = 1;
	        return *cur;
	    }
	    cur++;
	    char32_t mask, to_ignore_mask;
	    size_t num_to_read = 0;
	    char32_t utf32 = first_char;
	    // to_ignore_mask starts with every bit above the low seven set, so the
	    // final masking step also clears whatever sign extension of
	    // first_char put into the upper bits of utf32.
	    for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80;
	         (first_char & mask);
	         num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
	        // 0x3F == 00111111
	        utf32 = (utf32 << 6) + (*cur++ & 0x3F);
	    }
	    to_ignore_mask |= mask;
	    // Strip the lead byte's length-marker bits, which by now have been
	    // shifted up by six bits per continuation byte.
	    utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1)));

	    *num_read = num_to_read;
	    return static_cast<int32_t>(utf32);
	}

	// Decodes the character that starts at byte offset `index` of `src`.
	//
	// Returns -1 when `index` is not below `src_len`. Otherwise returns the
	// value produced by utf32_at_internal(); if that value is non-negative and
	// `next_index` is non-NULL, *next_index receives the offset just past the
	// decoded sequence.
	int32_t utf32_at(const char *src, size_t src_len,
	                 size_t index, size_t *next_index)
	{
	    if (index >= src_len) {
	        return -1;
	    }

	    size_t consumed;
	    const int32_t code = utf32_at_internal(src + index, &consumed);
	    if (code >= 0 && next_index != NULL) {
	        *next_index = index + consumed;
	    }
	    return code;
	}

	// Decodes the `src_len` bytes of UTF-8 at `src` into `dst`, which has room
	// for `dst_len` elements, using utf32_at_internal() for each character.
	//
	// Returns the number of elements written, or 0 if any argument is NULL or
	// zero. A terminating 0 is stored after the output only when there is
	// still room for it.
	//
	// NOTE(review): utf32_at_internal() takes no length, so a sequence that is
	// cut off at the end of `src` appears to be read past `src_len` -- confirm
	// callers pass complete UTF-8.
	size_t utf8_to_utf32(const char* src, size_t src_len,
	                     char32_t* dst, size_t dst_len)
	{
	    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
	        return 0;
	    }

	    const char* cur = src;
	    const char* end = src + src_len;
	    char32_t* cur_utf32 = dst;
	    const char32_t* end_utf32 = dst + dst_len;
	    while (cur_utf32 < end_utf32 && cur < end) {
	        size_t num_read;
	        *cur_utf32++ =
	                static_cast<char32_t>(utf32_at_internal(cur, &num_read));
	        cur += num_read;
	    }
	    if (cur_utf32 < end_utf32) {
	        *cur_utf32 = 0;
	    }
	    return static_cast<size_t>(cur_utf32 - dst);
	}

	// Encodes the `src_len` code points at `src` as UTF-8 into `dst`, which
	// holds `dst_len` bytes.
	//
	// Returns the number of bytes written, or 0 if any argument is NULL or
	// zero. Encoding stops at the first code point that no longer fits, so
	// nothing is written beyond `dst_len`. A terminating NUL is stored after
	// the output only when there is still room for it.
	size_t utf32_to_utf8(const char32_t* src, size_t src_len,
	                     char* dst, size_t dst_len)
	{
	    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
	        return 0;
	    }
	    const char32_t *cur_utf32 = src;
	    const char32_t *end_utf32 = src + src_len;
	    char *cur = dst;
	    const char *end = dst + dst_len;
	    while (cur_utf32 < end_utf32 && cur < end) {
	        const size_t len = android::utf32_to_utf8_bytes(*cur_utf32);
	        // `cur < end` only guarantees one free byte; make sure the whole
	        // encoded sequence fits before writing it.
	        if (len > static_cast<size_t>(end - cur)) {
	            break;
	        }
	        android::utf32_to_utf8((uint8_t*)cur, *cur_utf32++, len);
	        cur += len;
	    }
	    if (cur < end) {
	        *cur = '\0';
	    }
	    return cur - dst;
	}

	// Encodes the `src_len` UTF-16 code units at `src` as UTF-8 into `dst`,
	// which holds `dst_len` bytes.
	//
	// Returns the number of bytes written, or 0 if any argument is NULL or
	// zero. A high surrogate directly followed by a low surrogate is combined
	// into one code point; every other unit is passed on by itself. Encoding
	// stops at the first code point that no longer fits, so nothing is written
	// beyond `dst_len`. A terminating NUL is stored after the output only when
	// there is still room for it.
	size_t utf16_to_utf8(const char16_t* src, size_t src_len,
	                     char* dst, size_t dst_len)
	{
	    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
	        return 0;
	    }
	    const char16_t* cur_utf16 = src;
	    const char16_t* const end_utf16 = src + src_len;
	    char *cur = dst;
	    const char* const end = dst + dst_len;
	    while (cur_utf16 < end_utf16 && cur < end) {
	        char32_t utf32;
	        // surrogate pairs: only combine when the next unit really is a low
	        // surrogate, otherwise an unrelated unit would be swallowed and
	        // folded into a bogus code point.
	        if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16
	                && (*(cur_utf16 + 1) & 0xFC00) == 0xDC00) {
	            utf32 = (*cur_utf16++ - 0xD800) << 10;
	            utf32 |= *cur_utf16++ - 0xDC00;
	            utf32 += 0x10000;
	        } else {
	            utf32 = (char32_t) *cur_utf16++;
	        }
	        const size_t len = android::utf32_to_utf8_bytes(utf32);
	        // `cur < end` only guarantees one free byte; make sure the whole
	        // encoded sequence fits before writing it.
	        if (len > static_cast<size_t>(end - cur)) {
	            break;
	        }
	        android::utf32_to_utf8((uint8_t*)cur, utf32, len);
	        cur += len;
	    }
	    if (cur < end) {
	        *cur = '\0';
	    }
	    return cur - dst;
	}