| /********************************************************************** |
| * File: strngs.c (Formerly strings.c) |
| * Description: STRING class functions. |
| * Author: Ray Smith |
| * Created: Fri Feb 15 09:13:30 GMT 1991 |
| * |
| * (C) Copyright 1991, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #include "mfcpch.h" //precompiled headers |
| #include "tprintf.h" |
| #include "strngs.h" |
| |
| /********************************************************************** |
| * DataCache for reducing initial allocations, such as the default |
| * constructor. The memory in this cache is not special, it is just |
| * held locally rather than freeing. Only blocks with the default |
| * capacity are considered for the cache. |
| * |
| * In practice it does not appear that this cache grows very big, |
| * so even 2-4 elements are probably sufficient to realize most |
| * gains. |
| * |
| * The cache is maintained globally with a global destructor to |
| * avoid memory leaks being reported on exit. |
| **********************************************************************/ |
// kDataCacheSize is the number of most recently freed minimum-sized
// buffers that are kept around for cheap recycling.
const int kDataCacheSize = 8;  // max number of buffers cached

// CHECK_INVARIANT(s) expands to nothing in the default build. Flip the
// "#if 1" below to compile in a check that aborts when the used_ count in
// the header of s disagrees with the text that s holds.
#if 1
#define CHECK_INVARIANT(s)  // EMPTY
#else
// Aborts unless len and s agree with each other: a len of zero requires
// s to be NULL, any other len must equal strlen(s) + 1.
static void check_used_(int len, const char *s) {
  const bool consistent =
      (len == 0) ? (s == NULL) : (len == (strlen(s) + 1));
  if (consistent)
    return;

  abort();
}

#define CHECK_INVARIANT(s) check_used_(s->GetHeader()->used_, s->string())
#endif
| |
| // put recycled buffers into a class so we can destroy it on exit |
| class DataCache { |
| public: |
| DataCache() { |
| top_ = 0; |
| } |
| ~DataCache() { |
| while (--top_ >= 0) |
| free_string((char *)stack_[top_]); |
| } |
| |
| // Allocate a buffer out of this cache. |
| // Returs NULL if there are no cached buffers. |
| // The buffers in the cache can be freed using string_free. |
| void* alloc() { |
| if (top_ == 0) |
| return NULL; |
| |
| return stack_[--top_]; |
| } |
| |
| // Free pointer either by caching it on the stack of pointers |
| // or freeing it with string_free if there isnt space left to cache it. |
| // s should have capacity kMinCapacity. |
| void free(void* p) { |
| if (top_ == kDataCacheSize) |
| free_string((char *)p); |
| else |
| stack_[top_++] = p; |
| } |
| |
| // Stack of discarded but not-yet freed pointers. |
| void* stack_[kDataCacheSize]; |
| |
| // Top of stack, points to element after last cached pointer |
| int top_; |
| }; |
| |
| static DataCache MinCapacityDataCache; |
| |
| |
| /********************************************************************** |
| * STRING_HEADER provides metadata about the allocated buffer, |
| * including total capacity and how much used (strlen with '\0'). |
| * |
| * The implementation hides this header at the start of the data |
| * buffer and appends the string on the end to keep sizeof(STRING) |
| * unchanged from earlier versions so serialization is not affected. |
| * |
| * The collection of MACROS provides different implementations depending |
| * on whether the string keeps track of its strlen or not, so that this |
| * feature can be added later, once consumers no longer modify the string |
| **********************************************************************/ |
| |
| // Smallest string to allocate by default |
| const int kMinCapacity = 16; |
| |
| char* STRING::AllocData(int used, int capacity) { |
| if ((capacity != kMinCapacity) |
| || ((data_ = (STRING_HEADER *)MinCapacityDataCache.alloc()) == NULL)) |
| data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); |
| |
| // header is the metadata for this memory block |
| STRING_HEADER* header = GetHeader(); |
| header->capacity_ = capacity; |
| header->used_ = used; |
| return GetCStr(); |
| } |
| |
| void STRING::DiscardData() { |
| STRING_HEADER* header = GetHeader(); |
| if (header->capacity_ == kMinCapacity) |
| MinCapacityDataCache.free(data_); |
| else |
| free_string((char *)data_); |
| } |
| |
| // This is a private method; ensure FixHeader is called (or used_ is well defined) |
| // beforehand |
| char* STRING::ensure_cstr(inT32 min_capacity) { |
| STRING_HEADER* orig_header = GetHeader(); |
| if (min_capacity <= orig_header->capacity_) |
| return ((char *)this->data_) + sizeof(STRING_HEADER); |
| |
| // if we are going to grow bigger, than double our existing |
| // size, but if that still is not big enough then keep the |
| // requested capacity |
| if (min_capacity < 2 * orig_header->capacity_) |
| min_capacity = 2 * orig_header->capacity_; |
| |
| int alloc = sizeof(STRING_HEADER) + min_capacity; |
| STRING_HEADER* new_header = (STRING_HEADER*)(alloc_string(alloc)); |
| |
| memcpy(&new_header[1], GetCStr(), orig_header->used_); |
| new_header->capacity_ = min_capacity; |
| new_header->used_ = orig_header->used_; |
| |
| // free old memory, then rebind to new memory |
| DiscardData(); |
| data_ = new_header; |
| |
| CHECK_INVARIANT(this); |
| return ((char *)data_) + sizeof(STRING_HEADER); |
| } |
| |
| // This is const, but is modifying a mutable field |
| // this way it can be used on const or non-const instances. |
| void STRING::FixHeader() const { |
| const STRING_HEADER* header = GetHeader(); |
| if (header->used_ < 0) |
| header->used_ = strlen(GetCStr()) + 1; |
| } |
| |
| |
| STRING::STRING() { |
| // 0 indicates old NULL -- it doesnt even have '\0' |
| AllocData(0, kMinCapacity); |
| } |
| |
| STRING::STRING(const STRING& str) { |
| str.FixHeader(); |
| const STRING_HEADER* str_header = str.GetHeader(); |
| int str_used = str_header->used_; |
| char *this_cstr = AllocData(str_used, str_used); |
| memcpy(this_cstr, str.GetCStr(), str_used); |
| CHECK_INVARIANT(this); |
| } |
| |
| STRING::STRING(const char* cstr) { |
| if (cstr == NULL) { |
| AllocData(0, 0); |
| } else { |
| int len = strlen(cstr) + 1; |
| char* this_cstr = AllocData(len, len); |
| memcpy(this_cstr, cstr, len); |
| } |
| CHECK_INVARIANT(this); |
| } |
| |
| STRING::~STRING() { |
| DiscardData(); |
| } |
| |
| BOOL8 STRING::contains(const char c) const { |
| return (c != '\0') && (strchr (GetCStr(), c) != NULL); |
| } |
| |
| inT32 STRING::length() const { |
| FixHeader(); |
| return GetHeader()->used_ - 1; |
| } |
| |
| const char* STRING::string() const { |
| const STRING_HEADER* header = GetHeader(); |
| if (header->used_ == 0) |
| return NULL; |
| |
| // mark header length unreliable because tesseract might |
| // cast away the const and mutate the string directly. |
| header->used_ = -1; |
| return GetCStr(); |
| } |
| |
| /****** |
| * The STRING_IS_PROTECTED interface adds additional support to migrate |
| * code that needs to modify the STRING in ways not otherwise supported |
| * without violating encapsulation. |
| * |
| * Also makes the [] operator return a const so it is immutable |
| */ |
| #if STRING_IS_PROTECTED |
| const char& STRING::operator[](inT32 index) const { |
| return GetCStr()[index]; |
| } |
| |
| void STRING::insert_range(inT32 index, const char* str, int len) { |
| // if index is outside current range, then also grow size of string |
| // to accmodate the requested range. |
| STRING_HEADER* this_header = GetHeader(); |
| int used = this_header->used_; |
| if (index > used) |
| used = index; |
| |
| char* this_cstr = ensure_cstr(used + len + 1); |
| if (index < used) { |
| // move existing string from index to '\0' inclusive. |
| memmove(this_cstr + index + len, |
| this_cstr + index, |
| this_header->used_ - index); |
| } else if (len > 0) { |
| // We are going to overwrite previous null terminator, so write the new one. |
| this_cstr[this_header->used_ + len - 1] = '\0'; |
| |
| // If the old header did not have the terminator, |
| // then we need to account for it now that we've added it. |
| // Otherwise it was already accounted for; we just moved it. |
| if (this_header->used_ == 0) |
| ++this_header->used_; |
| } |
| |
| // Write new string to index. |
| // The string is already terminated from the conditions above. |
| memcpy(this_cstr + index, str, len); |
| this_header->used_ += len; |
| |
| CHECK_INVARIANT(this); |
| } |
| |
| void STRING::erase_range(inT32 index, int len) { |
| char* this_cstr = GetCStr(); |
| STRING_HEADER* this_header = GetHeader(); |
| |
| memcpy(this_cstr+index, this_cstr+index+len, |
| this_header->used_ - index - len); |
| this_header->used_ -= len; |
| CHECK_INVARIANT(this); |
| } |
| |
| void STRING::truncate_at(inT32 index) { |
| char* this_cstr = ensure_cstr(index); |
| this_cstr[index] = '\0'; |
| GetHeader()->used_ = index; |
| CHECK_INVARIANT(this); |
| } |
| |
| #else |
| char& STRING::operator[](inT32 index) const { |
| // Code is casting away this const and mutating the string, |
| // so mark used_ as -1 to flag it unreliable. |
| GetHeader()->used_ = -1; |
| return ((char *)GetCStr())[index]; |
| } |
| #endif |
| |
| BOOL8 STRING::operator==(const STRING& str) const { |
| FixHeader(); |
| str.FixHeader(); |
| const STRING_HEADER* str_header = str.GetHeader(); |
| const STRING_HEADER* this_header = GetHeader(); |
| int this_used = this_header->used_; |
| int str_used = str_header->used_; |
| |
| return (this_used == str_used) |
| && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); |
| } |
| |
| BOOL8 STRING::operator!=(const STRING& str) const { |
| FixHeader(); |
| str.FixHeader(); |
| const STRING_HEADER* str_header = str.GetHeader(); |
| const STRING_HEADER* this_header = GetHeader(); |
| int this_used = this_header->used_; |
| int str_used = str_header->used_; |
| |
| return (this_used != str_used) |
| || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); |
| } |
| |
| BOOL8 STRING::operator!=(const char* cstr) const { |
| FixHeader(); |
| const STRING_HEADER* this_header = GetHeader(); |
| |
| if (cstr == NULL) |
| return this_header->used_ > 1; // either '\0' or NULL |
| else { |
| inT32 length = strlen(cstr) + 1; |
| return (this_header->used_ != length) |
| || (memcmp(GetCStr(), cstr, length) != 0); |
| } |
| } |
| |
| STRING& STRING::operator=(const STRING& str) { |
| str.FixHeader(); |
| const STRING_HEADER* str_header = str.GetHeader(); |
| int str_used = str_header->used_; |
| |
| GetHeader()->used_ = 0; // clear since ensure doesnt need to copy data |
| char* this_cstr = ensure_cstr(str_used); |
| STRING_HEADER* this_header = GetHeader(); |
| |
| memcpy(this_cstr, str.GetCStr(), str_used); |
| this_header->used_ = str_used; |
| |
| CHECK_INVARIANT(this); |
| return *this; |
| } |
| |
| STRING & STRING::operator+=(const STRING& str) { |
| FixHeader(); |
| str.FixHeader(); |
| const STRING_HEADER* str_header = str.GetHeader(); |
| const char* str_cstr = str.GetCStr(); |
| int str_used = str_header->used_; |
| int this_used = GetHeader()->used_; |
| char* this_cstr = ensure_cstr(this_used + str_used); |
| |
| STRING_HEADER* this_header = GetHeader(); // after ensure for realloc |
| |
| if (this_used > 1) { |
| memcpy(this_cstr + this_used - 1, str_cstr, str_used); |
| this_header->used_ += str_used - 1; // overwrite '\0' |
| } else { |
| memcpy(this_cstr, str_cstr, str_used); |
| this_header->used_ = str_used; |
| } |
| |
| CHECK_INVARIANT(this); |
| return *this; |
| } |
| |
| void STRING::prep_serialise() { |
| // WARNING |
| // This method should only be called on a shallow bitwise copy |
| // by the serialise() method (see serialis.h). |
| FixHeader(); |
| data_ = (STRING_HEADER *)GetHeader()->used_; |
| } |
| |
| |
| void STRING::dump(FILE* f) { |
| FixHeader(); |
| serialise_bytes (f, data_, GetHeader()->used_); |
| } |
| |
| void STRING::de_dump(FILE* f) { |
| char *instring; //input from read |
| fprintf(stderr, "de_dump\n"); |
| instring = (char *)de_serialise_bytes(f, (ptrdiff_t)data_); |
| int len = strlen(instring) + 1; |
| |
| char* this_cstr = AllocData(len, len); |
| STRING_HEADER* this_header = GetHeader(); |
| |
| memcpy(this_cstr, instring, len); |
| this_header->used_ = len; |
| |
| free_mem(instring); |
| CHECK_INVARIANT(this); |
| } |
| |
| |
| STRING & STRING::operator=(const char* cstr) { |
| STRING_HEADER* this_header = GetHeader(); |
| if (cstr) { |
| int len = strlen(cstr) + 1; |
| |
| this_header->used_ = 0; // dont bother copying data if need to realloc |
| char* this_cstr = ensure_cstr(len); |
| this_header = GetHeader(); // for realloc |
| memcpy(this_cstr, cstr, len); |
| this_header->used_ = len; |
| } |
| else { |
| // Reallocate to zero capacity buffer, consistent with the corresponding |
| // copy constructor. |
| DiscardData(); |
| AllocData(0, 0); |
| } |
| |
| CHECK_INVARIANT(this); |
| return *this; |
| } |
| |
| |
| STRING STRING::operator+(const STRING& str) const { |
| STRING result(*this); |
| result += str; |
| |
| CHECK_INVARIANT(this); |
| return result; |
| } |
| |
| |
| STRING STRING::operator+(const char ch) const { |
| STRING result; |
| FixHeader(); |
| const STRING_HEADER* this_header = GetHeader(); |
| int this_used = this_header->used_; |
| char* result_cstr = result.ensure_cstr(this_used + 1); |
| STRING_HEADER* result_header = result.GetHeader(); |
| int result_used = result_header->used_; |
| |
| // copies '\0' but we'll overwrite that |
| memcpy(result_cstr, GetCStr(), this_used); |
| result_cstr[result_used] = ch; // overwrite old '\0' |
| result_cstr[result_used + 1] = '\0'; // append on '\0' |
| ++result_header->used_; |
| |
| CHECK_INVARIANT(this); |
| return result; |
| } |
| |
| |
| STRING& STRING::operator+=(const char *str) { |
| if (!str || !*str) // empty string has no effect |
| return *this; |
| |
| FixHeader(); |
| int len = strlen(str) + 1; |
| int this_used = GetHeader()->used_; |
| char* this_cstr = ensure_cstr(this_used + len); |
| STRING_HEADER* this_header = GetHeader(); // after ensure for realloc |
| |
| // if we had non-empty string then append overwriting old '\0' |
| // otherwise replace |
| if (this_used > 0) { |
| memcpy(this_cstr + this_used - 1, str, len); |
| this_header->used_ += len - 1; |
| } else { |
| memcpy(this_cstr, str, len); |
| this_header->used_ = len; |
| } |
| |
| CHECK_INVARIANT(this); |
| return *this; |
| } |
| |
| |
| STRING& STRING::operator+=(const char ch) { |
| if (ch == '\0') |
| return *this; |
| |
| FixHeader(); |
| int this_used = GetHeader()->used_; |
| char* this_cstr = ensure_cstr(this_used + 1); |
| STRING_HEADER* this_header = GetHeader(); |
| |
| if (this_used > 0) |
| --this_used; // undo old empty null if there was one |
| |
| this_cstr[this_used++] = ch; // append ch to end |
| this_cstr[this_used++] = '\0'; // append '\0' after ch |
| this_header->used_ = this_used; |
| |
| CHECK_INVARIANT(this); |
| return *this; |
| } |