| /** \file |
| * Implementation of the ANTLR3 string and string factory classes |
| */ |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
| // http://www.temporal-wave.com |
| // http://www.linkedin.com/in/jimidle |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <antlr3string.h> |
| |
| /* Factory API |
| */ |
| static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory); |
| static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory); |
| static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); |
| static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); |
| static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); |
| static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); |
| static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); |
| static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); |
| static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); |
| static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); |
| static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); |
| static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); |
| static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); |
| static void closeFactory(pANTLR3_STRING_FACTORY factory); |
| |
| /* String API |
| */ |
| static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars); |
| static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars); |
| static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars); |
| static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit); |
| static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit); |
| static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit); |
| static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); |
| static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); |
| static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); |
| |
| static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars); |
| static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit); |
| static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit); |
| |
| static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c); |
| static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c); |
| static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i); |
| static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i); |
| static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); |
| static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); |
| |
| static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr); |
| static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr); |
| static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr); |
| static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr); |
| static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset); |
| static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset); |
| static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); |
| static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); |
| static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string); |
| static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string); |
| static pANTLR3_STRING to8_8 (pANTLR3_STRING string); |
| static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string); |
| static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string); |
| static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string); |
| |
| /* Local helpers |
| */ |
| static void stringInit8 (pANTLR3_STRING string); |
| static void stringInitUTF16 (pANTLR3_STRING string); |
| static void ANTLR3_CDECL stringFree (pANTLR3_STRING string); |
| |
| ANTLR3_API pANTLR3_STRING_FACTORY |
| antlr3StringFactoryNew(ANTLR3_UINT32 encoding) |
| { |
| pANTLR3_STRING_FACTORY factory; |
| |
| /* Allocate memory |
| */ |
| factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY)); |
| |
| if (factory == NULL) |
| { |
| return NULL; |
| } |
| |
| /* Now we make a new list to track the strings. |
| */ |
| factory->strings = antlr3VectorNew(0); |
| factory->index = 0; |
| |
| if (factory->strings == NULL) |
| { |
| ANTLR3_FREE(factory); |
| return NULL; |
| } |
| |
| // Install the API |
| // |
| // TODO: These encodings need equivalent functions to |
| // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff. |
| // The STRING stuff was intended as a quick and dirty hack for people that did not |
| // want to worry about memory and performance very much, but nobody ever reads the |
| // notes or comments or uses the email list search. I want to discourage using these |
| // interfaces as it is much more efficient to use the pointers within the tokens |
| // directly, so I am not implementing the string stuff for the newer encodings. |
| // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they |
| // will not be useful beyond returning the text. |
| // |
| switch(encoding) |
| { |
| case ANTLR3_ENC_UTF32: |
| break; |
| |
| case ANTLR3_ENC_UTF32BE: |
| break; |
| |
| case ANTLR3_ENC_UTF32LE: |
| break; |
| |
| case ANTLR3_ENC_UTF16BE: |
| case ANTLR3_ENC_UTF16LE: |
| case ANTLR3_ENC_UTF16: |
| |
| factory->newRaw = newRawUTF16; |
| factory->newSize = newSizeUTF16; |
| factory->newPtr = newPtrUTF16_UTF16; |
| factory->newPtr8 = newPtrUTF16_8; |
| factory->newStr = newStrUTF16_UTF16; |
| factory->newStr8 = newStrUTF16_8; |
| factory->printable = printableUTF16; |
| factory->destroy = destroy; |
| factory->close = closeFactory; |
| break; |
| |
| case ANTLR3_ENC_UTF8: |
| case ANTLR3_ENC_EBCDIC: |
| case ANTLR3_ENC_8BIT: |
| default: |
| |
| factory->newRaw = newRaw8; |
| factory->newSize = newSize8; |
| factory->newPtr = newPtr8; |
| factory->newPtr8 = newPtr8; |
| factory->newStr = newStr8; |
| factory->newStr8 = newStr8; |
| factory->printable = printable8; |
| factory->destroy = destroy; |
| factory->close = closeFactory; |
| break; |
| } |
| return factory; |
| } |
| |
| |
| /** |
| * |
| * \param factory |
| * \return |
| */ |
| static pANTLR3_STRING |
| newRaw8 (pANTLR3_STRING_FACTORY factory) |
| { |
| pANTLR3_STRING string; |
| |
| string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); |
| |
| if (string == NULL) |
| { |
| return NULL; |
| } |
| |
| /* Structure is allocated, now fill in the API etc. |
| */ |
| stringInit8(string); |
| string->factory = factory; |
| |
| /* Add the string into the allocated list |
| */ |
| factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); |
| string->index = factory->index++; |
| |
| return string; |
| } |
| /** |
| * |
| * \param factory |
| * \return |
| */ |
| static pANTLR3_STRING |
| newRawUTF16 (pANTLR3_STRING_FACTORY factory) |
| { |
| pANTLR3_STRING string; |
| |
| string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); |
| |
| if (string == NULL) |
| { |
| return NULL; |
| } |
| |
| /* Structure is allocated, now fill in the API etc. |
| */ |
| stringInitUTF16(string); |
| string->factory = factory; |
| |
| /* Add the string into the allocated list |
| */ |
| factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); |
| string->index = factory->index++; |
| |
| return string; |
| } |
| static |
| void ANTLR3_CDECL stringFree (pANTLR3_STRING string) |
| { |
| /* First free the string itself if there was anything in it |
| */ |
| if (string->chars) |
| { |
| ANTLR3_FREE(string->chars); |
| } |
| |
| /* Now free the space for this string |
| */ |
| ANTLR3_FREE(string); |
| |
| return; |
| } |
| /** |
| * |
| * \param string |
| * \return |
| */ |
| static void |
| stringInit8 (pANTLR3_STRING string) |
| { |
| string->len = 0; |
| string->size = 0; |
| string->chars = NULL; |
| string->encoding = ANTLR3_ENC_8BIT ; |
| |
| /* API for 8 bit strings*/ |
| |
| string->set = set8; |
| string->set8 = set8; |
| string->append = append8; |
| string->append8 = append8; |
| string->insert = insert8; |
| string->insert8 = insert8; |
| string->addi = addi8; |
| string->inserti = inserti8; |
| string->addc = addc8; |
| string->charAt = charAt8; |
| string->compare = compare8; |
| string->compare8 = compare8; |
| string->subString = subString8; |
| string->toInt32 = toInt32_8; |
| string->to8 = to8_8; |
| string->toUTF8 = toUTF8_8; |
| string->compareS = compareS; |
| string->setS = setS; |
| string->appendS = appendS; |
| string->insertS = insertS; |
| |
| } |
| /** |
| * |
| * \param string |
| * \return |
| */ |
| static void |
| stringInitUTF16 (pANTLR3_STRING string) |
| { |
| string->len = 0; |
| string->size = 0; |
| string->chars = NULL; |
| string->encoding = ANTLR3_ENC_8BIT; |
| |
| /* API for UTF16 strings */ |
| |
| string->set = setUTF16_UTF16; |
| string->set8 = setUTF16_8; |
| string->append = appendUTF16_UTF16; |
| string->append8 = appendUTF16_8; |
| string->insert = insertUTF16_UTF16; |
| string->insert8 = insertUTF16_8; |
| string->addi = addiUTF16; |
| string->inserti = insertiUTF16; |
| string->addc = addcUTF16; |
| string->charAt = charAtUTF16; |
| string->compare = compareUTF16_UTF16; |
| string->compare8 = compareUTF16_8; |
| string->subString = subStringUTF16; |
| string->toInt32 = toInt32_UTF16; |
| string->to8 = to8_UTF16; |
| string->toUTF8 = toUTF8_UTF16; |
| |
| string->compareS = compareS; |
| string->setS = setS; |
| string->appendS = appendS; |
| string->insertS = insertS; |
| } |
| /** |
| * |
| * \param string |
| * \return |
| * TODO: Implement UTF-8 |
| */ |
| static void |
| stringInitUTF8 (pANTLR3_STRING string) |
| { |
| string->len = 0; |
| string->size = 0; |
| string->chars = NULL; |
| |
| /* API */ |
| |
| } |
| |
| // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself |
| // a memcpy as we make no assumptions about the 8 bit encoding. |
| // |
| static pANTLR3_STRING |
| toUTF8_8 (pANTLR3_STRING string) |
| { |
| return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len); |
| } |
| |
| // Convert a UTF16 string into a UTF8 representation using the Unicode.org |
| // supplied C algorithms, which are now contained within the ANTLR3 C runtime |
| // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h |
| // UCS2 has the same encoding as UTF16 so we can use UTF16 converter. |
| // |
| static pANTLR3_STRING |
| toUTF8_UTF16 (pANTLR3_STRING string) |
| { |
| |
| UTF8 * outputEnd; |
| UTF16 * inputEnd; |
| pANTLR3_STRING utf8String; |
| |
| ConversionResult cResult; |
| |
| // Allocate the output buffer, which needs to accommodate potentially |
| // 3X (in bytes) the input size (in chars). |
| // |
| utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)""); |
| |
| if (utf8String != NULL) |
| { |
| // Free existing allocation |
| // |
| ANTLR3_FREE(utf8String->chars); |
| |
| // Reallocate according to maximum expected size |
| // |
| utf8String->size = string->len *3; |
| utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1); |
| |
| if (utf8String->chars != NULL) |
| { |
| inputEnd = (UTF16 *) (string->chars); |
| outputEnd = (UTF8 *) (utf8String->chars); |
| |
| // Call the Unicode converter |
| // |
| cResult = ConvertUTF16toUTF8 |
| ( |
| (const UTF16**)&inputEnd, |
| ((const UTF16 *)(string->chars)) + string->len, |
| &outputEnd, |
| outputEnd + utf8String->size - 1, |
| lenientConversion |
| ); |
| |
| // We don't really care if things failed or not here, we just converted |
| // everything that was vaguely possible and stopped when it wasn't. It is |
| // up to the grammar programmer to verify that the input is sensible. |
| // |
| utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars); |
| |
| *(outputEnd+1) = '\0'; // Always null terminate |
| } |
| } |
| return utf8String; |
| } |
| |
| /** |
| * Creates a new string with enough capacity for size 8 bit characters plus a terminator. |
| * |
| * \param[in] factory - Pointer to the string factory that owns strings |
| * \param[in] size - In characters |
| * \return pointer to the new string. |
| */ |
| static pANTLR3_STRING |
| newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) |
| { |
| pANTLR3_STRING string; |
| |
| string = factory->newRaw(factory); |
| |
| if (string == NULL) |
| { |
| return string; |
| } |
| |
| /* Always add one more byte for a terminator ;-) |
| */ |
| string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1))); |
| *(string->chars) = '\0'; |
| string->size = size + 1; |
| |
| |
| return string; |
| } |
| /** |
| * Creates a new string with enough capacity for size UTF16 characters plus a terminator. |
| * |
| * \param[in] factory - Pointer to the string factory that owns strings |
| * \param[in] size - In characters (count double for surrogate pairs!!!) |
| * \return pointer to the new string. |
| */ |
| static pANTLR3_STRING |
| newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) |
| { |
| pANTLR3_STRING string; |
| |
| string = factory->newRaw(factory); |
| |
| if (string == NULL) |
| { |
| return string; |
| } |
| |
| /* Always add one more byte for a terminator ;-) |
| */ |
| string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1))); |
| *(string->chars) = '\0'; |
| string->size = size+1; /* Size is always in characters, as is len */ |
| |
| return string; |
| } |
| |
| /** Creates a new 8 bit string initialized with the 8 bit characters at the |
| * supplied ptr, of pre-determined size. |
| * \param[in] factory - Pointer to the string factory that owns the strings |
| * \param[in] ptr - Pointer to 8 bit encoded characters |
| * \return pointer to the new string |
| */ |
| static pANTLR3_STRING |
| newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) |
| { |
| pANTLR3_STRING string; |
| |
| string = factory->newSize(factory, size); |
| |
| if (string == NULL) |
| { |
| return NULL; |
| } |
| |
| if (size <= 0) |
| { |
| return string; |
| } |
| |
| if (ptr != NULL) |
| { |
| ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size); |
| *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */ |
| string->len = size; |
| } |
| |
| return string; |
| } |
| |
| /** Creates a new UTF16 string initialized with the 8 bit characters at the |
| * supplied 8 bit character ptr, of pre-determined size. |
| * \param[in] factory - Pointer to the string factory that owns the strings |
| * \param[in] ptr - Pointer to 8 bit encoded characters |
| * \return pointer to the new string |
| */ |
| static pANTLR3_STRING |
| newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) |
| { |
| pANTLR3_STRING string; |
| |
| /* newSize accepts size in characters, not bytes |
| */ |
| string = factory->newSize(factory, size); |
| |
| if (string == NULL) |
| { |
| return NULL; |
| } |
| |
| if (size <= 0) |
| { |
| return string; |
| } |
| |
| if (ptr != NULL) |
| { |
| pANTLR3_UINT16 out; |
| ANTLR3_INT32 inSize; |
| |
| out = (pANTLR3_UINT16)(string->chars); |
| inSize = size; |
| |
| while (inSize-- > 0) |
| { |
| *out++ = (ANTLR3_UINT16)(*ptr++); |
| } |
| |
| /* Terminate, these strings are usually used for Token streams and printing etc. |
| */ |
| *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; |
| |
| string->len = size; |
| } |
| |
| return string; |
| } |
| |
| /** Creates a new UTF16 string initialized with the UTF16 characters at the |
| * supplied ptr, of pre-determined size. |
| * \param[in] factory - Pointer to the string factory that owns the strings |
| * \param[in] ptr - Pointer to UTF16 encoded characters |
| * \return pointer to the new string |
| */ |
| static pANTLR3_STRING |
| newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) |
| { |
| pANTLR3_STRING string; |
| |
| string = factory->newSize(factory, size); |
| |
| if (string == NULL) |
| { |
| return NULL; |
| } |
| |
| if (size <= 0) |
| { |
| return string; |
| } |
| |
| if (ptr != NULL) |
| { |
| ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16))); |
| |
| /* Terminate, these strings are usually used for Token streams and printing etc. |
| */ |
| *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; |
| string->len = size; |
| } |
| |
| return string; |
| } |
| |
| /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer. |
| * \param[in] factory - Pointer to the string factory that owns strings. |
| * \param[in] ptr - Pointer to the 8 bit encoded string |
| * \return Pointer to the newly initialized string |
| */ |
| static pANTLR3_STRING |
| newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) |
| { |
| return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); |
| } |
| |
| /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer. |
| * \param[in] factory - Pointer to the string factory that owns strings. |
| * \param[in] ptr - Pointer to the 8 bit encoded string |
| * \return POinter to the newly initialized string |
| */ |
| static pANTLR3_STRING |
| newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) |
| { |
| return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); |
| } |
| |
| /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer. |
| * \param[in] factory - Pointer to the string factory that owns strings. |
| * \param[in] ptr - Pointer to the UTF16 encoded string |
| * \return Pointer to the newly initialized string |
| */ |
| static pANTLR3_STRING |
| newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) |
| { |
| pANTLR3_UINT16 in; |
| ANTLR3_UINT32 count; |
| |
| /** First, determine the length of the input string |
| */ |
| in = (pANTLR3_UINT16)ptr; |
| count = 0; |
| |
| while (*in++ != '\0') |
| { |
| count++; |
| } |
| return factory->newPtr(factory, ptr, count); |
| } |
| |
| static void |
| destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string) |
| { |
| // Record which string we are deleting |
| // |
| ANTLR3_UINT32 strIndex = string->index; |
| |
| // Ensure that the string was not factory made, or we would try |
| // to delete memory that wasn't allocated outside the factory |
| // block. |
| // Remove the specific indexed string from the vector |
| // |
| factory->strings->del(factory->strings, strIndex); |
| |
| // One less string in the vector, so decrement the factory index |
| // so that the next string allocated is indexed correctly with |
| // respect to the vector. |
| // |
| factory->index--; |
| |
| // Now we have to reindex the strings in the vector that followed |
| // the one we just deleted. We only do this if the one we just deleted |
| // was not the last one. |
| // |
| if (strIndex< factory->index) |
| { |
| // We must reindex the strings after the one we just deleted. |
| // The one that follows the one we just deleted is also out |
| // of whack, so we start there. |
| // |
| ANTLR3_UINT32 i; |
| |
| for (i = strIndex; i < factory->index; i++) |
| { |
| // Renumber the entry |
| // |
| ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i; |
| } |
| } |
| |
| // The string has been destroyed and the elements of the factory are reindexed. |
| // |
| |
| } |
| |
| static pANTLR3_STRING |
| printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) |
| { |
| pANTLR3_STRING string; |
| |
| /* We don't need to be too efficient here, this is mostly for error messages and so on. |
| */ |
| pANTLR3_UINT8 scannedText; |
| ANTLR3_UINT32 i; |
| |
| /* Assume we need as much as twice as much space to parse out the control characters |
| */ |
| string = factory->newSize(factory, instr->len *2 + 1); |
| |
| /* Scan through and replace unprintable (in terms of this routine) |
| * characters |
| */ |
| scannedText = string->chars; |
| |
| for (i = 0; i < instr->len; i++) |
| { |
| if (*(instr->chars + i) == '\n') |
| { |
| *scannedText++ = '\\'; |
| *scannedText++ = 'n'; |
| } |
| else if (*(instr->chars + i) == '\r') |
| { |
| *scannedText++ = '\\'; |
| *scannedText++ = 'r'; |
| } |
| else if (!isprint(*(instr->chars +i))) |
| { |
| *scannedText++ = '?'; |
| } |
| else |
| { |
| *scannedText++ = *(instr->chars + i); |
| } |
| } |
| *scannedText = '\0'; |
| |
| string->len = (ANTLR3_UINT32)(scannedText - string->chars); |
| |
| return string; |
| } |
| |
| static pANTLR3_STRING |
| printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) |
| { |
| pANTLR3_STRING string; |
| |
| /* We don't need to be too efficient here, this is mostly for error messages and so on. |
| */ |
| pANTLR3_UINT16 scannedText; |
| pANTLR3_UINT16 inText; |
| ANTLR3_UINT32 i; |
| ANTLR3_UINT32 outLen; |
| |
| /* Assume we need as much as twice as much space to parse out the control characters |
| */ |
| string = factory->newSize(factory, instr->len *2 + 1); |
| |
| /* Scan through and replace unprintable (in terms of this routine) |
| * characters |
| */ |
| scannedText = (pANTLR3_UINT16)(string->chars); |
| inText = (pANTLR3_UINT16)(instr->chars); |
| outLen = 0; |
| |
| for (i = 0; i < instr->len; i++) |
| { |
| if (*(inText + i) == '\n') |
| { |
| *scannedText++ = '\\'; |
| *scannedText++ = 'n'; |
| outLen += 2; |
| } |
| else if (*(inText + i) == '\r') |
| { |
| *scannedText++ = '\\'; |
| *scannedText++ = 'r'; |
| outLen += 2; |
| } |
| else if (!isprint(*(inText +i))) |
| { |
| *scannedText++ = '?'; |
| outLen++; |
| } |
| else |
| { |
| *scannedText++ = *(inText + i); |
| outLen++; |
| } |
| } |
| *scannedText = '\0'; |
| |
| string->len = outLen; |
| |
| return string; |
| } |
| |
| /** Fascist Capitalist Pig function created |
| * to oppress the workers comrade. |
| */ |
| static void |
| closeFactory (pANTLR3_STRING_FACTORY factory) |
| { |
| /* Delete the vector we were tracking the strings with, this will |
| * causes all the allocated strings to be deallocated too |
| */ |
| factory->strings->free(factory->strings); |
| |
| /* Delete the space for the factory itself |
| */ |
| ANTLR3_FREE((void *)factory); |
| } |
| |
| static pANTLR3_UINT8 |
| append8 (pANTLR3_STRING string, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| |
| len = (ANTLR3_UINT32)strlen(newbit); |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); |
| string->size = string->len + len + 1; |
| } |
| |
| /* Note we copy one more byte than the strlen in order to get the trailing |
| */ |
| ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1)); |
| string->len += len; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| appendUTF16_8 (pANTLR3_STRING string, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| pANTLR3_UINT16 apPoint; |
| ANTLR3_UINT32 count; |
| |
| len = (ANTLR3_UINT32)strlen(newbit); |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1)))); |
| string->size = string->len + len + 1; |
| } |
| |
| apPoint = ((pANTLR3_UINT16)string->chars) + string->len; |
| string->len += len; |
| |
| for (count = 0; count < len; count++) |
| { |
| *apPoint++ = *(newbit + count); |
| } |
| *apPoint = '\0'; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| pANTLR3_UINT16 in; |
| |
| /** First, determine the length of the input string |
| */ |
| in = (pANTLR3_UINT16)newbit; |
| len = 0; |
| |
| while (*in++ != '\0') |
| { |
| len++; |
| } |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) )); |
| string->size = string->len + len + 1; |
| } |
| |
| /* Note we copy one more byte than the strlen in order to get the trailing delimiter |
| */ |
| ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1))); |
| string->len += len; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| set8 (pANTLR3_STRING string, const char * chars) |
| { |
| ANTLR3_UINT32 len; |
| |
| len = (ANTLR3_UINT32)strlen(chars); |
| if (string->size < len + 1) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1)); |
| string->size = len + 1; |
| } |
| |
| /* Note we copy one more byte than the strlen in order to get the trailing '\0' |
| */ |
| ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1)); |
| string->len = len; |
| |
| return string->chars; |
| |
| } |
| |
| static pANTLR3_UINT8 |
| setUTF16_8 (pANTLR3_STRING string, const char * chars) |
| { |
| ANTLR3_UINT32 len; |
| ANTLR3_UINT32 count; |
| pANTLR3_UINT16 apPoint; |
| |
| len = (ANTLR3_UINT32)strlen(chars); |
| if (string->size < len + 1) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); |
| string->size = len + 1; |
| } |
| apPoint = ((pANTLR3_UINT16)string->chars); |
| string->len = len; |
| |
| for (count = 0; count < string->len; count++) |
| { |
| *apPoint++ = *(chars + count); |
| } |
| *apPoint = '\0'; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| setUTF16_UTF16 (pANTLR3_STRING string, const char * chars) |
| { |
| ANTLR3_UINT32 len; |
| pANTLR3_UINT16 in; |
| |
| /** First, determine the length of the input string |
| */ |
| in = (pANTLR3_UINT16)chars; |
| len = 0; |
| |
| while (*in++ != '\0') |
| { |
| len++; |
| } |
| |
| if (string->size < len + 1) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); |
| string->size = len + 1; |
| } |
| |
| /* Note we copy one more byte than the strlen in order to get the trailing '\0' |
| */ |
| ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16))); |
| string->len = len; |
| |
| return string->chars; |
| |
| } |
| |
| static pANTLR3_UINT8 |
| addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c) |
| { |
| if (string->size < string->len + 2) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2)); |
| string->size = string->len + 2; |
| } |
| *(string->chars + string->len) = (ANTLR3_UINT8)c; |
| *(string->chars + string->len + 1) = '\0'; |
| string->len++; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c) |
| { |
| pANTLR3_UINT16 ptr; |
| |
| if (string->size < string->len + 2) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2))); |
| string->size = string->len + 2; |
| } |
| ptr = (pANTLR3_UINT16)(string->chars); |
| |
| *(ptr + string->len) = (ANTLR3_UINT16)c; |
| *(ptr + string->len + 1) = '\0'; |
| string->len++; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| addi8 (pANTLR3_STRING string, ANTLR3_INT32 i) |
| { |
| ANTLR3_UINT8 newbit[32]; |
| |
| sprintf((char *)newbit, "%d", i); |
| |
| return string->append8(string, (const char *)newbit); |
| } |
| static pANTLR3_UINT8 |
| addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i) |
| { |
| ANTLR3_UINT8 newbit[32]; |
| |
| sprintf((char *)newbit, "%d", i); |
| |
| return string->append8(string, (const char *)newbit); |
| } |
| |
| static pANTLR3_UINT8 |
| inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) |
| { |
| ANTLR3_UINT8 newbit[32]; |
| |
| sprintf((char *)newbit, "%d", i); |
| return string->insert8(string, point, (const char *)newbit); |
| } |
| static pANTLR3_UINT8 |
| insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) |
| { |
| ANTLR3_UINT8 newbit[32]; |
| |
| sprintf((char *)newbit, "%d", i); |
| return string->insert8(string, point, (const char *)newbit); |
| } |
| |
| static pANTLR3_UINT8 |
| insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| |
| if (point >= string->len) |
| { |
| return string->append(string, newbit); |
| } |
| |
| len = (ANTLR3_UINT32)strlen(newbit); |
| |
| if (len == 0) |
| { |
| return string->chars; |
| } |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); |
| string->size = string->len + len + 1; |
| } |
| |
| /* Move the characters we are inserting before, including the delimiter |
| */ |
| ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1)); |
| |
| /* Note we copy the exact number of bytes |
| */ |
| ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len)); |
| |
| string->len += len; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| ANTLR3_UINT32 count; |
| pANTLR3_UINT16 inPoint; |
| |
| if (point >= string->len) |
| { |
| return string->append8(string, newbit); |
| } |
| |
| len = (ANTLR3_UINT32)strlen(newbit); |
| |
| if (len == 0) |
| { |
| return string->chars; |
| } |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); |
| string->size = string->len + len + 1; |
| } |
| |
| /* Move the characters we are inserting before, including the delimiter |
| */ |
| ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); |
| |
| string->len += len; |
| |
| inPoint = ((pANTLR3_UINT16)(string->chars))+point; |
| for (count = 0; count<len; count++) |
| { |
| *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count)); |
| } |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 |
| insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) |
| { |
| ANTLR3_UINT32 len; |
| pANTLR3_UINT16 in; |
| |
| if (point >= string->len) |
| { |
| return string->append(string, newbit); |
| } |
| |
| /** First, determine the length of the input string |
| */ |
| in = (pANTLR3_UINT16)newbit; |
| len = 0; |
| |
| while (*in++ != '\0') |
| { |
| len++; |
| } |
| |
| if (len == 0) |
| { |
| return string->chars; |
| } |
| |
| if (string->size < (string->len + len + 1)) |
| { |
| string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); |
| string->size = string->len + len + 1; |
| } |
| |
| /* Move the characters we are inserting before, including the delimiter |
| */ |
| ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); |
| |
| |
| /* Note we copy the exact number of characters |
| */ |
| ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len))); |
| |
| string->len += len; |
| |
| return string->chars; |
| } |
| |
| static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars) |
| { |
| return string->set(string, (const char *)(chars->chars)); |
| } |
| |
| static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit) |
| { |
| /* We may be passed an empty string, in which case we just return the current pointer |
| */ |
| if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL) |
| { |
| return string->chars; |
| } |
| else |
| { |
| return string->append(string, (const char *)(newbit->chars)); |
| } |
| } |
| |
| static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit) |
| { |
| return string->insert(string, point, (const char *)(newbit->chars)); |
| } |
| |
| /* Function that compares the text of a string to the supplied |
| * 8 bit character string and returns a result a la strcmp() |
| */ |
| static ANTLR3_UINT32 |
| compare8 (pANTLR3_STRING string, const char * compStr) |
| { |
| return strcmp((const char *)(string->chars), compStr); |
| } |
| |
| /* Function that compares the text of a string with the supplied character string |
| * (which is assumed to be in the same encoding as the string itself) and returns a result |
| * a la strcmp() |
| */ |
| static ANTLR3_UINT32 |
| compareUTF16_8 (pANTLR3_STRING string, const char * compStr) |
| { |
| pANTLR3_UINT16 ourString; |
| ANTLR3_UINT32 charDiff; |
| |
| ourString = (pANTLR3_UINT16)(string->chars); |
| |
| while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0')) |
| { |
| charDiff = *ourString - *compStr; |
| if (charDiff != 0) |
| { |
| return charDiff; |
| } |
| ourString++; |
| compStr++; |
| } |
| |
| /* At this point, one of the strings was terminated |
| */ |
| return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); |
| |
| } |
| |
| /* Function that compares the text of a string with the supplied character string |
| * (which is assumed to be in the same encoding as the string itself) and returns a result |
| * a la strcmp() |
| */ |
| static ANTLR3_UINT32 |
| compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8) |
| { |
| pANTLR3_UINT16 ourString; |
| pANTLR3_UINT16 compStr; |
| ANTLR3_UINT32 charDiff; |
| |
| ourString = (pANTLR3_UINT16)(string->chars); |
| compStr = (pANTLR3_UINT16)(compStr8); |
| |
| while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0')) |
| { |
| charDiff = *ourString - *compStr; |
| if (charDiff != 0) |
| { |
| return charDiff; |
| } |
| ourString++; |
| compStr++; |
| } |
| |
| /* At this point, one of the strings was terminated |
| */ |
| return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); |
| } |
| |
| /* Function that compares the text of a string with the supplied string |
| * (which is assumed to be in the same encoding as the string itself) and returns a result |
| * a la strcmp() |
| */ |
| static ANTLR3_UINT32 |
| compareS (pANTLR3_STRING string, pANTLR3_STRING compStr) |
| { |
| return string->compare(string, (const char *)compStr->chars); |
| } |
| |
| |
| /* Function that returns the character indexed at the supplied |
| * offset as a 32 bit character. |
| */ |
| static ANTLR3_UCHAR |
| charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset) |
| { |
| if (offset > string->len) |
| { |
| return (ANTLR3_UCHAR)'\0'; |
| } |
| else |
| { |
| return (ANTLR3_UCHAR)(*(string->chars + offset)); |
| } |
| } |
| |
| /* Function that returns the character indexed at the supplied |
| * offset as a 32 bit character. |
| */ |
| static ANTLR3_UCHAR |
| charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset) |
| { |
| if (offset > string->len) |
| { |
| return (ANTLR3_UCHAR)'\0'; |
| } |
| else |
| { |
| return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset)); |
| } |
| } |
| |
| /* Function that returns a substring of the supplied string a la .subString(s,e) |
| * in java runtimes. |
| */ |
| static pANTLR3_STRING |
| subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) |
| { |
| pANTLR3_STRING newStr; |
| |
| if (endIndex > string->len) |
| { |
| endIndex = string->len + 1; |
| } |
| newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex); |
| |
| return newStr; |
| } |
| |
| /* Returns a substring of the supplied string a la .subString(s,e) |
| * in java runtimes. |
| */ |
| static pANTLR3_STRING |
| subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) |
| { |
| pANTLR3_STRING newStr; |
| |
| if (endIndex > string->len) |
| { |
| endIndex = string->len + 1; |
| } |
| newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex); |
| |
| return newStr; |
| } |
| |
| /* Function that can convert the characters in the string to an integer |
| */ |
| static ANTLR3_INT32 |
| toInt32_8 (struct ANTLR3_STRING_struct * string) |
| { |
| return atoi((const char *)(string->chars)); |
| } |
| |
| /* Function that can convert the characters in the string to an integer |
| */ |
| static ANTLR3_INT32 |
| toInt32_UTF16 (struct ANTLR3_STRING_struct * string) |
| { |
| pANTLR3_UINT16 input; |
| ANTLR3_INT32 value; |
| ANTLR3_BOOLEAN negate; |
| |
| value = 0; |
| input = (pANTLR3_UINT16)(string->chars); |
| negate = ANTLR3_FALSE; |
| |
| if (*input == (ANTLR3_UCHAR)'-') |
| { |
| negate = ANTLR3_TRUE; |
| input++; |
| } |
| else if (*input == (ANTLR3_UCHAR)'+') |
| { |
| input++; |
| } |
| |
| while (*input != '\0' && isdigit(*input)) |
| { |
| value = value * 10; |
| value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0'); |
| input++; |
| } |
| |
| return negate ? -value : value; |
| } |
| |
| /* Function that returns a pointer to an 8 bit version of the string, |
| * which in this case is just the string as this is |
| * 8 bit encodiing anyway. |
| */ |
| static pANTLR3_STRING to8_8 (pANTLR3_STRING string) |
| { |
| return string; |
| } |
| |
| /* Function that returns an 8 bit version of the string, |
| * which in this case is returning all the UTF16 characters |
| * narrowed back into 8 bits, with characters that are too large |
| * replaced with '_' |
| */ |
| static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string) |
| { |
| pANTLR3_STRING newStr; |
| ANTLR3_UINT32 i; |
| |
| /* Create a new 8 bit string |
| */ |
| newStr = newRaw8(string->factory); |
| |
| if (newStr == NULL) |
| { |
| return NULL; |
| } |
| |
| /* Always add one more byte for a terminator |
| */ |
| newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1)); |
| newStr->size = string->len + 1; |
| newStr->len = string->len; |
| |
| /* Now copy each UTF16 charActer , making it an 8 bit character of |
| * some sort. |
| */ |
| for (i=0; i<string->len; i++) |
| { |
| ANTLR3_UCHAR c; |
| |
| c = *(((pANTLR3_UINT16)(string->chars)) + i); |
| |
| *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c); |
| } |
| |
| /* Terminate |
| */ |
| *(newStr->chars + newStr->len) = '\0'; |
| |
| return newStr; |
| } |