blob: b29c02024437c7a374b828abcbb6934931d2073b [file] [log] [blame]
/** \file
* Implementation of the ANTLR3 string and string factory classes
*/
// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <antlr3string.h>
/* Factory API
*/
static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory);
static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static void closeFactory(pANTLR3_STRING_FACTORY factory);
/* String API
*/
static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars);
static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars);
static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit);
static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit);
static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i);
static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr);
static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string);
static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string);
static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string);
/* Local helpers
*/
static void stringInit8 (pANTLR3_STRING string);
static void stringInitUTF16 (pANTLR3_STRING string);
static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
/**
 * Create a new string factory whose strings use the API matching \p encoding.
 *
 * \param[in] encoding  One of the ANTLR3_ENC_xxx constants.
 * \return The new factory, or NULL if the factory or its tracking vector
 *         could not be allocated.
 *
 * \remark No string creation functions are installed for the UTF32 encodings;
 *         for those, only destroy() and close() are available so that the
 *         factory itself can still be released.
 */
ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
{
    pANTLR3_STRING_FACTORY factory;

    /* Allocate memory
     */
    factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
    if (factory == NULL)
    {
        return NULL;
    }

    /* Now we make a new list to track the strings.
     */
    factory->strings = antlr3VectorNew(0);
    factory->index = 0;
    if (factory->strings == NULL)
    {
        ANTLR3_FREE(factory);
        return NULL;
    }

    // The tear-down entry points do not depend on the encoding, so they are
    // installed unconditionally. Without this a factory created for a UTF32
    // encoding would have no close() and could never be freed by its owner.
    //
    factory->destroy = destroy;
    factory->close = closeFactory;

    // Install the encoding specific API
    //
    // TODO: These encodings need equivalent functions to
    // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
    // The STRING stuff was intended as a quick and dirty hack for people that did not
    // want to worry about memory and performance very much, but nobody ever reads the
    // notes or comments or uses the email list search. I want to discourage using these
    // interfaces as it is much more efficient to use the pointers within the tokens
    // directly, so I am not implementing the string stuff for the newer encodings.
    // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
    // will not be useful beyond returning the text.
    //
    switch (encoding)
    {
        case ANTLR3_ENC_UTF32:
        case ANTLR3_ENC_UTF32BE:
        case ANTLR3_ENC_UTF32LE:

            // No string creation API exists for 32 bit encodings; the
            // creation pointers are left as allocated.
            //
            break;

        case ANTLR3_ENC_UTF16BE:
        case ANTLR3_ENC_UTF16LE:
        case ANTLR3_ENC_UTF16:

            factory->newRaw = newRawUTF16;
            factory->newSize = newSizeUTF16;
            factory->newPtr = newPtrUTF16_UTF16;
            factory->newPtr8 = newPtrUTF16_8;
            factory->newStr = newStrUTF16_UTF16;
            factory->newStr8 = newStrUTF16_8;
            factory->printable = printableUTF16;
            break;

        case ANTLR3_ENC_UTF8:
        case ANTLR3_ENC_EBCDIC:
        case ANTLR3_ENC_8BIT:
        default:

            factory->newRaw = newRaw8;
            factory->newSize = newSize8;
            factory->newPtr = newPtr8;
            factory->newPtr8 = newPtr8;
            factory->newStr = newStr8;
            factory->newStr8 = newStr8;
            factory->printable = printable8;
            break;
    }
    return factory;
}
/**
 * Allocate an empty string, install the 8 bit string API on it and register
 * it in the factory's tracking vector.
 *
 * \param factory  Factory that tracks the new string
 * \return The new, empty string, or NULL if the structure could not be allocated
 */
static pANTLR3_STRING
newRaw8 (pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING fresh = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if (fresh != NULL)
    {
        /* Fill in the API and remember which factory made us
         */
        stringInit8(fresh);
        fresh->factory = factory;

        /* Track the string in the next free slot, registering stringFree
         * as the routine that releases it.
         */
        factory->strings->set(factory->strings, factory->index, (void *) fresh, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        fresh->index = factory->index;
        factory->index++;
    }
    return fresh;
}
/**
 * Allocate an empty string, install the UTF16 string API on it and register
 * it in the factory's tracking vector.
 *
 * \param factory  Factory that tracks the new string
 * \return The new, empty string, or NULL if the structure could not be allocated
 */
static pANTLR3_STRING
newRawUTF16 (pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING fresh = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if (fresh != NULL)
    {
        /* Fill in the API and remember which factory made us
         */
        stringInitUTF16(fresh);
        fresh->factory = factory;

        /* Track the string in the next free slot, registering stringFree
         * as the routine that releases it.
         */
        factory->strings->set(factory->strings, factory->index, (void *) fresh, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        fresh->index = factory->index;
        factory->index++;
    }
    return fresh;
}
/**
 * Release a string: its character buffer (when one was allocated) and then
 * the string structure itself. Registered with the tracking vector as the
 * element free routine by the newRaw functions.
 *
 * \param string  The string to release
 */
static
void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
{
    /* The buffer goes first, but only if there is one
     */
    if (string->chars != NULL)
    {
        ANTLR3_FREE(string->chars);
    }

    /* Then the structure that described it
     */
    ANTLR3_FREE(string);
}
/**
 * Reset a string to the empty state and install the 8 bit implementation of
 * the string API on it.
 *
 * \param string  The string structure to initialize
 */
static void
stringInit8 (pANTLR3_STRING string)
{
    /* Empty buffer, tagged as 8 bit
     */
    string->chars = NULL;
    string->size = 0;
    string->len = 0;
    string->encoding = ANTLR3_ENC_8BIT ;

    /* Content modification: for an 8 bit string the native and the
     * "8 bit input" variants are one and the same function.
     */
    string->set = set8;
    string->set8 = set8;
    string->append = append8;
    string->append8 = append8;
    string->insert = insert8;
    string->insert8 = insert8;
    string->addc = addc8;
    string->addi = addi8;
    string->inserti = inserti8;

    /* Inspection and conversion
     */
    string->charAt = charAt8;
    string->compare = compare8;
    string->compare8 = compare8;
    string->subString = subString8;
    string->toInt32 = toInt32_8;
    string->to8 = to8_8;
    string->toUTF8 = toUTF8_8;

    /* Operations taking another string, shared by all encodings
     */
    string->setS = setS;
    string->appendS = appendS;
    string->insertS = insertS;
    string->compareS = compareS;
}
/**
 * Reset a string to the empty state and install the UTF16 implementation of
 * the string API on it.
 *
 * \param string  The string structure to initialize
 */
static void
stringInitUTF16 (pANTLR3_STRING string)
{
    string->len = 0;
    string->size = 0;
    string->chars = NULL;

    /* These strings hold 16 bit code units, so tag them as UTF16 (this was
     * previously mis-tagged as ANTLR3_ENC_8BIT).
     */
    string->encoding = ANTLR3_ENC_UTF16;

    /* API for UTF16 strings: the plain entry points take UTF16 input, the
     * "8" variants take 8 bit input and widen it.
     */
    string->set = setUTF16_UTF16;
    string->set8 = setUTF16_8;
    string->append = appendUTF16_UTF16;
    string->append8 = appendUTF16_8;
    string->insert = insertUTF16_UTF16;
    string->insert8 = insertUTF16_8;
    string->addi = addiUTF16;
    string->inserti = insertiUTF16;
    string->addc = addcUTF16;
    string->charAt = charAtUTF16;
    string->compare = compareUTF16_UTF16;
    string->compare8 = compareUTF16_8;
    string->subString = subStringUTF16;
    string->toInt32 = toInt32_UTF16;
    string->to8 = to8_UTF16;
    string->toUTF8 = toUTF8_UTF16;

    /* Operations taking another string, shared by all encodings
     */
    string->compareS = compareS;
    string->setS = setS;
    string->appendS = appendS;
    string->insertS = insertS;
}
/**
 * Reset a string to the empty state. No UTF-8 specific API is installed.
 *
 * \param string  The string structure to reset
 *
 * TODO: Implement UTF-8
 */
static void
stringInitUTF8 (pANTLR3_STRING string)
{
    /* Only the buffer bookkeeping is reset; there is no API to install yet
     */
    string->chars = NULL;
    string->size = 0;
    string->len = 0;
}
// Produce the "UTF8" form of an 8 bit string. As nothing is assumed about
// the 8 bit encoding, this is simply a new factory string holding a copy of
// the same bytes.
//
static pANTLR3_STRING
toUTF8_8 (pANTLR3_STRING string)
{
    pANTLR3_STRING_FACTORY owner = string->factory;
    pANTLR3_UINT8 bytes = (pANTLR3_UINT8)(string->chars);

    return owner->newPtr(owner, bytes, string->len);
}
// Convert a UTF16 string into a UTF8 representation using the Unicode.org
// supplied C algorithms, which are now contained within the ANTLR3 C runtime
// as permitted by the Unicode license (within the source code antlr3convertutf.c/.h).
// UCS2 has the same encoding as UTF16 so we can use the UTF16 converter.
//
// Returns a new factory string whose chars hold the null terminated UTF8
// bytes and whose len is the number of bytes produced. Returns NULL if the
// string could not be created; if only the output buffer could not be
// allocated the returned string has chars == NULL, len == 0 and size == 0.
//
static pANTLR3_STRING
toUTF8_UTF16 (pANTLR3_STRING string)
{
    UTF8 * outputEnd;
    UTF16 * inputEnd;
    pANTLR3_STRING utf8String;

    utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
    if (utf8String == NULL)
    {
        return NULL;
    }

    // Free the existing allocation and mark the string as holding nothing
    // until the replacement buffer is known to exist.
    //
    ANTLR3_FREE(utf8String->chars);
    utf8String->len = 0;
    utf8String->size = 0;

    // Reallocate according to the maximum expected size: each 16 bit input
    // unit needs at most 3 output bytes, plus one byte for the terminator.
    //
    utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC((size_t)(string->len * 3) + 1);
    if (utf8String->chars == NULL)
    {
        return utf8String;
    }
    utf8String->size = string->len * 3;

    inputEnd = (UTF16 *) (string->chars);
    outputEnd = (UTF8 *) (utf8String->chars);

    // Call the Unicode converter. The target end is one past the last byte
    // it may write, which leaves the final allocated byte for the terminator.
    //
    // We don't really care if things failed or not here, we just converted
    // everything that was vaguely possible and stopped when it wasn't. It is
    // up to the grammar programmer to verify that the input is sensible.
    //
    (void) ConvertUTF16toUTF8
            (
                (const UTF16**)&inputEnd,
                ((const UTF16 *)(string->chars)) + string->len,
                &outputEnd,
                ((UTF8 *)(utf8String->chars)) + utf8String->size,
                lenientConversion
            );

    utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);

    // Terminate directly after the last converted byte. (Writing at
    // outputEnd + 1 left chars[len] uninitialised and ran past the end of
    // the buffer for an empty input.)
    //
    *outputEnd = '\0';

    return utf8String;
}
/**
 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - In characters
 * \return pointer to the new, empty string, or NULL if either the string or
 *         its buffer could not be allocated.
 */
static pANTLR3_STRING
newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING string;

    string = factory->newRaw(factory);
    if (string == NULL)
    {
        return NULL;
    }

    /* Always add one more byte for a terminator ;-) The sum is formed in
     * size_t so that it cannot wrap before the allocation.
     */
    string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC(sizeof(ANTLR3_UINT8) * ((size_t)size + 1));
    if (string->chars == NULL)
    {
        /* Hand the half-built string back to the factory rather than
         * returning a string callers would write through a NULL buffer.
         * NOTE(review): relies on factory->destroy releasing the tracked entry.
         */
        factory->destroy(factory, string);
        return NULL;
    }

    *(string->chars) = '\0';
    string->size = size + 1;
    return string;
}
/**
 * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - In characters (count double for surrogate pairs!!!)
 * \return pointer to the new, empty string, or NULL if either the string or
 *         its buffer could not be allocated.
 */
static pANTLR3_STRING
newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING string;

    string = factory->newRaw(factory);
    if (string == NULL)
    {
        return NULL;
    }

    /* Always add one more 16 bit character for a terminator ;-) The sum is
     * formed in size_t so that it cannot wrap before the allocation.
     */
    string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC(sizeof(ANTLR3_UINT16) * ((size_t)size + 1));
    if (string->chars == NULL)
    {
        /* Hand the half-built string back to the factory rather than
         * returning a string callers would write through a NULL buffer.
         * NOTE(review): relies on factory->destroy releasing the tracked entry.
         */
        factory->destroy(factory, string);
        return NULL;
    }

    /* The terminator is a full 16 bit unit, so clear both of its bytes
     */
    *((pANTLR3_UINT16)(string->chars)) = 0;
    string->size = size+1; /* Size is always in characters, as is len */
    return string;
}
/** Creates a new 8 bit string holding a copy of size 8 bit characters read
 * from the supplied pointer.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters (NULL yields an empty string)
 * \param[in] size - Number of characters to copy
 * \return pointer to the new string, or NULL if it could not be created
 */
static pANTLR3_STRING
newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING result = factory->newSize(factory, size);

    /* Copy only when there is a string to fill and something to fill it with
     */
    if (result != NULL && size > 0 && ptr != NULL)
    {
        ANTLR3_MEMMOVE(result->chars, (const void *)ptr, size);

        /* Terminate, these strings are usually used for Token streams and printing etc. */
        result->chars[size] = '\0';
        result->len = size;
    }
    return result;
}
/** Creates a new UTF16 string initialized with the 8 bit characters at the
 * supplied 8 bit character ptr, of pre-determined size. Each input byte is
 * widened to one 16 bit character.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters (NULL yields an empty string)
 * \param[in] size - Number of 8 bit characters to convert
 * \return pointer to the new string, or NULL if it could not be created
 */
static pANTLR3_STRING
newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING string;

    /* newSize accepts size in characters, not bytes
     */
    string = factory->newSize(factory, size);
    if (string == NULL)
    {
        return NULL;
    }
    if (size == 0)
    {
        return string;
    }
    if (ptr != NULL)
    {
        pANTLR3_UINT16 out;
        ANTLR3_UINT32 i;

        out = (pANTLR3_UINT16)(string->chars);

        /* The counter is unsigned like size itself; a signed counter would
         * copy nothing for sizes above INT32_MAX while len was still set.
         */
        for (i = 0; i < size; i++)
        {
            out[i] = (ANTLR3_UINT16)(ptr[i]);
        }

        /* Terminate, these strings are usually used for Token streams and printing etc.
         */
        out[size] = '\0';
        string->len = size;
    }
    return string;
}
/** Creates a new UTF16 string holding a copy of size UTF16 characters read
 * from the supplied pointer.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to UTF16 encoded characters (NULL yields an empty string)
 * \param[in] size - Number of 16 bit characters to copy
 * \return pointer to the new string, or NULL if it could not be created
 */
static pANTLR3_STRING
newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING result = factory->newSize(factory, size);

    /* Copy only when there is a string to fill and something to fill it with
     */
    if (result != NULL && size > 0 && ptr != NULL)
    {
        pANTLR3_UINT16 units = (pANTLR3_UINT16)(result->chars);

        ANTLR3_MEMMOVE(result->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));

        /* Terminate, these strings are usually used for Token streams and printing etc.
         */
        units[size] = '\0';
        result->len = size;
    }
    return result;
}
/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as
 *                  an empty string
 * \return Pointer to the newly initialized string, or NULL if it could not be created
 */
static pANTLR3_STRING
newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32 length;

    /* strlen() must not be handed a NULL pointer
     */
    length = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, length);
}
/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as
 *                  an empty string
 * \return Pointer to the newly initialized string, or NULL if it could not be created
 */
static pANTLR3_STRING
newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32 length;

    /* strlen() must not be handed a NULL pointer
     */
    length = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, length);
}
/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the UTF16 encoded string (terminated by a 16 bit
 *                  zero); NULL is treated as an empty string
 * \return Pointer to the newly initialized string, or NULL if it could not be created
 */
static pANTLR3_STRING
newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    pANTLR3_UINT16 in;
    ANTLR3_UINT32 count;

    /* First, determine the length of the input string in 16 bit characters.
     * A NULL input simply has no characters to count.
     */
    count = 0;
    if (ptr != NULL)
    {
        in = (pANTLR3_UINT16)ptr;
        while (*in++ != '\0')
        {
            count++;
        }
    }
    return factory->newPtr(factory, ptr, count);
}
/**
 * Remove a single string from the factory that tracks it and renumber the
 * strings that followed it.
 *
 * \param factory  The factory tracking the string
 * \param string   The string to remove; must not be used after this call
 *
 * The request is ignored when either argument is NULL or when the string is
 * not the one the factory tracks at string->index (for example a string made
 * by another factory, or one that was already destroyed).
 */
static void
destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
{
    ANTLR3_UINT32 strIndex;
    ANTLR3_UINT32 i;

    if (factory == NULL || string == NULL)
    {
        return;
    }

    // Record which string we are deleting
    //
    strIndex = string->index;

    // Ensure that the string really is the one this factory holds in that
    // slot. Deleting on the strength of an unverified index would remove
    // some other string and could wrap the factory index below zero.
    //
    if (strIndex >= factory->index
        || factory->strings->elements[strIndex].element != (void *)string)
    {
        return;
    }

    // Remove the specific indexed string from the vector.
    // NOTE(review): del is expected to release the element through the free
    // routine registered when the string was created - confirm in the vector code.
    //
    factory->strings->del(factory->strings, strIndex);

    // One less string in the vector, so decrement the factory index
    // so that the next string allocated is indexed correctly with
    // respect to the vector.
    //
    factory->index--;

    // Renumber the strings that followed the one we just deleted. The loop
    // body does not run when the deleted string was the last one.
    //
    for (i = strIndex; i < factory->index; i++)
    {
        ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
    }
}
/**
 * Build a printable copy of an 8 bit string: newline and carriage return
 * become the two character sequences \n and \r, any other character for
 * which isprint() is false becomes '?', everything else is copied as is.
 *
 * \param factory  Factory used to create the result
 * \param instr    The string to render
 * \return The new string, or NULL if it could not be created
 */
static pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING string;

    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT8 scannedText;
    ANTLR3_UINT32 i;

    /* Assume we need as much as twice as much space to parse out the control characters
     */
    string = factory->newSize(factory, instr->len *2 + 1);
    if (string == NULL)
    {
        return NULL;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = string->chars;
    for (i = 0; i < instr->len; i++)
    {
        ANTLR3_UINT8 c = *(instr->chars + i);

        if (c == '\n')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'n';
        }
        else if (c == '\r')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'r';
        }
        else if (!isprint(c))
        {
            *scannedText++ = '?';
        }
        else
        {
            *scannedText++ = c;
        }
    }
    *scannedText = '\0';
    string->len = (ANTLR3_UINT32)(scannedText - string->chars);
    return string;
}
/**
 * Build a printable copy of a UTF16 string: newline and carriage return
 * become the two character sequences \n and \r, and any other character in
 * the range 0..255 for which isprint() is false becomes '?'. Characters above
 * 255 cannot be classified by isprint() and are copied unchanged.
 *
 * \param factory  Factory used to create the result
 * \param instr    The string to render
 * \return The new string, or NULL if it could not be created
 */
static pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING string;

    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT16 scannedText;
    pANTLR3_UINT16 inText;
    ANTLR3_UINT32 i;
    ANTLR3_UINT32 outLen;

    /* Assume we need as much as twice as much space to parse out the control characters
     */
    string = factory->newSize(factory, instr->len *2 + 1);
    if (string == NULL)
    {
        return NULL;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = (pANTLR3_UINT16)(string->chars);
    inText = (pANTLR3_UINT16)(instr->chars);
    outLen = 0;
    for (i = 0; i < instr->len; i++)
    {
        ANTLR3_UINT16 c = *(inText + i);

        if (c == '\n')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'n';
            outLen += 2;
        }
        else if (c == '\r')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'r';
            outLen += 2;
        }
        else if (c <= 0xFF && !isprint((int)c))
        {
            /* isprint() is only defined for unsigned char values (and EOF),
             * hence the range test before the call.
             */
            *scannedText++ = '?';
            outLen++;
        }
        else
        {
            *scannedText++ = c;
            outLen++;
        }
    }
    *scannedText = '\0';
    string->len = outLen;
    return string;
}
/**
 * Shut the factory down: free the tracking vector and then the factory
 * structure itself. The factory must not be used after this call.
 *
 * \param factory  The factory to close
 */
static void
closeFactory (pANTLR3_STRING_FACTORY factory)
{
    /* Freeing the tracking vector is what releases the tracked strings:
     * each was registered with stringFree as its free routine.
     */
    factory->strings->free(factory->strings);

    /* With the vector gone, only the factory structure remains
     */
    ANTLR3_FREE((void *)factory);
}
/**
 * Append a null terminated 8 bit string to an 8 bit string.
 *
 * \param string  The string to extend
 * \param newbit  Null terminated text to append; NULL appends nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
append8 (pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;

    if (newbit == NULL)
    {
        return string->chars;
    }

    len = (ANTLR3_UINT32)strlen(newbit);
    needed = string->len + len + 1;

    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist, so a
         * failed reallocation neither leaks nor leaves chars as NULL.
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(needed));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
    string->len += len;
    return string->chars;
}
/**
 * Append a null terminated 8 bit string to a UTF16 string, widening each
 * byte to one 16 bit character.
 *
 * \param string  The UTF16 string to extend
 * \param newbit  Null terminated 8 bit text to append; NULL appends nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
appendUTF16_8 (pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;
    pANTLR3_UINT16 apPoint;
    ANTLR3_UINT32 count;

    if (newbit == NULL)
    {
        return string->chars;
    }

    len = (ANTLR3_UINT32)strlen(newbit);
    needed = string->len + len + 1;

    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(needed))));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
    string->len += len;
    for (count = 0; count < len; count++)
    {
        /* Go through an unsigned byte so that, where char is signed, values
         * above 127 are not sign extended into the 16 bit character.
         */
        *apPoint++ = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(newbit + count));
    }
    *apPoint = '\0';
    return string->chars;
}
/**
 * Append a UTF16 string (terminated by a 16 bit zero) to a UTF16 string.
 *
 * \param string  The UTF16 string to extend
 * \param newbit  Pointer to the UTF16 text to append; NULL appends nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;
    pANTLR3_UINT16 in;

    if (newbit == NULL)
    {
        return string->chars;
    }

    /** First, determine the length of the input string
     */
    in = (pANTLR3_UINT16)newbit;
    len = 0;
    while (*in++ != '\0')
    {
        len++;
    }

    needed = string->len + len + 1;
    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(needed) ));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    /* Note we copy one more character than the length in order to get the trailing delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
    string->len += len;
    return string->chars;
}
/**
 * Replace the content of an 8 bit string with a null terminated 8 bit string.
 *
 * \param string  The string to overwrite
 * \param chars   Null terminated replacement text; NULL leaves the string unchanged
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
set8 (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32 len;

    if (chars == NULL)
    {
        return string->chars;
    }

    len = (ANTLR3_UINT32)strlen(chars);
    if (string->size < len + 1)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = len + 1;
    }

    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
    string->len = len;
    return string->chars;
}
/**
 * Replace the content of a UTF16 string with a null terminated 8 bit string,
 * widening each byte to one 16 bit character.
 *
 * \param string  The UTF16 string to overwrite
 * \param chars   Null terminated 8 bit replacement text; NULL leaves the string unchanged
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
setUTF16_8 (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 count;
    pANTLR3_UINT16 apPoint;

    if (chars == NULL)
    {
        return string->chars;
    }

    len = (ANTLR3_UINT32)strlen(chars);
    if (string->size < len + 1)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = len + 1;
    }

    apPoint = ((pANTLR3_UINT16)string->chars);
    string->len = len;
    for (count = 0; count < len; count++)
    {
        /* Go through an unsigned byte so that, where char is signed, values
         * above 127 are not sign extended into the 16 bit character.
         */
        *apPoint++ = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(chars + count));
    }
    *apPoint = '\0';
    return string->chars;
}
/**
 * Replace the content of a UTF16 string with a UTF16 string (terminated by a
 * 16 bit zero).
 *
 * \param string  The UTF16 string to overwrite
 * \param chars   Pointer to the UTF16 replacement text; NULL leaves the string unchanged
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
setUTF16_UTF16 (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32 len;
    pANTLR3_UINT16 in;

    if (chars == NULL)
    {
        return string->chars;
    }

    /** First, determine the length of the input string
     */
    in = (pANTLR3_UINT16)chars;
    len = 0;
    while (*in++ != '\0')
    {
        len++;
    }

    if (string->size < len + 1)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = len + 1;
    }

    /* Note we copy one more character than the length in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
    string->len = len;
    return string->chars;
}
/**
 * Append a single character, narrowed to 8 bits, to an 8 bit string.
 *
 * \param string  The string to extend
 * \param c       The character to append
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    /* Room is needed for the new character and the terminator
     */
    if (string->size < string->len + 2)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = string->len + 2;
    }
    *(string->chars + string->len) = (ANTLR3_UINT8)c;
    *(string->chars + string->len + 1) = '\0';
    string->len++;
    return string->chars;
}
/**
 * Append a single character, narrowed to 16 bits, to a UTF16 string.
 *
 * \param string  The UTF16 string to extend
 * \param c       The character to append (values above 0xFFFF are truncated
 *                to 16 bits, not encoded as a surrogate pair)
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    pANTLR3_UINT16 ptr;

    /* Room is needed for the new character and the terminator
     */
    if (string->size < string->len + 2)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = string->len + 2;
    }
    ptr = (pANTLR3_UINT16)(string->chars);
    *(ptr + string->len) = (ANTLR3_UINT16)c;
    *(ptr + string->len + 1) = '\0';
    string->len++;
    return string->chars;
}
/**
 * Append the decimal text of an integer to an 8 bit string.
 *
 * \param string  The string to extend
 * \param i       The integer to format with "%d" and append
 * \return Whatever the string's append8 function returns
 */
static pANTLR3_UINT8
addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
{
    /* Scratch space for the formatted number
     */
    char digits[32];

    sprintf(digits, "%d", i);
    return string->append8(string, digits);
}
/**
 * Append the decimal text of an integer to a UTF16 string. The number is
 * formatted as 8 bit text and handed to the string's append8 function.
 *
 * \param string  The string to extend
 * \param i       The integer to format with "%d" and append
 * \return Whatever the string's append8 function returns
 */
static pANTLR3_UINT8
addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i)
{
    /* Scratch space for the formatted number
     */
    char digits[32];

    sprintf(digits, "%d", i);
    return string->append8(string, digits);
}
/**
 * Insert the decimal text of an integer into an 8 bit string.
 *
 * \param string  The string to modify
 * \param point   Character offset at which to insert
 * \param i       The integer to format with "%d" and insert
 * \return Whatever the string's insert8 function returns
 */
static pANTLR3_UINT8
inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    /* Scratch space for the formatted number
     */
    char digits[32];

    sprintf(digits, "%d", i);
    return string->insert8(string, point, digits);
}
/**
 * Insert the decimal text of an integer into a UTF16 string. The number is
 * formatted as 8 bit text and handed to the string's insert8 function.
 *
 * \param string  The string to modify
 * \param point   Character offset at which to insert
 * \param i       The integer to format with "%d" and insert
 * \return Whatever the string's insert8 function returns
 */
static pANTLR3_UINT8
insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    /* Scratch space for the formatted number
     */
    char digits[32];

    sprintf(digits, "%d", i);
    return string->insert8(string, point, digits);
}
/**
 * Insert a null terminated 8 bit string into an 8 bit string.
 *
 * \param string  The string to modify
 * \param point   Character offset to insert before; offsets at or beyond the
 *                current length append instead
 * \param newbit  Null terminated text to insert; NULL inserts nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;

    if (newbit == NULL)
    {
        return string->chars;
    }
    if (point >= string->len)
    {
        return string->append(string, newbit);
    }

    len = (ANTLR3_UINT32)strlen(newbit);
    if (len == 0)
    {
        return string->chars;
    }

    needed = string->len + len + 1;
    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(needed));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));

    /* Note we copy the exact number of bytes
     */
    ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
    string->len += len;
    return string->chars;
}
/**
 * Insert a null terminated 8 bit string into a UTF16 string, widening each
 * byte to one 16 bit character.
 *
 * \param string  The UTF16 string to modify
 * \param point   Character offset to insert before; offsets at or beyond the
 *                current length append instead
 * \param newbit  Null terminated 8 bit text to insert; NULL inserts nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;
    ANTLR3_UINT32 count;
    pANTLR3_UINT16 inPoint;

    if (newbit == NULL)
    {
        return string->chars;
    }
    if (point >= string->len)
    {
        return string->append8(string, newbit);
    }

    len = (ANTLR3_UINT32)strlen(newbit);
    if (len == 0)
    {
        return string->chars;
    }

    needed = string->len + len + 1;
    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(needed)));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
    string->len += len;

    inPoint = ((pANTLR3_UINT16)(string->chars))+point;
    for (count = 0; count<len; count++)
    {
        /* Go through an unsigned byte so that, where char is signed, values
         * above 127 are not sign extended into the 16 bit character.
         */
        *(inPoint + count) = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(newbit+count));
    }
    return string->chars;
}
/**
 * Insert a UTF16 string (terminated by a 16 bit zero) into a UTF16 string.
 *
 * \param string  The UTF16 string to modify
 * \param point   Character offset to insert before; offsets at or beyond the
 *                current length append instead
 * \param newbit  Pointer to the UTF16 text to insert; NULL inserts nothing
 * \return Pointer to the (possibly moved) character buffer, or NULL if the
 *         buffer could not be grown, in which case the string is unchanged.
 */
static pANTLR3_UINT8
insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32 len;
    ANTLR3_UINT32 needed;
    pANTLR3_UINT16 in;

    if (newbit == NULL)
    {
        return string->chars;
    }
    if (point >= string->len)
    {
        return string->append(string, newbit);
    }

    /** First, determine the length of the input string
     */
    in = (pANTLR3_UINT16)newbit;
    len = 0;
    while (*in++ != '\0')
    {
        len++;
    }
    if (len == 0)
    {
        return string->chars;
    }

    needed = string->len + len + 1;
    if (string->size < needed)
    {
        /* Keep the old buffer until the new one is known to exist
         */
        pANTLR3_UINT8 grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(needed)));
        if (grown == NULL)
        {
            return NULL;
        }
        string->chars = grown;
        string->size = needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));

    /* Note we copy the exact number of characters
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
    string->len += len;
    return string->chars;
}
/**
 * Replace the content of a string with the content of another string, which
 * is assumed to be in the same encoding.
 *
 * \param string  The string to overwrite
 * \param chars   The source string. NULL leaves \p string unchanged; a source
 *                with no character buffer makes \p string empty.
 * \return Whatever the string's set function returns (the current buffer
 *         when nothing was done)
 */
static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
{
    /* A 16 bit zero reads as an empty string through both the 8 bit and the
     * UTF16 implementations of set.
     */
    static const ANTLR3_UINT16 emptyText = 0;

    if (chars == NULL)
    {
        return string->chars;
    }
    if (chars->chars == NULL)
    {
        /* A string that never received content has no buffer to hand to set
         */
        return string->set(string, (const char *)&emptyText);
    }
    return string->set(string, (const char *)(chars->chars));
}
/**
 * Append the content of another string, which is assumed to be in the same
 * encoding.
 *
 * \param string  The string to extend
 * \param newbit  The string to append; NULL or empty appends nothing
 * \return The string's character buffer
 */
static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
{
    /* Only a string that actually carries characters is worth appending
     */
    if (newbit != NULL && newbit->len != 0 && newbit->size != 0 && newbit->chars != NULL)
    {
        return string->append(string, (const char *)(newbit->chars));
    }

    /* Nothing to add, so just return the current pointer
     */
    return string->chars;
}
/**
 * Insert the content of another string, which is assumed to be in the same
 * encoding.
 *
 * \param string  The string to modify
 * \param point   Character offset to insert before
 * \param newbit  The string to insert; NULL or empty inserts nothing
 * \return The string's character buffer
 */
static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
{
    /* We may be passed an empty string, in which case we just return the
     * current pointer (the same rule appendS applies).
     */
    if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
    {
        return string->chars;
    }
    return string->insert(string, point, (const char *)(newbit->chars));
}
/* Compares the text of an 8 bit string with the supplied null terminated
 * 8 bit character string. The strcmp() result is returned converted to the
 * unsigned return type.
 */
static ANTLR3_UINT32
compare8 (pANTLR3_STRING string, const char * compStr)
{
    const char * ours = (const char *)(string->chars);
    int order = strcmp(ours, compStr);

    return (ANTLR3_UINT32)order;
}
/* Function that compares the text of a UTF16 string with the supplied null
 * terminated 8 bit character string and returns a result a la strcmp():
 * zero when equal, otherwise the difference between the first pair of
 * characters that differ, converted to the unsigned return type.
 */
static ANTLR3_UINT32
compareUTF16_8 (pANTLR3_STRING string, const char * compStr)
{
    pANTLR3_UINT16 ourString;
    const ANTLR3_UINT8 * theirString;
    ANTLR3_UINT32 charDiff;

    ourString = (pANTLR3_UINT16)(string->chars);

    /* Read the 8 bit text as unsigned bytes. Where char is signed, a byte
     * above 127 would otherwise be negative and never match the widened
     * 16 bit character that was stored for it.
     */
    theirString = (const ANTLR3_UINT8 *)compStr;

    while ((*ourString != '\0') && (*theirString != '\0'))
    {
        charDiff = (ANTLR3_UINT32)(*ourString) - (ANTLR3_UINT32)(*theirString);
        if (charDiff != 0)
        {
            return charDiff;
        }
        ourString++;
        theirString++;
    }

    /* At this point, one of the strings was terminated
     */
    return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*theirString));
}
/* Compares the text of a UTF16 string with the supplied UTF16 character
 * string (terminated by a 16 bit zero) and returns a result a la strcmp():
 * zero when equal, otherwise the difference between the first pair of
 * characters that differ, converted to the unsigned return type.
 */
static ANTLR3_UINT32
compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8)
{
    pANTLR3_UINT16 ours = (pANTLR3_UINT16)(string->chars);
    pANTLR3_UINT16 theirs = (pANTLR3_UINT16)(compStr8);

    /* Walk both strings while neither has reached its terminator
     */
    for (; ((ANTLR3_UCHAR)(*ours) != '\0') && ((ANTLR3_UCHAR)(*theirs) != '\0'); ours++, theirs++)
    {
        ANTLR3_UINT32 delta = *ours - *theirs;

        if (delta != 0)
        {
            return delta;
        }
    }

    /* One (or both) of the strings ended; the terminator decides the result
     */
    return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ours) - (ANTLR3_UCHAR)(*theirs));
}
/* Compares the text of a string with the text of another string (which is
 * assumed to be in the same encoding as the string itself) by handing the
 * other string's characters to this string's compare function.
 */
static ANTLR3_UINT32
compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
{
    const char * otherText = (const char *)compStr->chars;

    return string->compare(string, otherText);
}
/* Function that returns the character indexed at the supplied
 * offset as a 32 bit character. Offsets at or beyond the end of the
 * string yield '\0'.
 */
static ANTLR3_UCHAR
charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    /* offset == len addresses the terminator, so '\0' is the answer there
     * too. Answering it here keeps an empty string that has no buffer yet
     * (chars == NULL, len == 0) from being dereferenced.
     */
    if (offset >= string->len)
    {
        return (ANTLR3_UCHAR)'\0';
    }
    return (ANTLR3_UCHAR)(*(string->chars + offset));
}
/* Function that returns the 16 bit character indexed at the supplied
 * offset as a 32 bit character. Offsets at or beyond the end of the
 * string yield '\0'.
 */
static ANTLR3_UCHAR
charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    /* offset == len addresses the terminator, so '\0' is the answer there
     * too. Answering it here keeps an empty string that has no buffer yet
     * (chars == NULL, len == 0) from being dereferenced.
     */
    if (offset >= string->len)
    {
        return (ANTLR3_UCHAR)'\0';
    }
    return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
}
/* Function that returns a substring of the supplied string a la .subString(s,e)
 * in java runtimes: the characters from startIndex up to, but not including,
 * endIndex. An endIndex beyond the end of the string is clamped to the
 * string length, and an empty or inverted range yields an empty string.
 * Returns NULL if the new string could not be created.
 */
static pANTLR3_STRING
subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    /* Clamp to the length itself; clamping to len + 1 would copy the
     * terminator into the result as if it were a character.
     */
    if (endIndex > string->len)
    {
        endIndex = string->len;
    }

    /* Nothing to copy. This also keeps endIndex - startIndex from wrapping
     * around to a huge unsigned size.
     */
    if (startIndex >= endIndex)
    {
        return string->factory->newPtr(string->factory, string->chars, 0);
    }

    return string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
}
/* Returns a substring of the supplied UTF16 string a la .subString(s,e)
 * in java runtimes: the 16 bit characters from startIndex up to, but not
 * including, endIndex. An endIndex beyond the end of the string is clamped
 * to the string length, and an empty or inverted range yields an empty
 * string. Returns NULL if the new string could not be created.
 */
static pANTLR3_STRING
subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    /* Clamp to the length itself; clamping to len + 1 would copy the
     * terminator into the result as if it were a character.
     */
    if (endIndex > string->len)
    {
        endIndex = string->len;
    }

    /* Nothing to copy. This also keeps endIndex - startIndex from wrapping
     * around to a huge unsigned size.
     */
    if (startIndex >= endIndex)
    {
        return string->factory->newPtr(string->factory, string->chars, 0);
    }

    return string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
}
/* Function that can convert the characters in the string to an integer,
 * using atoi(). A string that has no character buffer yet converts to 0.
 */
static ANTLR3_INT32
toInt32_8 (struct ANTLR3_STRING_struct * string)
{
    /* atoi() must not be handed a NULL pointer
     */
    if (string->chars == NULL)
    {
        return 0;
    }
    return atoi((const char *)(string->chars));
}
/* Function that can convert the characters in a UTF16 string to an integer:
 * an optional leading '-' or '+' followed by the decimal digits '0'..'9'.
 * Conversion stops at the first character that is not such a digit. A string
 * that has no character buffer yet converts to 0.
 */
static ANTLR3_INT32
toInt32_UTF16 (struct ANTLR3_STRING_struct * string)
{
    pANTLR3_UINT16 input;
    ANTLR3_INT32 value;
    ANTLR3_BOOLEAN negate;

    value = 0;
    input = (pANTLR3_UINT16)(string->chars);
    negate = ANTLR3_FALSE;

    if (input == NULL)
    {
        return 0;
    }

    if (*input == (ANTLR3_UCHAR)'-')
    {
        negate = ANTLR3_TRUE;
        input++;
    }
    else if (*input == (ANTLR3_UCHAR)'+')
    {
        input++;
    }

    /* The digits are range tested directly: isdigit() is only defined for
     * unsigned char values (and EOF), which a 16 bit character can exceed.
     * The terminator is not a digit, so it ends the loop as well.
     */
    while (*input >= '0' && *input <= '9')
    {
        value = value * 10;
        value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
        input++;
    }
    return negate ? -value : value;
}
/* Returns the 8 bit version of an 8 bit string. As the string is already
 * 8 bit encoded no conversion is needed and the very same string (not a
 * copy) is handed back.
 */
static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
{
    pANTLR3_STRING alreadyEightBit = string;

    return alreadyEightBit;
}
/* Function that returns an 8 bit version of the string,
 * which in this case is returning all the UTF16 characters
 * narrowed back into 8 bits, with characters that are too large
 * replaced with '_'. The result is a new string tracked by the same
 * factory; NULL is returned if it could not be allocated.
 */
static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string)
{
    pANTLR3_STRING newStr;
    ANTLR3_UINT32 i;

    /* Create a new 8 bit string
     */
    newStr = newRaw8(string->factory);
    if (newStr == NULL)
    {
        return NULL;
    }

    /* Always add one more byte for a terminator
     */
    newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)string->len + 1);
    if (newStr->chars == NULL)
    {
        /* Hand the half-built string back to the factory rather than
         * writing through a NULL buffer below.
         * NOTE(review): relies on factory->destroy releasing the tracked entry.
         */
        string->factory->destroy(string->factory, newStr);
        return NULL;
    }
    newStr->size = string->len + 1;
    newStr->len = string->len;

    /* Now copy each UTF16 character, making it an 8 bit character of
     * some sort.
     */
    for (i=0; i<string->len; i++)
    {
        ANTLR3_UCHAR c;

        c = *(((pANTLR3_UINT16)(string->chars)) + i);
        *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
    }

    /* Terminate
     */
    *(newStr->chars + newStr->len) = '\0';
    return newStr;
}