| /** \file |
| * \brief The ANTLR3 C filestream is used when the source character stream |
| * is a filesystem based input set and all the characters in the filestream |
| * can be loaded at once into memory and away the lexer goes. |
| * |
| * A number of initializers are provided in order that various character |
| * sets can be supported from input files. The ANTLR3 C runtime expects |
| * to deal with UTF32 characters only (the reasons for this are to |
| * do with the simplification of C code when using this form of Unicode |
| * encoding, though this is not a panacea). More information can be |
| * found on this by consulting: |
| * - http://www.unicode.org/versions/Unicode4.0.0/ch02.pdf#G11178 |
| * where a well-grounded discussion of the available encoding formats |
| * may be found. |
| * |
| */ |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
| // http://www.temporal-wave.com |
| // http://www.linkedin.com/in/jimidle |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <antlr3.h> |
| |
| static void setupInputStream (pANTLR3_INPUT_STREAM input); |
| static pANTLR3_INPUT_STREAM antlr3CreateFileStream (pANTLR3_UINT8 fileName); |
| static pANTLR3_INPUT_STREAM antlr3CreateStringStream (pANTLR3_UINT8 data); |
| |
| ANTLR3_API pANTLR3_INPUT_STREAM |
| antlr3FileStreamNew(pANTLR3_UINT8 fileName, ANTLR3_UINT32 encoding) |
| { |
| pANTLR3_INPUT_STREAM input; |
| |
| // First order of business is to read the file into some buffer space |
| // as just straight 8 bit bytes. Then we will work out the encoding and |
| // byte order and adjust the API functions that are installed for the |
| // default 8Bit stream accordingly. |
| // |
| input = antlr3CreateFileStream(fileName); |
| if (input == NULL) |
| { |
| return NULL; |
| } |
| |
| // We have the data in memory now so we can deal with it according to |
| // the encoding scheme we were given by the user. |
| // |
| input->encoding = encoding; |
| |
| // Now we need to work out the endian type and install any |
| // API functions that differ from 8Bit |
| // |
| setupInputStream(input); |
| |
| // Now we can set up the file name |
| // |
| input->istream->streamName = input->strFactory->newStr8(input->strFactory, fileName); |
| input->fileName = input->istream->streamName; |
| |
| return input; |
| } |
| |
| |
| ANTLR3_API pANTLR3_INPUT_STREAM |
| antlr3StringStreamNew(pANTLR3_UINT8 data, ANTLR3_UINT32 encoding, ANTLR3_UINT32 size, pANTLR3_UINT8 name) |
| { |
| pANTLR3_INPUT_STREAM input; |
| |
| // First order of business is to set up the stream and install the data pointer. |
| // Then we will work out the encoding and byte order and adjust the API functions that are installed for the |
| // default 8Bit stream accordingly. |
| // |
| input = antlr3CreateStringStream(data); |
| if (input == NULL) |
| { |
| return NULL; |
| } |
| |
| // Size (in bytes) of the given 'string' |
| // |
| input->sizeBuf = size; |
| |
| // We have the data in memory now so we can deal with it according to |
| // the encoding scheme we were given by the user. |
| // |
| input->encoding = encoding; |
| |
| // Now we need to work out the endian type and install any |
| // API functions that differ from 8Bit |
| // |
| setupInputStream(input); |
| |
| // Now we can set up the file name |
| // |
| input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); |
| input->fileName = input->istream->streamName; |
| |
| return input; |
| } |
| |
| |
| /// Determine endianess of the input stream and install the |
| /// API required for the encoding in that format. |
| /// |
| static void |
| setupInputStream(pANTLR3_INPUT_STREAM input) |
| { |
| ANTLR3_BOOLEAN isBigEndian; |
| |
| // Used to determine the endianness of the machine we are currently |
| // running on. |
| // |
| ANTLR3_UINT16 bomTest = 0xFEFF; |
| |
| // What endianess is the machine we are running on? If the incoming |
| // encoding endianess is the same as this machine's natural byte order |
| // then we can use more efficient API calls. |
| // |
| if (*((pANTLR3_UINT8)(&bomTest)) == 0xFE) |
| { |
| isBigEndian = ANTLR3_TRUE; |
| } |
| else |
| { |
| isBigEndian = ANTLR3_FALSE; |
| } |
| |
| // What encoding did the user tell us {s}he thought it was? I am going |
| // to get sick of the questions on antlr-interest, I know I am. |
| // |
| switch (input->encoding) |
| { |
| case ANTLR3_ENC_UTF8: |
| |
| // See if there is a BOM at the start of this UTF-8 sequence |
| // and just eat it if there is. Windows .TXT files have this for instance |
| // as it identifies UTF-8 even though it is of no consequence for byte order |
| // as UTF-8 does not have a byte order. |
| // |
| if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xEF |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xBB |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xBF |
| ) |
| { |
| // The UTF8 BOM is present so skip it |
| // |
| input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3); |
| } |
| |
| // Install the UTF8 input routines |
| // |
| antlr3UTF8SetupStream(input); |
| break; |
| |
| case ANTLR3_ENC_UTF16: |
| |
| // See if there is a BOM at the start of the input. If not then |
| // we assume that the byte order is the natural order of this |
| // machine (or it is really UCS2). If there is a BOM we determine if the encoding |
| // is the same as the natural order of this machine. |
| // |
| if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFE |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFF |
| ) |
| { |
| // BOM Present, indicates Big Endian |
| // |
| input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); |
| |
| antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); |
| } |
| else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE |
| ) |
| { |
| // BOM present, indicates Little Endian |
| // |
| input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); |
| |
| antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); |
| } |
| else |
| { |
| // No BOM present, assume local computer byte order |
| // |
| antlr3UTF16SetupStream(input, isBigEndian, isBigEndian); |
| } |
| break; |
| |
| case ANTLR3_ENC_UTF32: |
| |
| // See if there is a BOM at the start of the input. If not then |
| // we assume that the byte order is the natural order of this |
| // machine. If there is we determine if the encoding |
| // is the same as the natural order of this machine. |
| // |
| if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0x00 |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xFE |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3)) == 0xFF |
| ) |
| { |
| // BOM Present, indicates Big Endian |
| // |
| input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); |
| |
| antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); |
| } |
| else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 |
| && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 |
| ) |
| { |
| // BOM present, indicates Little Endian |
| // |
| input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); |
| |
| antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); |
| } |
| else |
| { |
| // No BOM present, assume local computer byte order |
| // |
| antlr3UTF32SetupStream(input, isBigEndian, isBigEndian); |
| } |
| break; |
| |
| case ANTLR3_ENC_UTF16BE: |
| |
| // Encoding is definately Big Endian with no BOM |
| // |
| antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); |
| break; |
| |
| case ANTLR3_ENC_UTF16LE: |
| |
| // Encoding is definately Little Endian with no BOM |
| // |
| antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); |
| break; |
| |
| case ANTLR3_ENC_UTF32BE: |
| |
| // Encoding is definately Big Endian with no BOM |
| // |
| antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); |
| break; |
| |
| case ANTLR3_ENC_UTF32LE: |
| |
| // Encoding is definately Little Endian with no BOM |
| // |
| antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); |
| break; |
| |
| case ANTLR3_ENC_EBCDIC: |
| |
| // EBCDIC is basically the same as ASCII but with an on the |
| // fly translation to ASCII |
| // |
| antlr3EBCDICSetupStream(input); |
| break; |
| |
| case ANTLR3_ENC_8BIT: |
| default: |
| |
| // Standard 8bit/ASCII |
| // |
| antlr38BitSetupStream(input); |
| break; |
| } |
| } |
| |
| /** \brief Use the contents of an operating system file as the input |
| * for an input stream. |
| * |
| * \param fileName Name of operating system file to read. |
| * \return |
| * - Pointer to new input stream context upon success |
| * - One of the ANTLR3_ERR_ defines on error. |
| */ |
| static pANTLR3_INPUT_STREAM |
| antlr3CreateFileStream(pANTLR3_UINT8 fileName) |
| { |
| // Pointer to the input stream we are going to create |
| // |
| pANTLR3_INPUT_STREAM input; |
| ANTLR3_UINT32 status; |
| |
| if (fileName == NULL) |
| { |
| return NULL; |
| } |
| |
| // Allocate memory for the input stream structure |
| // |
| input = (pANTLR3_INPUT_STREAM) |
| ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); |
| |
| if (input == NULL) |
| { |
| return NULL; |
| } |
| |
| // Structure was allocated correctly, now we can read the file. |
| // |
| status = antlr3read8Bit(input, fileName); |
| |
| // Call the common 8 bit input stream handler |
| // initialization. |
| // |
| antlr3GenericSetupStream(input); |
| |
| // However if the file was not there or something then we |
| // need to close. Have to wait until here as we cannot call |
| // close until the API is installed of course. |
| // |
| if (status != ANTLR3_SUCCESS) |
| { |
| input->close(input); |
| return NULL; |
| } |
| |
| return input; |
| } |
| |
| ANTLR3_API ANTLR3_UINT32 |
| antlr3read8Bit(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 fileName) |
| { |
| ANTLR3_FDSC infile; |
| ANTLR3_UINT32 fSize; |
| |
| /* Open the OS file in read binary mode |
| */ |
| infile = antlr3Fopen(fileName, "rb"); |
| |
| /* Check that it was there |
| */ |
| if (infile == NULL) |
| { |
| return (ANTLR3_UINT32)ANTLR3_ERR_NOFILE; |
| } |
| |
| /* It was there, so we can read the bytes now |
| */ |
| fSize = antlr3Fsize(fileName); /* Size of input file */ |
| |
| /* Allocate buffer for this input set |
| */ |
| input->data = ANTLR3_MALLOC((size_t)fSize); |
| input->sizeBuf = fSize; |
| |
| if (input->data == NULL) |
| { |
| return (ANTLR3_UINT32)ANTLR3_ERR_NOMEM; |
| } |
| |
| input->isAllocated = ANTLR3_TRUE; |
| |
| /* Now we read the file. Characters are not converted to |
| * the internal ANTLR encoding until they are read from the buffer |
| */ |
| antlr3Fread(infile, fSize, input->data); |
| |
| /* And close the file handle |
| */ |
| antlr3Fclose(infile); |
| |
| return ANTLR3_SUCCESS; |
| } |
| |
| /** \brief Open an operating system file and return the descriptor |
| * We just use the common open() and related functions here. |
| * Later we might find better ways on systems |
| * such as Windows and OpenVMS for instance. But the idea is to read the |
| * while file at once anyway, so it may be irrelevant. |
| */ |
| ANTLR3_API ANTLR3_FDSC |
| antlr3Fopen(pANTLR3_UINT8 filename, const char * mode) |
| { |
| return (ANTLR3_FDSC)fopen((const char *)filename, mode); |
| } |
| |
| /** \brief Close an operating system file and free any handles |
| * etc. |
| */ |
| ANTLR3_API void |
| antlr3Fclose(ANTLR3_FDSC fd) |
| { |
| fclose(fd); |
| } |
| ANTLR3_API ANTLR3_UINT32 |
| antlr3Fsize(pANTLR3_UINT8 fileName) |
| { |
| struct _stat statbuf; |
| |
| _stat((const char *)fileName, &statbuf); |
| |
| return (ANTLR3_UINT32)statbuf.st_size; |
| } |
| |
| ANTLR3_API ANTLR3_UINT32 |
| antlr3Fread(ANTLR3_FDSC fdsc, ANTLR3_UINT32 count, void * data) |
| { |
| return (ANTLR3_UINT32)fread(data, (size_t)count, 1, fdsc); |
| } |
| |
| |
| /** \brief Use the supplied 'string' as input to the stream |
| * |
| * \param data Pointer to the input data |
| * \return |
| * - Pointer to new input stream context upon success |
| * - NULL defines on error. |
| */ |
| static pANTLR3_INPUT_STREAM |
| antlr3CreateStringStream(pANTLR3_UINT8 data) |
| { |
| // Pointer to the input stream we are going to create |
| // |
| pANTLR3_INPUT_STREAM input; |
| |
| if (data == NULL) |
| { |
| return NULL; |
| } |
| |
| // Allocate memory for the input stream structure |
| // |
| input = (pANTLR3_INPUT_STREAM) |
| ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); |
| |
| if (input == NULL) |
| { |
| return NULL; |
| } |
| |
| // Structure was allocated correctly, now we can install the pointer |
| // |
| input->data = data; |
| input->isAllocated = ANTLR3_FALSE; |
| |
| // Call the common 8 bit input stream handler |
| // initialization. |
| // |
| antlr3GenericSetupStream(input); |
| |
| return input; |
| } |