| import antlr3 |
| import testbase |
| import unittest |
| import os |
| import sys |
| from cStringIO import StringIO |
| import difflib |
| import textwrap |
| |
class t012lexerXML(testbase.ANTLRTest):
    """Tests for the lexer generated from ``t012lexerXMLLexer.g``.

    ``testValid`` lexes the ``.input`` file stored next to this module and
    compares what the lexer wrote to its ``outbuf`` with the ``.output``
    file.  The ``testMalformedInput*`` cases feed broken documents to the
    lexer and check the exception that is raised.
    """

    def setUp(self):
        """Compile the lexer grammar the tests in this class rely on."""
        self.compileGrammar('t012lexerXMLLexer.g')

    def lexerClass(self, base):
        """Return a subclass of *base* that raises instead of recovering.

        NOTE(review): presumably called by ``testbase`` when ``getLexer``
        builds the lexer -- confirm against ``testbase.ANTLRTest``.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _readUtf8(self, path):
        """Return the content of *path* decoded as UTF-8.

        The file handle is closed explicitly instead of being left to the
        garbage collector.
        """
        fp = open(path)
        try:
            return unicode(fp.read(), 'utf-8')
        finally:
            fp.close()

    def _consumeAllTokens(self, lexer, eofType):
        """Pull tokens from *lexer* until one of type *eofType* shows up."""
        while True:
            if lexer.nextToken().type == eofType:
                break

    def _assertLexerError(self, text, excClass, unexpectedType,
                          charPositionInLine, line):
        """Lex *text* and check that it fails with the expected error.

        The test fails if lexing reaches EOF without raising *excClass*,
        or if the raised exception's ``unexpectedType``,
        ``charPositionInLine`` or ``line`` differ from the given values.
        Exceptions of any other type propagate unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            self._consumeAllTokens(lexer, antlr3.EOF)
        except excClass:
            # sys.exc_info() sidesteps the version-specific
            # "except X, e" / "except X as e" spellings.
            exc = sys.exc_info()[1]
        else:
            self.fail(
                'lexer reached EOF without raising %s' % excClass.__name__
            )

        # unittest assertions (unlike bare ``assert`` statements) are still
        # executed when Python runs with -O.
        self.assertEqual(exc.unexpectedType, unexpectedType)
        self.assertEqual(exc.charPositionInLine, charPositionInLine)
        self.assertEqual(exc.line, line)

    def testValid(self):
        """Lex the reference document and diff the lexer's output buffer."""
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(self._readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)
        self._consumeAllTokens(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readUtf8(basePath + '.output')

        if output != testOutput:
            diff = difflib.Differ().compare(
                output.splitlines(1), testOutput.splitlines(1)
            )
            # Escape non-ASCII characters so the failure message can be
            # printed on any terminal.
            self.fail(
                ''.join(
                    [chunk.encode('ascii', 'backslashreplace')
                     for chunk in diff]
                )
            )

    def testMalformedInput1(self):
        """A stray attribute name without a value must be rejected."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexerError(
            text,
            antlr3.NoViableAltException,
            unexpectedType='>',
            charPositionInLine=11,
            line=2,
        )

    def testMalformedInput2(self):
        """A declaration starting with ``<?tml`` must be rejected."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexerError(
            text,
            antlr3.MismatchedSetException,
            unexpectedType='t',
            charPositionInLine=2,
            line=1,
        )

    def testMalformedInput3(self):
        """A tag name split by a space must be rejected."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexerError(
            text,
            antlr3.NoViableAltException,
            unexpectedType='a',
            charPositionInLine=11,
            line=2,
        )
| |
| |
| |
# Allow running this test module directly as a script.
if '__main__' == __name__:
    unittest.main()
| |
| |
| ## # run an infinite loop with randomly mangled input |
| ## while True: |
| ## print "ping" |
| |
| ## input = """\ |
| ## <?xml version='1.0'?> |
| ## <!DOCTYPE component [ |
| ## <!ELEMENT component (PCDATA|sub)*> |
| ## <!ATTLIST component |
| ## attr CDATA #IMPLIED |
| ## attr2 CDATA #IMPLIED |
| ## > |
| ## <!ELMENT sub EMPTY> |
| |
| ## ]> |
| ## <component attr="val'ue" attr2='val"ue'> |
| ## <!-- This is a comment --> |
| ## Text |
| ## <![CDATA[huhu]]> |
| ## & |
| ## < |
| ## <?xtal cursor='11'?> |
| ## <sub/> |
| ## <sub></sub> |
| ## </component> |
| ## """ |
| |
| ## import random |
| ## input = list(input) # make it mutable |
| ## for _ in range(3): |
| ## p1 = random.randrange(len(input)) |
| ## p2 = random.randrange(len(input)) |
| |
| ## c1 = input[p1] |
| ## input[p1] = input[p2] |
| ## input[p2] = c1 |
| ## input = ''.join(input) # back to string |
| |
| ## stream = antlr3.StringStream(input) |
| ## lexer = Lexer(stream) |
| |
| ## try: |
| ## while True: |
| ## token = lexer.nextToken() |
| ## if token.type == EOF: |
| ## break |
| |
| ## except antlr3.RecognitionException, exc: |
| ## print exc |
| ## for l in input.splitlines()[0:exc.line]: |
| ## print l |
| ## print ' '*exc.charPositionInLine + '^' |
| |
| ## except BaseException, exc: |
| ## print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())]) |
| ## print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine) |
| ## print |
| |
| ## raise |
| |