antlr-3.4/runtime/Python/tests/t012lexerXML.py - platform/external/antlr - Git at Google

 import antlr3
 import testbase
 import unittest
 import os
 import sys
 from cStringIO import StringIO
 import difflib
 import textwrap

 class t012lexerXML(testbase.ANTLRTest):
     def setUp(self):
         self.compileGrammar('t012lexerXMLLexer.g')


     def lexerClass(self, base):
         class TLexer(base):
             def emitErrorMessage(self, msg):
                 # report errors to /dev/null
                 pass

             def reportError(self, re):
                 # no error recovery yet, just crash!
                 raise re

         return TLexer


     def testValid(self):
         inputPath = os.path.splitext(__file__)[0] + '.input'
         stream = antlr3.StringStream(unicode(open(inputPath).read(), 'utf-8'))
         lexer = self.getLexer(stream)

         while True:
             token = lexer.nextToken()
             if token.type == self.lexerModule.EOF:
                 break


         output = unicode(lexer.outbuf.getvalue(), 'utf-8')

         outputPath = os.path.splitext(__file__)[0] + '.output'
         testOutput = unicode(open(outputPath).read(), 'utf-8')

         success = (output == testOutput)
         if not success:
             d = difflib.Differ()
             r = d.compare(output.splitlines(1), testOutput.splitlines(1))
             self.fail(
                 ''.join([l.encode('ascii', 'backslashreplace') for l in r])
                 )


     def testMalformedInput1(self):
         input = textwrap.dedent("""\
         <?xml version='1.0'?>
         <document d>
         </document>
         """)

         stream = antlr3.StringStream(input)
         lexer = self.getLexer(stream)

         try:
             while True:
                 token = lexer.nextToken()
                 if token.type == antlr3.EOF:
                     break

             raise AssertionError

         except antlr3.NoViableAltException, exc:
             assert exc.unexpectedType == '>', repr(exc.unexpectedType)
             assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
             assert exc.line == 2, repr(exc.line)


     def testMalformedInput2(self):
         input = textwrap.dedent("""\
         <?tml version='1.0'?>
         <document>
         </document>
         """)

         stream = antlr3.StringStream(input)
         lexer = self.getLexer(stream)

         try:
             while True:
                 token = lexer.nextToken()
                 if token.type == antlr3.EOF:
                     break

             raise AssertionError

         except antlr3.MismatchedSetException, exc:
             assert exc.unexpectedType == 't', repr(exc.unexpectedType)
             assert exc.charPositionInLine == 2, repr(exc.charPositionInLine)
             assert exc.line == 1, repr(exc.line)


     def testMalformedInput3(self):
         input = textwrap.dedent("""\
         <?xml version='1.0'?>
         <docu ment attr="foo">
         </document>
         """)

         stream = antlr3.StringStream(input)
         lexer = self.getLexer(stream)

         try:
             while True:
                 token = lexer.nextToken()
                 if token.type == antlr3.EOF:
                     break

             raise AssertionError

         except antlr3.NoViableAltException, exc:
             assert exc.unexpectedType == 'a', repr(exc.unexpectedType)
             assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
             assert exc.line == 2, repr(exc.line)


 if __name__ == '__main__':
     unittest.main()


 ## # run an infinite loop with randomly mangled input
 ## while True:
 ##     print "ping"

 ##     input = """\
 ## <?xml version='1.0'?>
 ## <!DOCTYPE component [
 ## <!ELEMENT component (PCDATA|sub)*>
 ## <!ATTLIST component
 ##           attr CDATA #IMPLIED
 ##           attr2 CDATA #IMPLIED
 ## >
 ## <!ELMENT sub EMPTY>

 ## ]>
 ## <component attr="val'ue" attr2='val"ue'>
 ## <!-- This is a comment -->
 ## Text
 ## <![CDATA[huhu]]>
 ## &amp;
 ## &lt;
 ## <?xtal cursor='11'?>
 ## <sub/>
 ## <sub></sub>
 ## </component>
 ## """

 ##     import random
 ##     input = list(input) # make it mutable
 ##     for _ in range(3):
 ##         p1 = random.randrange(len(input))
 ##         p2 = random.randrange(len(input))

 ##         c1 = input[p1]
 ##         input[p1] = input[p2]
 ##         input[p2] = c1
 ##     input = ''.join(input) # back to string

 ##     stream = antlr3.StringStream(input)
 ##     lexer = Lexer(stream)

 ##     try:
 ##         while True:
 ##             token = lexer.nextToken()
 ##             if token.type == EOF:
 ##                 break

 ##     except antlr3.RecognitionException, exc:
 ##         print exc
 ##         for l in input.splitlines()[0:exc.line]:
 ##             print l
 ##         print ' '*exc.charPositionInLine + '^'

 ##     except BaseException, exc:
 ##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
 ##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
 ##         print

 ##         raise
	import antlr3
	import testbase
	import unittest
	import os
	import sys
	from cStringIO import StringIO
	import difflib
	import textwrap

	class t012lexerXML(testbase.ANTLRTest):
	def setUp(self):
	self.compileGrammar('t012lexerXMLLexer.g')


	def lexerClass(self, base):
	class TLexer(base):
	def emitErrorMessage(self, msg):
	# report errors to /dev/null
	pass

	def reportError(self, re):
	# no error recovery yet, just crash!
	raise re

	return TLexer


	def testValid(self):
	inputPath = os.path.splitext(__file__)[0] + '.input'
	stream = antlr3.StringStream(unicode(open(inputPath).read(), 'utf-8'))
	lexer = self.getLexer(stream)

	while True:
	token = lexer.nextToken()
	if token.type == self.lexerModule.EOF:
	break


	output = unicode(lexer.outbuf.getvalue(), 'utf-8')

	outputPath = os.path.splitext(__file__)[0] + '.output'
	testOutput = unicode(open(outputPath).read(), 'utf-8')

	success = (output == testOutput)
	if not success:
	d = difflib.Differ()
	r = d.compare(output.splitlines(1), testOutput.splitlines(1))
	self.fail(
	''.join([l.encode('ascii', 'backslashreplace') for l in r])
	)


	def testMalformedInput1(self):
	input = textwrap.dedent("""\
	<?xml version='1.0'?>
	<document d>
	</document>
	""")

	stream = antlr3.StringStream(input)
	lexer = self.getLexer(stream)

	try:
	while True:
	token = lexer.nextToken()
	if token.type == antlr3.EOF:
	break

	raise AssertionError

	except antlr3.NoViableAltException, exc:
	assert exc.unexpectedType == '>', repr(exc.unexpectedType)
	assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
	assert exc.line == 2, repr(exc.line)


	def testMalformedInput2(self):
	input = textwrap.dedent("""\
	<?tml version='1.0'?>
	<document>
	</document>
	""")

	stream = antlr3.StringStream(input)
	lexer = self.getLexer(stream)

	try:
	while True:
	token = lexer.nextToken()
	if token.type == antlr3.EOF:
	break

	raise AssertionError

	except antlr3.MismatchedSetException, exc:
	assert exc.unexpectedType == 't', repr(exc.unexpectedType)
	assert exc.charPositionInLine == 2, repr(exc.charPositionInLine)
	assert exc.line == 1, repr(exc.line)


	def testMalformedInput3(self):
	input = textwrap.dedent("""\
	<?xml version='1.0'?>
	<docu ment attr="foo">
	</document>
	""")

	stream = antlr3.StringStream(input)
	lexer = self.getLexer(stream)

	try:
	while True:
	token = lexer.nextToken()
	if token.type == antlr3.EOF:
	break

	raise AssertionError

	except antlr3.NoViableAltException, exc:
	assert exc.unexpectedType == 'a', repr(exc.unexpectedType)
	assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
	assert exc.line == 2, repr(exc.line)



	if __name__ == '__main__':
	unittest.main()


	## # run an infinite loop with randomly mangled input
	## while True:
	## print "ping"

	## input = """\
	## <?xml version='1.0'?>
	## <!DOCTYPE component [
	## <!ELEMENT component (PCDATA\|sub)*>
	## <!ATTLIST component
	## attr CDATA #IMPLIED
	## attr2 CDATA #IMPLIED
	## >
	## <!ELMENT sub EMPTY>

	## ]>
	## <component attr="val'ue" attr2='val"ue'>
	## <!-- This is a comment -->
	## Text
	## <![CDATA[huhu]]>
	## &
	## <
	## <?xtal cursor='11'?>
	## <sub/>
	## <sub></sub>
	## </component>
	## """

	## import random
	## input = list(input) # make it mutable
	## for _ in range(3):
	## p1 = random.randrange(len(input))
	## p2 = random.randrange(len(input))

	## c1 = input[p1]
	## input[p1] = input[p2]
	## input[p2] = c1
	## input = ''.join(input) # back to string

	## stream = antlr3.StringStream(input)
	## lexer = Lexer(stream)

	## try:
	## while True:
	## token = lexer.nextToken()
	## if token.type == EOF:
	## break

	## except antlr3.RecognitionException, exc:
	## print exc
	## for l in input.splitlines()[0:exc.line]:
	## print l
	## print ' '*exc.charPositionInLine + '^'

	## except BaseException, exc:
	## print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
	## print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
	## print

	## raise