blob: 987df405a25ed7d58c0478c332d6a8d1ea4be631 [file] [log] [blame]
#
# doctest.py: Syntax Highlighting for doctest blocks
# Edward Loper
#
# Created [06/28/03 02:52 AM]
# $Id: restructuredtext.py 1210 2006-04-10 13:25:50Z edloper $
#
"""
Syntax highlighting for doctest blocks. This module defines two
functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can
be used to perform syntax highlighting on doctest blocks. It also
defines the more general C{colorize_doctest()}, which could be used to
do syntac highlighting on doctest blocks with other output formats.
(Both C{doctest_to_html()} and C{doctest_to_latex()} are defined using
C{colorize_doctest()}.)
"""
__docformat__ = 'epytext en'
import re
from epydoc.util import plaintext_to_html, plaintext_to_latex
__all__ = ['doctest_to_html', 'doctest_to_latex',
'DoctestColorizer', 'XMLDoctestColorizer',
'HTMLDoctestColorizer', 'LaTeXDoctestColorizer']
def doctest_to_html(s):
"""
Perform syntax highlighting on the given doctest string, and
return the resulting HTML code. This code consists of a C{<pre>}
block with class=py-doctest. Syntax highlighting is performed
using the following css classes:
- C{py-prompt} -- the Python PS1 prompt (>>>)
- C{py-more} -- the Python PS2 prompt (...)
- C{py-keyword} -- a Python keyword (for, if, etc.)
- C{py-builtin} -- a Python builtin name (abs, dir, etc.)
- C{py-string} -- a string literal
- C{py-comment} -- a comment
- C{py-except} -- an exception traceback (up to the next >>>)
- C{py-output} -- the output from a doctest block.
- C{py-defname} -- the name of a function or class defined by
a C{def} or C{class} statement.
"""
return HTMLDoctestColorizer().colorize_doctest(s)
def doctest_to_latex(s):
"""
Perform syntax highlighting on the given doctest string, and
return the resulting LaTeX code. This code consists of an
C{alltt} environment. Syntax highlighting is performed using
the following new latex commands, which must be defined externally:
- C{\pysrcprompt} -- the Python PS1 prompt (>>>)
- C{\pysrcmore} -- the Python PS2 prompt (...)
- C{\pysrckeyword} -- a Python keyword (for, if, etc.)
- C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
- C{\pysrcstring} -- a string literal
- C{\pysrccomment} -- a comment
- C{\pysrcexcept} -- an exception traceback (up to the next >>>)
- C{\pysrcoutput} -- the output from a doctest block.
- C{\pysrcdefname} -- the name of a function or class defined by
a C{def} or C{class} statement.
"""
return LaTeXDoctestColorizer().colorize_doctest(s)
class DoctestColorizer:
"""
An abstract base class for performing syntax highlighting on
doctest blocks and other bits of Python code. Subclasses should
provide definitions for:
- The L{markup()} method, which takes a substring and a tag, and
returns a colorized version of the substring.
- The L{PREFIX} and L{SUFFIX} variables, which will be added
to the beginning and end of the strings returned by
L{colorize_codeblock} and L{colorize_doctest}.
"""
#: A string that is added to the beginning of the strings
#: returned by L{colorize_codeblock} and L{colorize_doctest}.
#: Typically, this string begins a preformatted area.
PREFIX = None
#: A string that is added to the end of the strings
#: returned by L{colorize_codeblock} and L{colorize_doctest}.
#: Typically, this string ends a preformatted area.
SUFFIX = None
#: A list of the names of all Python keywords. ('as' is included
#: even though it is technically not a keyword.)
_KEYWORDS = ("and del for is raise"
"assert elif from lambda return"
"break else global not try"
"class except if or while"
"continue exec import pass yield"
"def finally in print as").split()
#: A list of all Python builtins.
_BUILTINS = [_BI for _BI in dir(__builtins__)
if not _BI.startswith('__')]
#: A regexp group that matches keywords.
_KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])
#: A regexp group that matches Python builtins.
_BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
for _BI in _BUILTINS]))
#: A regexp group that matches Python strings.
_STRING_GRP = '|'.join(
[r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])
#: A regexp group that matches Python comments.
_COMMENT_GRP = '(#.*?$)'
#: A regexp group that matches Python ">>>" prompts.
_PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'
#: A regexp group that matches Python "..." prompts.
_PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'
#: A regexp group that matches function and class definitions.
_DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'
#: A regexp that matches Python prompts
PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
re.MULTILINE | re.DOTALL)
#: A regexp that matches Python "..." prompts.
PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
re.MULTILINE | re.DOTALL)
#: A regexp that matches doctest exception blocks.
EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
re.DOTALL | re.MULTILINE)
#: A regexp that matches doctest directives.
DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')
#: A regexp that matches all of the regions of a doctest block
#: that should be colored.
DOCTEST_RE = re.compile(
r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
_STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP, _BUILTIN_GRP,
_PROMPT1_GRP, _PROMPT2_GRP), re.MULTILINE | re.DOTALL)
#: This regular expression is used to find doctest examples in a
#: string. This is copied from the standard Python doctest.py
#: module (after the refactoring in Python 2.4+).
DOCTEST_EXAMPLE_RE = re.compile(r'''
# Source consists of a PS1 line followed by zero or more PS2 lines.
(?P<source>
(?:^(?P<indent> [ ]*) >>> .*) # PS1 line
(?:\n [ ]* \.\.\. .*)* # PS2 lines
\n?)
# Want consists of any non-blank lines that do not start with PS1.
(?P<want> (?:(?![ ]*$) # Not a blank line
(?![ ]*>>>) # Not a line starting with PS1
.*$\n? # But any other line
)*)
''', re.MULTILINE | re.VERBOSE)
def colorize_inline(self, s):
"""
Colorize a string containing Python code. Do not add the
L{PREFIX} and L{SUFFIX} strings to the returned value. This
method is intended for generating syntax-highlighted strings
that are appropriate for inclusion as inline expressions.
"""
return self.DOCTEST_RE.sub(self.subfunc, s)
def colorize_codeblock(self, s):
"""
Colorize a string containing only Python code. This method
differs from L{colorize_doctest} in that it will not search
for doctest prompts when deciding how to colorize the string.
"""
body = self.DOCTEST_RE.sub(self.subfunc, s)
return self.PREFIX + body + self.SUFFIX
def colorize_doctest(self, s, strip_directives=False):
"""
Colorize a string containing one or more doctest examples.
"""
output = []
charno = 0
for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
# Parse the doctest example:
pysrc, want = m.group('source', 'want')
# Pre-example text:
output.append(s[charno:m.start()])
# Example source code:
output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
# Example output:
if want:
if self.EXCEPT_RE.match(want):
output += '\n'.join([self.markup(line, 'except')
for line in want.split('\n')])
else:
output += '\n'.join([self.markup(line, 'output')
for line in want.split('\n')])
# Update charno
charno = m.end()
# Add any remaining post-example text.
output.append(s[charno:])
return self.PREFIX + ''.join(output) + self.SUFFIX
def subfunc(self, match):
other, text = match.group(1, 2)
#print 'M %20r %20r' % (other, text) # <- for debugging
if other:
other = '\n'.join([self.markup(line, 'other')
for line in other.split('\n')])
if match.group('PROMPT1'):
return other + self.markup(text, 'prompt')
elif match.group('PROMPT2'):
return other + self.markup(text, 'more')
elif match.group('KEYWORD'):
return other + self.markup(text, 'keyword')
elif match.group('BUILTIN'):
return other + self.markup(text, 'builtin')
elif match.group('COMMENT'):
return other + self.markup(text, 'comment')
elif match.group('STRING') and '\n' not in text:
return other + self.markup(text, 'string')
elif match.group('STRING'):
# It's a multiline string; colorize the string & prompt
# portion of each line.
pieces = []
for line in text.split('\n'):
if self.PROMPT2_RE.match(line):
if len(line) > 4:
pieces.append(self.markup(line[:4], 'more') +
self.markup(line[4:], 'string'))
else:
pieces.append(self.markup(line[:4], 'more'))
elif line:
pieces.append(self.markup(line, 'string'))
else:
pieces.append('')
return other + '\n'.join(pieces)
elif match.group('DEFINE'):
m = re.match('(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text)
return other + (self.markup(m.group('def'), 'keyword') +
self.markup(m.group('space'), 'other') +
self.markup(m.group('name'), 'defname'))
elif match.group('EOS') is not None:
return other
else:
assert 0, 'Unexpected match!'
def markup(self, s, tag):
"""
Apply syntax highlighting to a single substring from a doctest
block. C{s} is the substring, and C{tag} is the tag that
should be applied to the substring. C{tag} will be one of the
following strings:
- C{prompt} -- the Python PS1 prompt (>>>)
- C{more} -- the Python PS2 prompt (...)
- C{keyword} -- a Python keyword (for, if, etc.)
- C{builtin} -- a Python builtin name (abs, dir, etc.)
- C{string} -- a string literal
- C{comment} -- a comment
- C{except} -- an exception traceback (up to the next >>>)
- C{output} -- the output from a doctest block.
- C{defname} -- the name of a function or class defined by
a C{def} or C{class} statement.
- C{other} -- anything else (does *not* include output.)
"""
raise AssertionError("Abstract method")
class XMLDoctestColorizer(DoctestColorizer):
"""
A subclass of DoctestColorizer that generates XML-like output.
This class is mainly intended to be used for testing purposes.
"""
PREFIX = '<colorized>\n'
SUFFIX = '</colorized>\n'
def markup(self, s, tag):
s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
if tag == 'other': return s
else: return '<%s>%s</%s>' % (tag, s, tag)
class HTMLDoctestColorizer(DoctestColorizer):
"""A subclass of DoctestColorizer that generates HTML output."""
PREFIX = '<pre class="py-doctest">\n'
SUFFIX = '</pre>\n'
def markup(self, s, tag):
if tag == 'other':
return plaintext_to_html(s)
else:
return ('<span class="py-%s">%s</span>' %
(tag, plaintext_to_html(s)))
class LaTeXDoctestColorizer(DoctestColorizer):
"""A subclass of DoctestColorizer that generates LaTeX output."""
PREFIX = '\\begin{alltt}\n'
SUFFIX = '\\end{alltt}\n'
def markup(self, s, tag):
if tag == 'other':
return plaintext_to_latex(s)
else:
return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s))