| # epydoc -- Marked-up Representations for Python Values |
| # |
| # Copyright (C) 2005 Edward Loper |
| # Author: Edward Loper <edloper@loper.org> |
| # URL: <http://epydoc.sf.net> |
| # |
| # $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $ |
| |
| """ |
| Syntax highlighter for Python values. Currently provides special |
| colorization support for: |
| |
| - lists, tuples, sets, frozensets, dicts |
| - numbers |
| - strings |
| - compiled regexps |
| |
| The highlighter also takes care of line-wrapping, and automatically |
| stops generating repr output as soon as it has exceeded the specified |
| number of lines (which should make it faster than pprint for large |
| values). It does I{not} bother to do automatic cycle detection, |
| because maxlines is typically around 5, so it's really not worth it. |
| |
| The syntax-highlighted output is encoded using a |
| L{ParsedEpytextDocstring}, which can then be used to generate output in |
| a variety of formats. |
| """ |
| __docformat__ = 'epytext en' |
| |
| # Implementation note: we use exact tests for classes (list, etc) |
| # rather than using isinstance, because subclasses might override |
| # __repr__. |
| |
| import types, re |
| import epydoc.apidoc |
| from epydoc.util import decode_with_backslashreplace |
| from epydoc.util import plaintext_to_html, plaintext_to_latex |
| from epydoc.compat import * |
| import sre_parse, sre_constants |
| |
| from epydoc.markup.epytext import Element, ParsedEpytextDocstring |
| |
def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The test compares the exact type of C{pyval} against the type of
    an object returned by C{re.compile()}, instead of matching the
    type's I{name}.  The name of that type is an implementation detail
    that differs between interpreter versions, and an unrelated class
    could happen to share it.

    @param pyval: The value to test.
    @rtype: C{bool}
    """
    # The re module caches compiled patterns, so this lookup is cheap.
    return type(pyval) is type(re.compile(''))
| |
class _ColorizerState:
    """
    Mutable bookkeeping record used while a value is being colorized.

    It holds the output generated so far (C{result}), the column and
    line the output has reached (C{charpos}, C{lineno}), whether
    newlines may currently be emitted (C{linebreakok}), and a running
    C{score} that reflects how useful the generated repr is.  E.g.,
    unhelpful values like '<Foo instance at 0x12345>' get low scores;
    if the score is too low, the parse-derived repr is used instead.

    L{mark()} and L{restore()} implement a simple checkpoint: several
    colorization methods first try to render their object on a single
    line (with C{linebreakok=False}) and, if that fails, roll back to
    the checkpoint and fall back on multi-line output.
    """
    def __init__(self):
        #: Output pieces generated so far.
        self.result = []
        #: Column reached on the current output line.
        self.charpos = 0
        #: Number of the current output line (starting at 1).
        self.lineno = 1
        #: May newlines currently be emitted?
        self.linebreakok = True
        #: How good is this representation?
        self.score = 0

    def mark(self):
        """
        Return an opaque checkpoint describing the current state; pass
        it to L{restore()} to roll back to this point.
        """
        checkpoint = (
            len(self.result),
            self.charpos,
            self.lineno,
            self.linebreakok,
            self.score,
        )
        return checkpoint

    def restore(self, mark):
        """
        Roll the state back to a checkpoint created by L{mark()},
        discarding any output that was generated after it.
        """
        (result_len, charpos, lineno, linebreakok, score) = mark
        self.charpos = charpos
        self.lineno = lineno
        self.linebreakok = linebreakok
        self.score = score
        # Drop everything that was appended after the checkpoint.
        del self.result[result_len:]
| |
class _Maxlines(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    exceeds the maximum number of allowed lines.  It is raised by
    C{PyvalColorizer._output()} and caught by
    C{PyvalColorizer.colorize()}, which then truncates the output
    with an ellipsis."""
| |
class _Linebreak(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    generates a string containing a newline, but the state object's
    linebreakok variable is False.  It is raised by
    C{PyvalColorizer._output()}; C{PyvalColorizer._multiline()}
    catches it to retry with line breaks allowed, and
    C{PyvalColorizer.colorize()} catches it to truncate the output."""
| |
class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A parsed epytext docstring holding the colorized representation of
    a Python value, together with some information about its quality.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
        the object.
    """
    def __init__(self, tree, score, is_complete):
        """
        @param tree: The epytext tree containing the colorized repr;
            it is handed on to L{ParsedEpytextDocstring}.
        @param score: See the C{score} instance variable.
        @param is_complete: See the C{is_complete} instance variable.
        """
        ParsedEpytextDocstring.__init__(self, tree)
        self.is_complete = is_complete
        self.score = score
| |
def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} from the formatting
    options and use it to colorize a single value.

    @param pyval: The value to colorize.
    @param parse_repr: Passed on to L{PyvalColorizer.colorize()}.
    @param min_score: Passed on to L{PyvalColorizer.colorize()}.
    @param linelen: Passed on to the L{PyvalColorizer} constructor.
    @param maxlines: Passed on to the L{PyvalColorizer} constructor.
    @param linebreakok: Passed on to the L{PyvalColorizer} constructor.
    @param sort: Passed on to the L{PyvalColorizer} constructor.
    @return: The result of L{PyvalColorizer.colorize()}.
    """
    colorizer = PyvalColorizer(linelen=linelen, maxlines=maxlines,
                               linebreakok=linebreakok, sort=sort)
    return colorizer.colorize(pyval, parse_repr=parse_repr,
                              min_score=min_score)
| |
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    A colorizer is configured once with its formatting options and can
    then be applied to any number of values with L{colorize()}.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        """
        @param linelen: The maximum number of characters on one output
            line; longer lines are wrapped.
        @param maxlines: The maximum number of output lines; output
            that would need more lines is truncated with an ellipsis.
        @param linebreakok: If false, no line breaks are generated at
            all: output that would need one is truncated with an
            ellipsis instead.
        @param sort: If true, the elements of sets, frozensets and
            dicts are sorted (when possible) before being colorized.
        """
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    # NOTE: these three elements are shared by every colorizer and by
    # every result that contains them; they must never be modified in
    # place.
    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        @param pyval: The value to colorize, or
            C{epydoc.apidoc.UNKNOWN} if only C{parse_repr} is
            available.
        @param parse_repr: A string representation of the value.  It
            is used when C{pyval} is unknown, or when the repr
            generated from C{pyval} scores lower than C{min_score}.
        @param min_score: The lowest acceptable score for the repr
            generated from C{pyval}, or C{None} to accept any score.
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Single-line mode: drop a trailing line-wrap marker
                # and make room for the three dots of the ellipsis.
                if state.result and state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Colorize C{pyval}, dispatching on its exact type, and add the
        output to C{state}.  Values without special support are
        rendered using their C{repr()}.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            # Fall back on repr(); a user-defined __repr__ may raise
            # anything, so every error except an interrupt is
            # treated as "no usable repr".
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                # Reprs like '<Foo instance at 0x12345>' say very
                # little about the value, so they lower the score.
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        Return a sorted list of C{items}; or C{items} itself if
        sorting is disabled or the items cannot be sorted.
        """
        if not self.sort: return items
        try: return sorted(items)
        except KeyboardInterrupt: raise
        except: return items

    def _trim_result(self, result, num_chars):
        """
        Remove up to C{num_chars} characters from the end of the
        output list C{result}, dropping pieces that become empty.

        Elements are never modified in place: C{result} may contain
        shared class-level elements (such as C{UNKNOWN_REPR}), and
        editing those would corrupt every later use of them.  A
        partially trimmed element is therefore replaced by a trimmed
        copy.
        """
        import copy
        while num_chars > 0:
            if not result: return
            last = result[-1]
            if isinstance(last, Element):
                assert len(last.children) == 1
                text = last.children[0]
                trim = min(num_chars, len(text))
                remainder = text[:len(text)-trim]
                if remainder:
                    trimmed = copy.deepcopy(last)
                    trimmed.children[0] = remainder
                    result[-1] = trimmed
                else:
                    result.pop()
            else:
                trim = min(num_chars, len(last))
                remainder = last[:len(last)-trim]
                if remainder:
                    result[-1] = remainder
                else:
                    result.pop()
            num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            # If line breaks were not allowed to begin with, there is
            # nothing to fall back on.
            if not linebreakok:
                raise
            # Roll back (this also restores state.linebreakok) and
            # try again with line breaks allowed.
            state.restore(mark)
            func(pyval, state, *args)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Colorize the elements of the iterable C{pyval}, enclosed in
        C{prefix} and C{suffix}.  If line breaks are allowed, each
        element goes on its own line, aligned with the first one.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Colorize the C{(key, value)} pairs in C{items}, enclosed in
        C{prefix} and C{suffix}.  If line breaks are allowed, each
        pair goes on its own line, aligned with the first one.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Colorize the string C{pyval} as a string literal.

        @param prefix: The literal's prefix (C{''} or C{'u'}).
        @param encoding: The name of the codec used to escape each
            line, or a false value to output the lines unescaped.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok: quote = "'''"
        else: quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i>0: self._output('\n', None, state)
            if encoding: line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Colorize the compiled regular expression C{pyval} as a call to
        C{re.compile()} with a highlighted pattern.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Map group numbers to group names.
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        Output the regexp flags that are set in C{flags} as an inline
        flag group (C{(?...)}); output nothing if no flag is set.
        """
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Colorize a parsed regexp (sub)tree.

        @param tree: A sequence of C{(op, args)} pairs, as generated
            by C{sre_parse.parse()}.
        @param noparen: If false, and C{tree} contains more than one
            element, then the output is enclosed in parentheses.
        @param groups: A dictionary mapping group numbers to group
            names.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A set that holds just one category (e.g. \d) is
                # written without the enclosing brackets.
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # A non-greedy repeat is marked by a trailing '?'.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # Imported here because the module-level imports do
                # not bring `log` into scope.
                # NOTE(review): assumes the epydoc package provides a
                # `log` module with an error() function -- confirm.
                from epydoc import log
                log.error("Error colorizing regexp: unknown elt %r" % elt)
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string C{s} to the result list, tagging its contents
        with tag C{tag}.  Any lines that go beyond C{self.linelen} will
        be line-wrapped.

        @param s: The string to add.
        @param tag: The style used to mark up C{s}, or a false value
            to add it as plain text.
        @param state: The L{_ColorizerState} that is updated.
        @raise _Maxlines: If the total number of lines would exceed
            C{self.maxlines}.
        @raise _Linebreak: If a new line is needed, but
            C{state.linebreakok} is false.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]
| |