|  | #!/usr/bin/env python3 | 
|  | '''Add syntax highlighting to Python source code''' | 
|  |  | 
|  | __author__ = 'Raymond Hettinger' | 
|  |  | 
|  | import builtins | 
|  | import functools | 
|  | import html as html_module | 
|  | import keyword | 
|  | import re | 
|  | import tokenize | 
|  |  | 
|  | #### Analyze Python Source ################################# | 
|  |  | 
def is_builtin(s):
    'Report whether the builtins module has an attribute named s'
    try:
        getattr(builtins, s)
    except AttributeError:
        return False
    return True
|  |  | 
def combine_range(lines, start, end):
    '''Return the text lying between two positions, plus the end position.

    start and end are (row, column) pairs; rows count from 1 and columns
    are slice offsets into the corresponding entry of lines.  The result
    is a (text, end) tuple so the caller can advance its own position.
    '''
    first_row, first_col = start
    last_row, last_col = end
    if first_row == last_row:
        # Both positions sit on the same line: a single slice is enough.
        return lines[first_row - 1][first_col:last_col], end
    head = lines[first_row - 1][first_col:]
    whole_lines = lines[first_row:last_row - 1]
    tail = lines[last_row - 1][:last_col]
    return ''.join([head, *whole_lines, tail]), end
|  |  | 
def analyze_python(source):
    '''Split Python source into labelled chunks for a highlighter.

    Yields (category, text) tuples in source order.  The category is one
    of 'comment', 'operator', 'string', 'docstring', 'definition',
    'defname', 'keyword' or 'builtin'; the text lying between two
    categorized tokens is yielded with an empty category.
    '''
    lines = source.splitlines(True)
    lines.append('')
    pending = iter(lines)
    tokens = tokenize.generate_tokens(lambda: next(pending, ''))

    last_type, last_text = tokenize.COMMENT, ''
    flushed = (1, 0)        # position up to which text has been yielded
    for tok in tokens:
        label = ''
        if tok.type == tokenize.COMMENT:
            label = 'comment'
        elif tok.type == tokenize.OP:
            # Brackets and punctuation are left uncategorized.
            if tok.string[:1] not in '{}[](),.:;@':
                label = 'operator'
        elif tok.type == tokenize.STRING:
            # A string directly after an INDENT, or starting in column 0,
            # is reported as a docstring.
            opens_block = last_type == tokenize.INDENT or tok.start[1] == 0
            label = 'docstring' if opens_block else 'string'
        elif tok.type == tokenize.NAME:
            word = tok.string
            if word in ('def', 'class', 'import', 'from'):
                label = 'definition'
            elif last_text in ('def', 'class'):
                label = 'defname'
            elif keyword.iskeyword(word):
                label = 'keyword'
            elif is_builtin(word) and last_text != '.':
                label = 'builtin'
        if label:
            # First the uncategorized text leading up to the token ...
            gap, flushed = combine_range(lines, flushed, tok.start)
            yield '', gap
            # ... then the token itself.
            flushed = tok.end
            yield label, tok.string
        last_type, last_text = tok.type, tok.string
    # Whatever follows the last categorized token, up to the final token.
    remainder, flushed = combine_range(lines, flushed, tok.end)
    yield '', remainder
|  |  | 
|  | #### Raw Output  ########################################### | 
|  |  | 
def raw_highlight(classified_text):
    'Show every (category, text) pair on a line of its own, for inspection'
    return ''.join(
        '%15s:  %r\n' % (kind or 'plain', text)
        for kind, text in classified_text
    )
|  |  | 
|  | #### ANSI Output ########################################### | 
|  |  | 
# Maps each category to an (opening, closing) pair of ANSI escape
# sequences.  The number is the SGR foreground colour code
# (31 red, 32 green, 33 yellow, 34 blue, 35 magenta); every pair closes
# with the reset sequence.
default_ansi = {
    kind: ('\033[0;%dm' % sgr_code, '\033[0m')
    for kind, sgr_code in (
        ('comment', 31),
        ('string', 32),
        ('docstring', 32),
        ('keyword', 33),
        ('builtin', 35),
        ('definition', 33),
        ('defname', 34),
        ('operator', 33),
    )
}
|  |  | 
def ansi_highlight(classified_text, colors=default_ansi):
    'Colorize classified text for a terminal using ANSI escape sequences'
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    uncolored = ('', '')
    pieces = []
    for kind, text in classified_text:
        # Categories missing from the color table are emitted as-is.
        start, stop = colors.get(kind, uncolored)
        pieces.extend((start, text, stop))
    return ''.join(pieces)
|  |  | 
|  | #### HTML Output ########################################### | 
|  |  | 
def html_highlight(classified_text, opener='<pre class="python">\n', closer='</pre>\n'):
    '''Render classified text as an HTML fragment.

    Each categorized chunk is HTML-escaped and wrapped in a <span> whose
    class is the category; uncategorized text is only escaped.  The
    fragment is enclosed between opener and closer.
    '''
    pieces = [opener]
    for kind, text in classified_text:
        chunk = html_module.escape(text)
        if kind:
            chunk = '<span class="%s">' % kind + chunk + '</span>'
        pieces.append(chunk)
    pieces.append(closer)
    return ''.join(pieces)
|  |  | 
# Maps a CSS class selector (one per category) to its rule body,
# braces included.
default_css = {
    '.' + kind: '{%s}' % declarations
    for kind, declarations in (
        ('comment', 'color: crimson;'),
        ('string', 'color: forestgreen;'),
        ('docstring', 'color: forestgreen; font-style:italic;'),
        ('keyword', 'color: darkorange;'),
        ('builtin', 'color: purple;'),
        ('definition', 'color: darkorange; font-weight:bold;'),
        ('defname', 'color: blue;'),
        ('operator', 'color: brown;'),
    )
}
|  |  | 
# Skeleton of the complete page produced by build_html_page().  It is
# filled in with str.format(), which substitutes the {title}, {css} and
# {body} fields.
default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''
|  |  | 
def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    '''Build a standalone HTML page showing the colorized source code.

    css maps selectors to rule bodies and becomes the page's style
    sheet, one rule per line.  html is the page template; it is filled
    in with str.format() using the fields title, css and body.  The
    title is HTML-escaped before insertion.
    '''
    stylesheet = '\n'.join(
        '%s %s' % (selector, rules) for selector, rules in css.items())
    fragment = html_highlight(classified_text)
    return html.format(title=html_module.escape(title),
                       css=stylesheet, body=fragment)
|  |  | 
|  | #### LaTeX Output ########################################## | 
|  |  | 
# Maps each category to the replacement text of its LaTeX macro; #1 is
# the macro argument (the highlighted text).  Entries flagged True are
# additionally wrapped in \emph.
default_latex_commands = {
    kind: (r'{\emph{\color{%s}#1}}' if emphasized else r'{\color{%s}#1}') % color
    for kind, color, emphasized in (
        ('comment', 'red', False),
        ('string', 'ForestGreen', False),
        ('docstring', 'ForestGreen', True),
        ('keyword', 'orange', False),
        ('builtin', 'purple', False),
        ('definition', 'orange', False),
        ('defname', 'blue', False),
        ('operator', 'brown', False),
    )
}
|  |  | 
# Skeleton of the LaTeX document produced by latex_highlight().  It is
# filled in with the % operator and a mapping, which substitutes the
# %(macros)s, %(title)s and %(body)s fields.  The body is placed inside
# an alltt environment.
default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{upquote}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
%(macros)s
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''
|  |  | 
def alltt_escape(s):
    'Return s with every backslash and brace replaced by its escaped form'
    escaped_form = {
        '\\': r'\textbackslash{}',
        '{': r'\{',
        '}': r'\}',
    }

    def substitute(match):
        return escaped_form[match.group()]

    return re.sub(r'[\\{}]', substitute, s)
|  |  | 
def latex_highlight(classified_text, title='python',
                    commands=default_latex_commands,
                    document=default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

    classified_text -- iterable of (kind, text) pairs; chunks with a
        non-empty kind are wrapped in a call to the macro named
        "py" + kind, and every chunk is escaped with alltt_escape().
    title -- inserted into the template as given (it is not escaped).
    commands -- mapping of kind to the replacement text of its macro;
        one newcommand definition is generated per entry.
    document -- %-style template with the fields macros, title and body.

    Returns the filled-in document as a string.
    '''
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Fill in the template the caller passed; formatting the module-level
    # default here would silently ignore the document argument.
    return document % dict(title=title, macros=macros, body=''.join(result))
|  |  | 
|  |  | 
# Command-line entry point: classify one Python source file and emit it
# as ANSI-colored text, raw classifications, HTML (fragment or complete
# page) or LaTeX.
if __name__ == '__main__':
    import argparse
    import os.path
    import pathlib
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                # Show syntax highlighted code in the terminal window
                $ ./highlight.py myfile.py

                # Colorize myfile.py and display in a browser
                $ ./highlight.py -b myfile.py

                # Create an HTML section to embed in an existing webpage
                ./highlight.py -s myfile.py

                # Create a complete HTML file
                $ ./highlight.py -c myfile.py > myfile.html

                # Create a PDF using LaTeX
                $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file with the encoding Python itself
    # would use for it (BOM / coding cookie, UTF-8 otherwise) instead of
    # the locale's default encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    # The first matching option wins; plain ANSI output is the default.
    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        # The page is written to the current directory, named after the
        # source file.
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The generated page declares charset=UTF-8, so write it as UTF-8.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        # as_uri() yields a well-formed, percent-quoted file: URL on every
        # platform, unlike concatenating 'file://' with a native path.
        webbrowser.open(pathlib.Path(os.path.abspath(htmlfile)).as_uri())
    else:
        sys.stdout.write(encoded)