| """loads Markdown files, converts each one to HTML and parses the HTML into an ElementTree structure. |
| The collection of ElementTrees is further parsed to generate a table of contents structure, and is |
| manipulated to replace various markdown-generated HTML with specific Mako tags before being written |
| to Mako templates, which then re-access the table of contents structure at runtime. |
| |
| Much thanks to Alexey Shamrin, who came up with the original idea and did all the heavy Markdown/Elementtree |
| lifting for this module.""" |
| import sys, re, os |
| from toc import TOCElement |
| |
# Bind ``et`` to an ElementTree implementation.  The standalone
# ``elementtree`` package is preferred; the copy bundled with the standard
# library (``xml.etree``) is used when the standalone package is absent.
try:
    import elementtree.ElementTree as et
except ImportError:
    try:
        import xml.etree.ElementTree as et
    except ImportError:
        # Raise a real exception object: string exceptions are not valid.
        raise ImportError("This module requires ElementTree to run (http://effbot.org/zone/element-index.htm)")
| |
| import markdown |
| |
def dump_tree(elem, stream):
    """Write ``elem`` and everything beneath it to ``stream`` as markup.

    Elements whose tag starts with ``MAKO:`` are handed to
    ``dump_mako_tag``.  An element tagged ``html`` is treated as a
    transparent wrapper: its text and children are written, but neither its
    opening nor its closing tag is.

    elem   -- ElementTree element to serialize.
    stream -- object with a ``write()`` method that receives the fragments.
    """
    if elem.tag.startswith('MAKO:'):
        dump_mako_tag(elem, stream)
        return

    # The opening and closing tags must be suppressed together; writing only
    # the closing tag would leave a stray "</html>" at the end of the output.
    is_wrapper = elem.tag == 'html'

    if not is_wrapper:
        if len(elem.attrib):
            # NOTE(review): values are quoted via repr(), which yields Python
            # quoting rather than HTML escaping -- kept as-is.
            attrs = " ".join(["%s=%s" % (key, repr(val)) for key, val in elem.attrib.items()])
            stream.write("<%s %s>" % (elem.tag, attrs))
        else:
            stream.write("<%s>" % elem.tag)

    if elem.text:
        stream.write(elem.text)
    for child in elem:
        dump_tree(child, stream)
        # Text following a child belongs to the parent's content.
        if child.tail:
            stream.write(child.tail)

    if not is_wrapper:
        stream.write("</%s>" % elem.tag)
| |
def dump_mako_tag(elem, stream):
    """Write a ``MAKO:``-prefixed element to ``stream`` as a component call.

    The ``MAKO:`` prefix is dropped from the tag name and the element's
    attributes are rendered as ``key=value`` arguments.  An element without
    text or children is written as ``<&name, args&>``; otherwise it is
    written as ``<&|name, args&>``, followed by its content and ``</&>``.

    elem   -- ElementTree element whose tag starts with ``MAKO:``.
    stream -- object with a ``write()`` method that receives the fragments.
    """
    call = elem.tag[len('MAKO:'):]
    arguments = ', '.join(['%s=%s' % pair for pair in elem.items()])
    has_body = bool(elem.text or len(elem))

    # Assemble the opening marker in one piece so it is written in one call.
    if has_body:
        call = '|' + call
    if arguments:
        call = call + ', ' + arguments
    stream.write('<&' + call + '&>')

    if not has_body:
        return

    if elem.text:
        stream.write(elem.text)
    for child in elem:
        dump_tree(child, stream)
        if child.tail:
            stream.write(child.tail)
    stream.write("</&>")
| |
def create_toc(filename, tree, tocroot):
    """Turn the headers of ``tree`` into nested sections and TOC entries.

    Each header element, together with everything up to the next header of
    the same or a higher rank, is replaced in place by a
    ``MAKO:formatting.section`` element.  A ``TOCElement`` is created for
    every header and parented to the entry of the nearest enclosing header,
    or to ``tocroot`` when there is none.

    filename -- passed to each ``TOCElement`` and to ``get_by_file``.
    tree     -- ElementTree element whose children are scanned; modified.
    tocroot  -- root table-of-contents element.

    Returns ``(title, tocroot.get_by_file(filename))`` where ``title`` is
    the text of the first header found, or None when there is no header.
    """
    # One-item list so the nested function can rebind the value.
    title = [None]
    # Stack of (header level, TOCElement) for the sections currently open.
    open_sections = []

    def process(tree):
        while True:
            i = find_header_index(tree)
            if i is None:
                return
            node = tree[i]
            taglevel = int(node.tag[1])
            start, end = i, end_of_header(tree, taglevel, i + 1)
            content = tree[start + 1:end]
            description = node.text.strip()
            if title[0] is None:
                title[0] = description
            name = node.get('name')
            if name is None:
                # Default name: first word of the header text, lowercased.
                name = description.split()[0].lower()

            # Close every section at this level or deeper.  Whatever remains
            # on top is the enclosing section, however many levels were
            # skipped on the way up or down.
            while open_sections and open_sections[-1][0] >= taglevel:
                open_sections.pop()
            if open_sections:
                parent = open_sections[-1][1]
            else:
                parent = tocroot
            element = TOCElement(filename, name, description, parent)
            open_sections.append((taglevel, element))

            tag = et.Element("MAKO:formatting.section", path=literal(element.path), toc="toc")
            tag.text = (node.tail or "") + '\n'
            tag.tail = '\n'
            tag[:] = content
            tree[start:end] = [tag]

            # Sub-headers now live inside the new section element.
            process(tag)

    process(tree)
    return (title[0], tocroot.get_by_file(filename))
| |
def literal(s):
    """Return ``s`` wrapped in double quotes as a string literal.

    Backslashes and double quotes inside ``s`` are backslash-escaped so the
    result stays a single well-formed literal.  Non-string values are
    converted with ``%s`` formatting first.
    """
    text = '%s' % s
    # Escape backslashes first so the ones added for quotes are not doubled.
    text = text.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % text
| |
def index(parent, item):
    """Return the position of ``item`` within ``parent``, or None.

    Members are compared by identity (``is``), not by equality.
    """
    position = 0
    for candidate in parent:
        if candidate is item:
            return position
        position += 1
    return None
| |
def find_header_index(tree):
    """Return the index of the first direct child of ``tree`` for which
    ``is_header`` is true, or None when there is no such child."""
    position = 0
    for child in tree:
        if is_header(child):
            return position
        position += 1
    return None
| |
def is_header(node):
    """Return True when ``node.tag`` is a two-character string made of
    ``h`` followed by a digit from 1 to 9, and False otherwise."""
    tag = node.tag
    # Non-string tags can never name a header.
    if not isinstance(tag, str):
        return False
    if len(tag) != 2:
        return False
    if tag[0] != 'h':
        return False
    return tag[1] in '123456789'
| |
def end_of_header(tree, level, start):
    """Return the index at which the section of a header of rank ``level``
    ends.

    Children of ``tree`` are examined from index ``start`` onward; the
    result is the index of the first header whose rank number is less than
    or equal to ``level``, or ``len(tree)`` when no such header follows.
    """
    offset = 0
    for candidate in tree[start:]:
        if is_header(candidate) and int(candidate.tag[1]) <= level:
            return start + offset
        offset += 1
    return len(tree)
| |
def process_rel_href(tree):
    """Replace ``<a>`` elements whose href is ``rel:PATH`` or
    ``boldrel:PATH`` with ``MAKO:nav.toclink`` elements, in place.

    The link text is passed on as ``description`` unless it equals the path.
    For the ``boldrel:`` form the new element is wrapped in ``<strong>``.
    The text that followed the original anchor (its tail) is preserved.

    tree -- ElementTree element to search and modify.
    """
    parent = get_parent_map(tree)
    for a in tree.findall('.//a'):
        # Anchors without an href (e.g. <a name="...">) return None from
        # get(); match against '' so they are skipped instead of raising.
        m = re.match(r'(bold)?rel\:(.+)', a.get('href') or '')
        if not m:
            continue
        (bold, path) = m.group(1, 2)
        text = a.text
        if text == path:
            tag = et.Element("MAKO:nav.toclink", path=literal(path), toc="toc", extension="extension")
        else:
            tag = et.Element("MAKO:nav.toclink", path=literal(path), description=literal(text), toc="toc", extension="extension")

        replacement = tag
        if bold:
            replacement = et.Element('strong')
            replacement.append(tag)
        # The tail goes on whichever element takes the anchor's place.
        replacement.tail = a.tail

        a_parent = parent[a]
        a_parent[index(a_parent, a)] = replacement
| |
def replace_pre_with_mako(tree):
    """Replace selected ``<pre><code>`` blocks with ``MAKO:formatting.code``
    elements, in place.

    A block is replaced when its text, ignoring leading whitespace, starts
    with a ``{python}`` or ``{code}`` marker (optionally carrying
    ``title="..."``), or with ``>>> ``.  The marker itself is removed from
    the emitted code.

    tree -- ElementTree element to search and modify.
    """
    # Patterns are compiled once per call rather than once per code block.
    doctest_directives = re.compile(r'#\s*doctest:\s*[+-]\w+(,[+-]\w+)*\s*$', re.M)
    line_start = re.compile(r'^(?!<&)', re.M)
    marker = re.compile(r'\{(python|code)(?: title="(.*?)"){0,1}\}')

    parents = get_parent_map(tree)

    def splice_code_tag(pre, text, type=None, title=None):
        """Swap ``pre`` in its parent for a code element holding ``text``."""
        text = doctest_directives.sub('', text)
        # process '>>>' to have quotes around it, to work with the mako python
        # syntax highlighter which uses the tokenize module
        text = re.sub(r'>>> ', r'">>>" ', text)

        # Indent every line that does not begin with "<&"; among other
        # things, this keeps comment lines "# " from being consumed as mako
        # comments.
        # NOTE(review): the previous comment said "two spaces" but the
        # replacement string as it appears is a single space -- confirm.
        text = line_start.sub(' ', text)

        opts = {}
        if type == 'python':
            opts['syntaxtype'] = literal('python')
        else:
            opts['syntaxtype'] = None
        if title is not None:
            opts['title'] = literal(title)
        opts['use_sliders'] = True

        tag = et.Element("MAKO:formatting.code", **opts)
        tag.text = text
        tag.tail = pre.tail

        pre_parent = parents[pre]
        pre_parent[reverse_parent(pre_parent, pre)] = tag

    for precode in tree.findall('.//pre/code'):
        # text is None for an empty element or one that starts with a child.
        raw = precode.text or ''
        stripped = raw.lstrip()
        m = marker.match(stripped)
        if m:
            # Remove only the leading marker (plus a following newline and
            # indentation).  A global substitution would also delete any
            # "{code ...}" that appears inside the sample itself.
            body = re.sub(r'\A\n\s*', '', stripped[m.end():])
            splice_code_tag(parents[precode], body, type=m.group(1), title=m.group(2))
        elif stripped.startswith('>>> '):
            splice_code_tag(parents[precode], raw)
| |
def reverse_parent(parent, item):
    """Return the index at which ``item`` sits inside ``parent``, or None
    when it is not there.  Matching is by identity, not equality."""
    hits = [position for position, member in enumerate(parent) if member is item]
    if hits:
        return hits[0]
    return None
| |
def get_parent_map(tree):
    """Return a dict mapping every descendant element of ``tree`` to its
    parent element.  ``tree`` itself has no entry."""
    # getiterator() is deprecated and missing from newer ElementTree
    # versions; iter() is missing from older ones.  Use whichever exists.
    walk = getattr(tree, 'iter', None)
    if walk is None:
        walk = tree.getiterator
    return dict([(child, parent) for parent in walk() for child in parent])
| |
def header(toc, title, filename):
    """Return the preamble written at the top of each generated template.

    toc      -- object whose ``root.doctitle`` supplies the first part of
                the page title.
    title    -- second part of the page title.
    filename -- value assigned to ``filename`` inside the ``<%! %>`` block.
    """
    # "%%" is a literal percent sign; the three "%s" slots are filled below.
    template_lines = [
        '',
        '<%%inherit file="content_layout.html"/>',
        '<%%def name="title">%s - %s</%%def>',
        '<%%!',
        "filename = '%s'",
        '%%>',
        '# This file is generated. Edit the .txt files instead of this one.',
        '',
    ]
    template = '\n'.join(template_lines)
    return template % (toc.root.doctitle, title, filename)
| |
class utf8stream(object):
    """Write-only wrapper that UTF-8 encodes whatever is written to it
    before passing it to the wrapped stream."""

    def __init__(self, stream):
        # Underlying stream; receives the encoded output.
        self.stream = stream

    def write(self, str):
        """Encode ``str`` as UTF-8 and write the result to the wrapped
        stream."""
        encoded = str.encode('utf8')
        self.stream.write(encoded)
| |
def parse_markdown_files(toc, files):
    """Convert each named Markdown source into a generated template file.

    For every name in ``files``, ``content/NAME.txt`` is read, converted to
    HTML, parsed, run through ``create_toc``, ``replace_pre_with_mako`` and
    ``process_rel_href``, and written to ``output/NAME.html`` preceded by
    ``header()``.  Names whose source file does not exist are skipped.
    One ``source -> output`` line is printed per file processed.

    toc   -- table-of-contents root passed to ``create_toc`` and ``header``.
    files -- iterable of base names, without directory or extension.
    """
    for inname in files:
        infile = 'content/%s.txt' % inname
        if not os.access(infile, os.F_OK):
            continue

        # Close the input explicitly instead of leaving it to the
        # garbage collector.
        source = open(infile)
        try:
            html = markdown.markdown(source.read())
        finally:
            source.close()

        # Wrap the fragment so the parser sees a single root element.
        tree = et.fromstring("<html>" + html + "</html>")
        title = create_toc(inname, tree, toc)[0]
        replace_pre_with_mako(tree)
        process_rel_href(tree)

        outname = 'output/%s.html' % inname
        # Single pre-formatted argument: same output whether print is a
        # statement or a function.
        print("%s -> %s" % (infile, outname))

        # utf8stream hands over already-encoded bytes, so the file is opened
        # in binary mode; it is closed even if writing fails.
        raw = open(outname, 'wb')
        try:
            outfile = utf8stream(raw)
            outfile.write(header(toc, title, inname))
            dump_tree(tree, outfile)
        finally:
            raw.close()
| |
| |