| |
| """ |
| PRE-PROCESSORS |
| ============================================================================= |
| |
| Preprocessors work on source text before we start doing anything too |
| complicated. |
| """ |
| |
| import re |
| import markdown |
| |
# Every placeholder for stashed HTML starts with this marker: the STX
# control character from the markdown package followed by a fixed tag.
HTML_PLACEHOLDER_PREFIX = "".join((markdown.STX, "wzxhzdk:"))

# Full placeholder template: prefix, the stash index ("%d") and a closing
# ETX character.  HtmlStash.store() fills in the index.
HTML_PLACEHOLDER = "".join((HTML_PLACEHOLDER_PREFIX, "%d", markdown.ETX))
| |
class Processor:
    """
    Common base for processors.

    Keeps a reference to the owning markdown instance in `self.markdown`
    when one is supplied.
    """

    def __init__(self, markdown_instance=None):
        """
        Remember `markdown_instance` as `self.markdown`.

        Nothing is stored when the argument is omitted or falsy, so the
        `markdown` attribute does not exist in that case.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
| |
class Preprocessor(Processor):
    """
    Base class for preprocessors, which run after the text is broken
    into lines.

    A preprocessor provides a "run" method that receives the document as
    a list of lines, changes it as needed and hands back either that same
    list or a newly built one.

    Preprocessors must extend markdown.Preprocessor.

    """

    def run(self, lines):
        """
        Process the document; meant to be overridden by every subclass.

        `lines` is the document as a list of strings split by newlines.
        Subclasses return the (possibly modified) list of lines.  This
        base implementation does nothing and returns None.

        """
        return None
| |
class HtmlStash:
    """
    Storage for HTML segments that are pulled out of the document early
    on and temporarily replaced with place-holders.
    """

    def __init__(self):
        """ Create an empty HtmlStash. """
        # html_counter is the index that the next stored segment receives;
        # rawHtmlBlocks holds the (html, safe) pairs in storage order.
        self.html_counter, self.rawHtmlBlocks = 0, []

    def store(self, html, safe=False):
        """
        Keep an HTML segment so it can be reinserted later.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : the placeholder string that has to be put into the
        document in place of the segment

        """
        index = self.html_counter
        self.rawHtmlBlocks.append((html, safe))
        self.html_counter = index + 1
        return HTML_PLACEHOLDER % index

    def reset(self):
        """ Forget every stored segment and restart the counter at zero. """
        self.rawHtmlBlocks = []
        self.html_counter = 0
| |
| |
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval.

    The document is examined one blank-line-separated block at a time.
    Blocks recognised as raw HTML are handed to
    `self.markdown.htmlStash.store()` and replaced by the placeholder
    that call returns.
    """

    # Templates (filled with the opening tag name) that are searched for,
    # in this order, when looking for the end of an HTML block.
    right_tag_patterns = ["</%s>", "%s>"]

    def _get_left_tag(self, block):
        """Return the lower-cased name of the tag that opens `block`.

        `block` is expected to start with "<".  The name is the first
        whitespace-delimited word after that character, with the first
        ">" treated as a separator.  An empty string is returned when no
        name follows (for example "<" or "<>") instead of raising
        IndexError.
        """
        words = block[1:].replace(">", " ", 1).split()
        if not words:
            return ''
        return words[0].lower()

    def _get_right_tag(self, left_tag, block):
        """Find the tag that ends `block`.

        Returns a `(tag, index)` pair.  When one of `right_tag_patterns`
        (filled with `left_tag`) occurs in `block`, `tag` is that text
        without its surrounding angle brackets and `index` is the
        position just past its last occurrence.  Otherwise `tag` is cut
        from the tail of the block and `index` is `len(block)`.
        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = block.rfind(tag)
            # Hits at index 2 or lower are ignored -- presumably because
            # they would overlap the opening tag itself.
            if i > 2:
                # len(tag) == len(p) - 2 + len(left_tag)
                return tag.lstrip("<").rstrip(">"), i + len(tag)
        # No pattern matched: use the len(left_tag) + 1 characters that
        # precede the final character of the right-stripped block.
        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True when `right_tag` is accepted as closing `left_tag`."""
        # Slicing (rather than indexing) keeps an empty tag name from
        # raising IndexError.
        if left_tag == 'div' or left_tag[:1] in ['?', '@', '%']:  # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if right_tag == "--" and left_tag == "--":
            return True
        if left_tag == right_tag[1:] and right_tag[0] != "<":
            return True
        return False

    def _is_oneliner(self, tag):
        """Return True for tags that never have a closing tag (hr)."""
        return (tag in ['hr', 'hr/'])

    def run(self, lines):
        """Replace raw HTML blocks in `lines` with stash placeholders.

        `lines` is the document as a list of lines.  Returns a new list
        of lines in which each block treated as raw HTML has been
        replaced by the placeholder returned from
        `self.markdown.htmlStash.store()`.
        """
        blocks = "\n".join(lines).split("\n\n")
        new_blocks = []
        items = []       # pieces of an HTML block that spans several blocks
        left_tag = ''
        right_tag = ''
        in_tag = False   # True while collecting an unfinished HTML block

        while blocks:
            block = blocks.pop(0)
            # Drop up to two leading newlines.
            if block.startswith("\n"):
                block = block[1:]
            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:
                if block.startswith("<"):
                    left_tag = self._get_left_tag(block)
                    if not left_tag:
                        # "<" with no tag name after it is ordinary text;
                        # without this guard the indexing below would
                        # fail on a block consisting of just "<".
                        new_blocks.append(block)
                        continue

                    right_tag, data_index = self._get_right_tag(left_tag,
                                                                block)

                    if block[1] == "!":
                        # is a comment block
                        left_tag = "--"
                        right_tag, data_index = self._get_right_tag(left_tag,
                                                                    block)
                        # keep checking conditions below and maybe just append

                    if data_index < len(block) \
                            and markdown.isBlockLevel(left_tag):
                        # Text follows the closing tag: push it back so it
                        # is handled as a block of its own.
                        blocks.insert(0, block[data_index:])
                        block = block[:data_index]

                    if not (markdown.isBlockLevel(left_tag)
                            or block[1] in ["!", "?", "@", "%"]):
                        # Neither block-level nor a comment / processing
                        # instruction: leave it in the text.
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block.rstrip().endswith(">") \
                            and self._equal_tags(left_tag, right_tag):
                        # Complete HTML block.
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                        continue

                    # NOTE: `and` binds tighter than `or`; the parentheses
                    # only spell out how this condition has always been
                    # evaluated.  A block-level tag whose closing tag was
                    # not found keeps collecting, whatever the block
                    # ends with.
                    if markdown.isBlockLevel(left_tag) or (
                            left_tag == "--"
                            and not block.rstrip().endswith(">")):
                        items.append(block.strip())
                        in_tag = True
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                    continue

                new_blocks.append(block)

            else:
                items.append(block.strip())

                right_tag, data_index = self._get_right_tag(left_tag, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag
                    in_tag = False
                    new_blocks.append(
                        self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        if items:
            # The document ended inside an unfinished HTML block: stash
            # whatever was collected.
            new_blocks.append(
                self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
| |
| |
class ReferencePreprocessor(Preprocessor):
    """ Pull reference definitions out of the text and keep them for later. """

    # Up to three leading spaces, "[id]:", then the link target (a run of
    # non-space characters) and whatever is left, which may be a title.
    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)

    def run(self, lines):
        """
        Record reference definitions and drop them from the document.

        Each definition line is saved in `self.markdown.references` under
        its lower-cased, stripped id as a `(target, title)` tuple and is
        left out of the result.  A line whose trailing text is not a
        title wrapped in "...", '...' or (...) is kept untouched, as is
        every line that is not a definition.

        Returns the list of remaining lines.
        """
        remaining = []
        for line in lines:
            match = self.RE.match(line)
            if match is None:
                remaining.append(line)
                continue

            title = match.group(4).strip()  # potential title
            if title:
                wrapped = len(title) >= 2 and (title[0], title[-1]) in (
                    ("\"", "\""), ("\'", "\'"), ("(", ")"))
                if not wrapped:
                    # Trailing text that is not a delimited title: this
                    # is not treated as a definition.
                    remaining.append(line)
                    continue
                title = title[1:-1]

            ref_id = match.group(2).strip().lower()
            self.markdown.references[ref_id] = (match.group(3), title)

        return remaining