| ## |
| ## deja-vu batman, didn't I write this before? |
| ## This parser is designed to parse an SGML document, and the default action |
| ## is just to pass the data through. Based on TestSGMLParser from sgmllib.py |
| ## Whether unknown elements are passed through is controlled by a flag. |
| ## |
| |
| from sgmllib import SGMLParser |
| |
class PassSGMLParser(SGMLParser):
    """SGML parser whose default action is to copy its input to a file.

    Character data is accumulated in ``self.data`` and written to ``fp``
    just before any piece of markup is emitted, so the output keeps the
    same ordering as the input.  Entity and character references are
    re-emitted verbatim, comments are dropped, and tags that have no
    dedicated handler are copied through only when ``pass_unknown`` is
    true.
    """

    def __init__(self, fp, pass_unknown=0, verbose=0):
        """Create a parser that writes to *fp*.

        fp           -- object with a ``write(string)`` method.
        pass_unknown -- when true, tags reaching ``unknown_starttag`` /
                        ``unknown_endtag`` are written to *fp*; when
                        false they are silently discarded.
        verbose      -- forwarded unchanged to ``SGMLParser.__init__``.
        """
        # Set our own state first, before handing control to the base
        # class initializer.
        self.pass_unknown = pass_unknown
        self.data = ""
        self.fp = fp
        # SGMLParser is called explicitly (not via super()) to stay
        # compatible with the Python 2 sgmllib base class.
        SGMLParser.__init__(self, verbose)

    def handle_data(self, data):
        """Buffer character data until the next piece of markup."""
        self.data = self.data + data

    def flush(self):
        """Write any buffered character data to the output and clear it."""
        pending = self.data
        if pending:
            # Clear the buffer before writing so the data cannot be
            # emitted twice if write() ends up triggering another flush.
            self.data = ""
            self.write(pending)

    def write(self, data):
        """Write *data* to the output file and return ``fp.write``'s result."""
        return self.fp.write(data)

    def write_starttag(self, tag, attrs):
        """Emit ``<tag name="value" ...>`` after flushing buffered data.

        *attrs* is a sequence of ``(name, value)`` pairs.  Values are
        written inside double quotes, so any double quote contained in a
        value is emitted as ``&quot;``; otherwise a value such as
        ``say "hi"`` would terminate the attribute early and produce
        malformed markup.  Ampersands are intentionally left untouched:
        the value may still contain references the parser did not
        decode, and escaping them here would double-encode those.
        """
        self.flush()
        pieces = ["<", tag]
        for name, value in attrs:
            pieces.append(' %s="%s"' % (name, value.replace('"', "&quot;")))
        pieces.append(">")
        # Build the whole tag first so it reaches the output in one write.
        self.write("".join(pieces))

    def write_endtag(self, tag):
        """Emit ``</tag>`` after flushing buffered data."""
        self.flush()
        self.write("</%s>" % tag)

    def handle_comment(self, data):
        """Discard comments; they are never copied to the output."""
        pass

    def unknown_starttag(self, tag, attrs):
        """Copy a start tag through only when ``pass_unknown`` is set."""
        if self.pass_unknown:
            self.write_starttag(tag, attrs)

    def unknown_endtag(self, tag):
        """Copy an end tag through only when ``pass_unknown`` is set."""
        if self.pass_unknown:
            self.write_endtag(tag)

    def handle_entityref(self, ref):
        """Re-emit the entity reference ``&ref;`` unchanged."""
        self.flush()
        self.write("&%s;" % ref)

    def handle_charref(self, ref):
        """Re-emit the character reference ``&#ref;`` unchanged."""
        self.flush()
        self.write("&#%s;" % ref)

    def close(self):
        """Finish parsing, then write whatever data is still buffered."""
        # The base close() runs first; flushing afterwards picks up any
        # data still sitting in the buffer once it returns.
        SGMLParser.close(self)
        self.flush()