blob: 6aa5a4adc15d1c6107fb0fbd6a409bbd9d869d6f [file] [log] [blame]
#
# javadoc.py: javadoc docstring parsing
# Edward Loper
#
# Created [07/03/03 12:37 PM]
# $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $
#
"""
Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>}
docstrings. Javadoc is an HTML-based markup language that was
developed for documenting Java APIs with inline comments. It consists
of raw HTML, augmented by Javadoc tags. There are two types of
Javadoc tag:
- X{Javadoc block tags} correspond to Epydoc fields. They are
marked by starting a line with a string of the form \"C{@M{tag}
[M{arg}]}\", where C{M{tag}} indicates the type of block, and
C{M{arg}} is an optional argument. (For fields that take
arguments, Javadoc assumes that the single word immediately
following the tag is an argument; multi-word arguments cannot be
used with javadoc.)
- X{inline Javadoc tags} are used for inline markup. In particular,
epydoc uses them for crossreference links between documentation.
Inline tags may appear anywhere in the text, and have the form
\"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the
type of inline markup, and C{M{args}} are optional arguments.
Epydoc supports all Javadoc tags, I{except}:
- C{{@docRoot}}, which gives the (relative) URL of the generated
documentation's root.
- C{{@inheritDoc}}, which copies the documentation of the nearest
overridden object. This can be used to combine the documentation
of the overridden object with the documentation of the
overriding object.
- C{@serial}, C{@serialField}, and C{@serialData} which describe the
serialization (pickling) of an object.
- C{{@value}}, which copies the value of a constant.
@warning: Epydoc only supports HTML output for Javadoc docstrings.
"""
# Markup language of this module's own docstrings: epytext, in English.
__docformat__ = 'epytext en'
# Imports
import re
from xml.dom.minidom import *
from epydoc.markup import *
def parse_docstring(docstring, errors, **options):
    """
    Build the parsed representation of a Javadoc-formatted docstring.

    @param docstring: The Javadoc docstring text to parse.
    @type docstring: C{string}
    @param errors: A list that collects any errors generated while
        parsing.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  No options are currently defined;
        any that are given are ignored.
    @return: The parsed form of C{docstring}.
    @rtype: L{ParsedDocstring}
    """
    parsed = ParsedJavadocDocstring(docstring, errors)
    return parsed
class ParsedJavadocDocstring(ParsedDocstring):
    """
    An encoded version of a Javadoc docstring.  Since Javadoc is a
    fairly simple markup language, we don't do any processing in
    advance; instead, we wait to split fields or resolve
    crossreference links until we need to.  The only work done at
    construction time is checking that inline links are well formed.

    @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
    @cvar _ARG_FIELDS: A list of the fields that take arguments.
        Since Javadoc doesn't mark arguments in any special way, we
        must consult this list to decide whether the first word of a
        field is an argument or not.
    @cvar _FIELD_RE: A regular expression used to search for Javadoc
        block tags.

    @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
    @cvar _LINK_SPLIT_RE: A regular expression used to search for
        Javadoc inline tags.
    @cvar _LINK_RE: A regular expression used to process Javadoc
        inline tags.

    @group Plaintext Output: to_plaintext, summary, _SUMMARY_RE
    @cvar _SUMMARY_RE: A regular expression that matches the first
        sentence of a docstring (everything up to the first period
        that is followed by whitespace or the end of the text).
    """
    def __init__(self, docstring, errors=None):
        """
        Create a new C{ParsedJavadocDocstring}.

        @param docstring: The docstring that should be used to
            construct this C{ParsedJavadocDocstring}.
        @type docstring: C{string}
        @param errors: A list where any errors generated during
            parsing will be stored.  If no list is given, then
            all errors are ignored.
        @type errors: C{list} of L{ParseError}
        """
        self._docstring = docstring
        if errors is None:
            # Collect into a throw-away list so that errors are ignored.
            errors = []
        self._check_links(errors)

    #////////////////////////////////////////////////////////////
    # Field Splitting
    #////////////////////////////////////////////////////////////

    _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
                   'ivar param '+
                   'parameter arg argument raise raises exception '+
                   'except deffield newfield keyword kwarg kwparam').split()

    # A block tag is "@word" at the start of a line, followed by
    # whitespace or by the end of the text.  The alternation is spelled
    # out because "$" inside a character class ("[\s$]") is a literal
    # dollar sign, which made a tag that ends the docstring go
    # unrecognized.  The capturing group makes split() keep the tags.
    _FIELD_RE = re.compile(r'(^\s*\@\w+(?:\s|$))', re.MULTILINE)

    # Inherit docs from ParsedDocstring.
    def split_fields(self, errors=None):
        """
        Split this docstring into its description and its block
        fields.

        @param errors: Accepted for interface compatibility; it is
            not used by this implementation.
        @return: A tuple C{(descr, fields)}, where C{descr} is the
            text before the first block tag (or C{None} if that text
            is empty or all whitespace), and C{fields} is a list
            containing one C{Field} for each block tag.
        @rtype: C{tuple}
        """
        # Split the docstring into an alternating list of text and
        # field tags.  Because _FIELD_RE has exactly one capturing
        # group, the result always has the form
        # [descr, tag, body, tag, body, ...].
        pieces = self._FIELD_RE.split(self._docstring)

        # The first piece is the description.
        descr_text = pieces[0]

        # The remaining pieces are (tag, body) pairs.
        fields = []
        for raw_tag, raw_body in zip(pieces[1::2], pieces[2::2]):
            # Drop the surrounding whitespace and the leading "@".
            tag = raw_tag.strip()[1:]

            # Get the field argument (if appropriate).
            if tag in self._ARG_FIELDS:
                # The padding guarantees two items even when the field
                # has no argument or no body.
                subpieces = raw_body.strip().split(None, 1) + ['', '']
                arg, body = subpieces[:2]
            else:
                arg, body = None, raw_body

            # Special processing for "see" fields, since Epydoc
            # allows unrestricted text in them, but Javadoc just
            # uses them for xref links.
            if tag == 'see':
                # Strip first: the raw body normally ends with a
                # newline, which would defeat the closing-quote test
                # and would end up as a blank label inside the link
                # that is synthesized below.
                body = body.strip()
                if body:
                    if body[0] in '"\'':
                        # Quoted string: plain text, drop the quotes.
                        if body[-1] == body[0]:
                            body = body[1:-1]
                    elif body[0] == '<':
                        # Raw HTML (e.g. an anchor): leave it alone.
                        pass
                    else:
                        # Anything else is a crossreference target.
                        body = '{@link %s}' % body

            # Construct the field.
            fields.append(Field(tag, arg, ParsedJavadocDocstring(body)))

        if descr_text.strip():
            return (ParsedJavadocDocstring(descr_text), fields)
        else:
            return (None, fields)

    #////////////////////////////////////////////////////////////
    # HTML Output.
    #////////////////////////////////////////////////////////////

    _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')

    # Groups: (1) the link target; (2) the optional label, including
    # its leading whitespace.  An argument list following the target
    # is matched but not captured.  DOTALL lets a label wrap across
    # lines, matching what _LINK_SPLIT_RE accepts; without it every
    # link with a wrapped label was reported as bad and dropped.
    _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
                          r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}',
                          re.DOTALL)

    # Inherit docs from ParsedDocstring.
    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.  The text is passed through
        unchanged, except that inline link tags are replaced by
        whatever C{docstring_linker.translate_identifier_xref}
        returns for them.  Malformed links are dropped.

        @param docstring_linker: The object used to translate
            crossreference links; its C{translate_identifier_xref}
            method is called with the link target and the link label.
        @param options: Ignored.
        @return: The HTML text.
        @rtype: C{string}
        """
        # Split the docstring into an alternating list of HTML and
        # links (odd pieces are links).
        pieces = self._LINK_SPLIT_RE.split(self._docstring)

        # This function is used to translate inline links to HTML.
        translate_xref = docstring_linker.translate_identifier_xref

        # Collect the output pieces and join them once at the end.
        html_parts = []
        for i, piece in enumerate(pieces):
            if i % 2 == 0:
                # HTML piece: copy it through unchanged.
                html_parts.append(piece)
                continue

            # Decompose the link into its target and label.
            m = self._LINK_RE.match(piece)
            if m is None:
                continue  # Error flagged by _check_links
            target, name = m.groups()

            # Normalize the target name: "#member" becomes "member",
            # and "Class#member" becomes "Class.member".  (The target
            # can never contain an argument list, because _LINK_RE
            # matches that outside of the target group.)
            if target[0] == '#':
                target = target[1:]
            target = target.replace('#', '.')

            # Provide a label if none was specified, or if the one
            # given is nothing but whitespace.
            name = (name or '').strip() or target

            # Use docstring_linker to convert the link to html.
            html_parts.append(translate_xref(target, name))

        return ''.join(html_parts)

    def _check_links(self, errors):
        """
        Make sure that all inline link tags are valid.  We need a
        separate method for this because we want to do this at parse
        time, not html output time.  Any errors found are appended to
        C{errors} as non-fatal L{ParseError}s.

        @param errors: The list that errors are appended to.
        @type errors: C{list} of L{ParseError}
        """
        pieces = self._LINK_SPLIT_RE.split(self._docstring)
        # Number of newlines seen before the current piece; this is
        # the line number that is reported for a bad link.
        linenum = 0
        for i, piece in enumerate(pieces):
            # Odd pieces are the links.
            if i % 2 == 1 and not self._LINK_RE.match(piece):
                estr = 'Bad link %r' % piece
                errors.append(ParseError(estr, linenum, is_fatal=0))
            linenum += piece.count('\n')

    #////////////////////////////////////////////////////////////
    # Plaintext Output.
    #////////////////////////////////////////////////////////////

    # Inherit docs from ParsedDocstring.  Since we don't define
    # to_latex, this is used when generating latex output.
    def to_plaintext(self, docstring_linker, **options):
        """
        Return the raw docstring text, without any translation.

        @param docstring_linker: Ignored.
        @param options: Ignored.
        @rtype: C{string}
        """
        return self._docstring

    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    # Jeff's hack to get summary working
    def summary(self):
        """
        Return a summary of this docstring.

        @return: A tuple C{(summary, other)}, where C{summary} is a
            C{ParsedJavadocDocstring} holding the first sentence (or,
            if no sentence end is found, the first line), and
            C{other} is true if there is more text after the summary.
        @rtype: C{tuple}
        """
        # Drop every line that starts with a tag.  Only the line that
        # carries the tag is removed; continuation lines of a field
        # body are kept.
        rows = self._docstring.split('\n')
        doc = '\n'.join([row for row in rows
                         if not row.lstrip().startswith('@')])

        m = self._SUMMARY_RE.match(doc)
        if m:
            # First sentence found; report whether anything other
            # than whitespace follows it.
            other = doc[m.end():]
            return (ParsedJavadocDocstring(m.group(1)),
                    bool(other.strip()))

        # No sentence end: fall back to the first line, marking it
        # with an ellipsis if more lines follow.
        parts = doc.strip('\n').split('\n', 1)
        if len(parts) == 1:
            return ParsedJavadocDocstring(parts[0]), False
        return ParsedJavadocDocstring(parts[0] + '...'), True
# def concatenate(self, other):
# if not isinstance(other, ParsedJavadocDocstring):
# raise ValueError, 'Could not concatenate docstrings'
# return ParsedJavadocDocstring(self._docstring+other._docstring)