python/helpers/epydoc/markup/javadoc.py - platform/tools/idea - Git at Google

 #
 # javadoc.py: javadoc docstring parsing
 # Edward Loper
 #
 # Created [07/03/03 12:37 PM]
 # $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $
 #

 """
 Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>}
 docstrings.  Javadoc is an HTML-based markup language that was
 developed for documenting Java APIs with inline comments.  It consists
 of raw HTML, augmented by Javadoc tags.  There are two types of
 Javadoc tag:

   - X{Javadoc block tags} correspond to Epydoc fields.  They are
     marked by starting a line with a string of the form \"C{@M{tag}
     [M{arg}]}\", where C{M{tag}} indicates the type of block, and
     C{M{arg}} is an optional argument.  (For fields that take
     arguments, Javadoc assumes that the single word immediately
     following the tag is an argument; multi-word arguments cannot be
     used with javadoc.)

   - X{inline Javadoc tags} are used for inline markup.  In particular,
     epydoc uses them for crossreference links between documentation.
     Inline tags may appear anywhere in the text, and have the form
     \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the
     type of inline markup, and C{M{args}} are optional arguments.

 Epydoc supports all Javadoc tags, I{except}:
   - C{{@docRoot}}, which gives the (relative) URL of the generated
     documentation's root.
   - C{{@inheritDoc}}, which copies the documentation of the nearest
     overridden object.  This can be used to combine the documentation
     of the overridden object with the documentation of the
     overridding object.
   - C{@serial}, C{@serialField}, and C{@serialData} which describe the
     serialization (pickling) of an object.
   - C{{@value}}, which copies the value of a constant.

 @warning: Epydoc only supports HTML output for Javadoc docstrings.
 """
 __docformat__ = 'epytext en'

 # Imports
 import re
 from xml.dom.minidom import *
 from epydoc.markup import *

 def parse_docstring(docstring, errors, **options):
     """
     Parse the given docstring, which is formatted using Javadoc; and
     return a C{ParsedDocstring} representation of its contents.
     @param docstring: The docstring to parse
     @type docstring: C{string}
     @param errors: A list where any errors generated during parsing
         will be stored.
     @type errors: C{list} of L{ParseError}
     @param options: Extra options.  Unknown options are ignored.
         Currently, no extra options are defined.
     @rtype: L{ParsedDocstring}
     """
     return ParsedJavadocDocstring(docstring, errors)

 class ParsedJavadocDocstring(ParsedDocstring):
     """
     An encoded version of a Javadoc docstring.  Since Javadoc is a
     fairly simple markup language, we don't do any processing in
     advance; instead, we wait to split fields or resolve
     crossreference links until we need to.

     @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
     @cvar _ARG_FIELDS: A list of the fields that take arguments.
         Since Javadoc doesn't mark arguments in any special way, we
         must consult this list to decide whether the first word of a
         field is an argument or not.
     @cvar _FIELD_RE: A regular expression used to search for Javadoc
         block tags.

     @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
     @cvar _LINK_SPLIT_RE: A regular expression used to search for
         Javadoc inline tags.
     @cvar _LINK_RE: A regular expression used to process Javadoc
         inline tags.
     """
     def __init__(self, docstring, errors=None):
         """
         Create a new C{ParsedJavadocDocstring}.

         @param docstring: The docstring that should be used to
             construct this C{ParsedJavadocDocstring}.
         @type docstring: C{string}
         @param errors: A list where any errors generated during
             parsing will be stored.  If no list is given, then
             all errors are ignored.
         @type errors: C{list} of L{ParseError}
         """
         self._docstring = docstring
         if errors is None: errors = []
         self._check_links(errors)

     #////////////////////////////////////////////////////////////
     # Field Splitting
     #////////////////////////////////////////////////////////////

     _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
                    'ivar param '+
                    'parameter arg argument raise raises exception '+
                    'except deffield newfield keyword kwarg kwparam').split()
     _FIELD_RE = re.compile(r'(^\s*\@\w+[\s$])', re.MULTILINE)

     # Inherit docs from ParsedDocstring.
     def split_fields(self, errors=None):

         # Split the docstring into an alternating list of field tags
         # and text (odd pieces are field tags).
         pieces = self._FIELD_RE.split(self._docstring)

         # The first piece is the description.
         descr = ParsedJavadocDocstring(pieces[0])

         # The remaining pieces are the block fields (alternating tags
         # and bodies; odd pieces are tags).
         fields = []
         for i in range(1, len(pieces)):
             if i%2 == 1:
                 # Get the field tag.
                 tag = pieces[i].strip()[1:]
             else:
                 # Get the field argument (if appropriate).
                 if tag in self._ARG_FIELDS:
                     subpieces = pieces[i].strip().split(None, 1)+['','']
                     (arg, body) = subpieces[:2]
                 else:
                     (arg, body) = (None, pieces[i])

                 # Special processing for @see fields, since Epydoc
                 # allows unrestricted text in them, but Javadoc just
                 # uses them for xref links:
                 if tag == 'see' and body:
                     if body[0] in '"\'':
                         if body[-1] == body[0]: body = body[1:-1]
                     elif body[0] == '<': pass
                     else: body = '{@link %s}' % body

                 # Construct the field.
                 parsed_body = ParsedJavadocDocstring(body)
                 fields.append(Field(tag, arg, parsed_body))

         if pieces[0].strip():
             return (descr, fields)
         else:
             return (None, fields)

     #////////////////////////////////////////////////////////////
     # HTML Output.
     #////////////////////////////////////////////////////////////

     _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')
     _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
                           r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}')

     # Inherit docs from ParsedDocstring.
     def to_html(self, docstring_linker, **options):
         # Split the docstring into an alternating list of HTML and
         # links (odd pieces are links).
         pieces = self._LINK_SPLIT_RE.split(self._docstring)

         # This function is used to translate {@link ...}s to HTML.
         translate_xref = docstring_linker.translate_identifier_xref

         # Build up the HTML string from the pieces.  For HTML pieces
         # (even), just add it to html.  For link pieces (odd), use
         # docstring_linker to translate the crossreference link to
         # HTML for us.
         html = ''
         for i in range(len(pieces)):
             if i%2 == 0:
                 html += pieces[i]
             else:
                 # Decompose the link into pieces.
                 m = self._LINK_RE.match(pieces[i])
                 if m is None: continue # Error flagged by _check_links
                 (target, name) = m.groups()

                 # Normalize the target name.
                 if target[0] == '#': target = target[1:]
                 target = target.replace('#', '.')
                 target = re.sub(r'\(.*\)', '', target)

                 # Provide a name, if it wasn't specified.
                 if name is None: name = target
                 else: name = name.strip()

                 # Use docstring_linker to convert the name to html.
                 html += translate_xref(target, name)
         return html

     def _check_links(self, errors):
         """
         Make sure that all @{link}s are valid.  We need a separate
         method for ths because we want to do this at parse time, not
         html output time.  Any errors found are appended to C{errors}.
         """
         pieces = self._LINK_SPLIT_RE.split(self._docstring)
         linenum = 0
         for i in range(len(pieces)):
             if i%2 == 1 and not self._LINK_RE.match(pieces[i]):
                 estr = 'Bad link %r' % pieces[i]
                 errors.append(ParseError(estr, linenum, is_fatal=0))
             linenum += pieces[i].count('\n')

     #////////////////////////////////////////////////////////////
     # Plaintext Output.
     #////////////////////////////////////////////////////////////

     # Inherit docs from ParsedDocstring.  Since we don't define
     # to_latex, this is used when generating latex output.
     def to_plaintext(self, docstring_linker, **options):
         return self._docstring

     _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

     # Jeff's hack to get summary working
     def summary(self):
         # Drop tags
         doc = "\n".join([ row for row in self._docstring.split('\n')
                           if not row.lstrip().startswith('@') ])

         m = self._SUMMARY_RE.match(doc)
         if m:
             other = doc[m.end():]
             return (ParsedJavadocDocstring(m.group(1)),
                     other != '' and not other.isspace())

         else:
             parts = doc.strip('\n').split('\n', 1)
             if len(parts) == 1:
                 summary = parts[0]
                 other = False
             else:
                 summary = parts[0] + '...'
                 other = True

             return ParsedJavadocDocstring(summary), other

 #     def concatenate(self, other):
 #         if not isinstance(other, ParsedJavadocDocstring):
 #             raise ValueError, 'Could not concatenate docstrings'
 #         return ParsedJavadocDocstring(self._docstring+other._docstring)
	#
	# javadoc.py: javadoc docstring parsing
	# Edward Loper
	#
	# Created [07/03/03 12:37 PM]
	# $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $
	#

	"""
	Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>}
	docstrings. Javadoc is an HTML-based markup language that was
	developed for documenting Java APIs with inline comments. It consists
	of raw HTML, augmented by Javadoc tags. There are two types of
	Javadoc tag:

	- X{Javadoc block tags} correspond to Epydoc fields. They are
	marked by starting a line with a string of the form \"C{@M{tag}
	[M{arg}]}\", where C{M{tag}} indicates the type of block, and
	C{M{arg}} is an optional argument. (For fields that take
	arguments, Javadoc assumes that the single word immediately
	following the tag is an argument; multi-word arguments cannot be
	used with javadoc.)

	- X{inline Javadoc tags} are used for inline markup. In particular,
	epydoc uses them for crossreference links between documentation.
	Inline tags may appear anywhere in the text, and have the form
	\"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the
	type of inline markup, and C{M{args}} are optional arguments.

	Epydoc supports all Javadoc tags, I{except}:
	- C{{@docRoot}}, which gives the (relative) URL of the generated
	documentation's root.
	- C{{@inheritDoc}}, which copies the documentation of the nearest
	overridden object. This can be used to combine the documentation
	of the overridden object with the documentation of the
	overridding object.
	- C{@serial}, C{@serialField}, and C{@serialData} which describe the
	serialization (pickling) of an object.
	- C{{@value}}, which copies the value of a constant.

	@warning: Epydoc only supports HTML output for Javadoc docstrings.
	"""
	__docformat__ = 'epytext en'

	# Imports
	import re
	from xml.dom.minidom import *
	from epydoc.markup import *

	def parse_docstring(docstring, errors, **options):
	"""
	Parse the given docstring, which is formatted using Javadoc; and
	return a C{ParsedDocstring} representation of its contents.
	@param docstring: The docstring to parse
	@type docstring: C{string}
	@param errors: A list where any errors generated during parsing
	will be stored.
	@type errors: C{list} of L{ParseError}
	@param options: Extra options. Unknown options are ignored.
	Currently, no extra options are defined.
	@rtype: L{ParsedDocstring}
	"""
	return ParsedJavadocDocstring(docstring, errors)

	class ParsedJavadocDocstring(ParsedDocstring):
	"""
	An encoded version of a Javadoc docstring. Since Javadoc is a
	fairly simple markup language, we don't do any processing in
	advance; instead, we wait to split fields or resolve
	crossreference links until we need to.

	@group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
	@cvar _ARG_FIELDS: A list of the fields that take arguments.
	Since Javadoc doesn't mark arguments in any special way, we
	must consult this list to decide whether the first word of a
	field is an argument or not.
	@cvar _FIELD_RE: A regular expression used to search for Javadoc
	block tags.

	@group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
	@cvar _LINK_SPLIT_RE: A regular expression used to search for
	Javadoc inline tags.
	@cvar _LINK_RE: A regular expression used to process Javadoc
	inline tags.
	"""
	def __init__(self, docstring, errors=None):
	"""
	Create a new C{ParsedJavadocDocstring}.

	@param docstring: The docstring that should be used to
	construct this C{ParsedJavadocDocstring}.
	@type docstring: C{string}
	@param errors: A list where any errors generated during
	parsing will be stored. If no list is given, then
	all errors are ignored.
	@type errors: C{list} of L{ParseError}
	"""
	self._docstring = docstring
	if errors is None: errors = []
	self._check_links(errors)

	#////////////////////////////////////////////////////////////
	# Field Splitting
	#////////////////////////////////////////////////////////////

	_ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
	'ivar param '+
	'parameter arg argument raise raises exception '+
	'except deffield newfield keyword kwarg kwparam').split()
	_FIELD_RE = re.compile(r'(^\s*\@\w+[\s$])', re.MULTILINE)

	# Inherit docs from ParsedDocstring.
	def split_fields(self, errors=None):

	# Split the docstring into an alternating list of field tags
	# and text (odd pieces are field tags).
	pieces = self._FIELD_RE.split(self._docstring)

	# The first piece is the description.
	descr = ParsedJavadocDocstring(pieces[0])

	# The remaining pieces are the block fields (alternating tags
	# and bodies; odd pieces are tags).
	fields = []
	for i in range(1, len(pieces)):
	if i%2 == 1:
	# Get the field tag.
	tag = pieces[i].strip()[1:]
	else:
	# Get the field argument (if appropriate).
	if tag in self._ARG_FIELDS:
	subpieces = pieces[i].strip().split(None, 1)+['','']
	(arg, body) = subpieces[:2]
	else:
	(arg, body) = (None, pieces[i])

	# Special processing for @see fields, since Epydoc
	# allows unrestricted text in them, but Javadoc just
	# uses them for xref links:
	if tag == 'see' and body:
	if body[0] in '"\'':
	if body[-1] == body[0]: body = body[1:-1]
	elif body[0] == '<': pass
	else: body = '{@link %s}' % body

	# Construct the field.
	parsed_body = ParsedJavadocDocstring(body)
	fields.append(Field(tag, arg, parsed_body))

	if pieces[0].strip():
	return (descr, fields)
	else:
	return (None, fields)

	#////////////////////////////////////////////////////////////
	# HTML Output.
	#////////////////////////////////////////////////////////////

	_LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')
	_LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
	r'(?:\([^\)]\))?' + r'(\s+.)?' + r'}')

	# Inherit docs from ParsedDocstring.
	def to_html(self, docstring_linker, **options):
	# Split the docstring into an alternating list of HTML and
	# links (odd pieces are links).
	pieces = self._LINK_SPLIT_RE.split(self._docstring)

	# This function is used to translate {@link ...}s to HTML.
	translate_xref = docstring_linker.translate_identifier_xref

	# Build up the HTML string from the pieces. For HTML pieces
	# (even), just add it to html. For link pieces (odd), use
	# docstring_linker to translate the crossreference link to
	# HTML for us.
	html = ''
	for i in range(len(pieces)):
	if i%2 == 0:
	html += pieces[i]
	else:
	# Decompose the link into pieces.
	m = self._LINK_RE.match(pieces[i])
	if m is None: continue # Error flagged by _check_links
	(target, name) = m.groups()

	# Normalize the target name.
	if target[0] == '#': target = target[1:]
	target = target.replace('#', '.')
	target = re.sub(r'\(.*\)', '', target)

	# Provide a name, if it wasn't specified.
	if name is None: name = target
	else: name = name.strip()

	# Use docstring_linker to convert the name to html.
	html += translate_xref(target, name)
	return html

	def _check_links(self, errors):
	"""
	Make sure that all @{link}s are valid. We need a separate
	method for ths because we want to do this at parse time, not
	html output time. Any errors found are appended to C{errors}.
	"""
	pieces = self._LINK_SPLIT_RE.split(self._docstring)
	linenum = 0
	for i in range(len(pieces)):
	if i%2 == 1 and not self._LINK_RE.match(pieces[i]):
	estr = 'Bad link %r' % pieces[i]
	errors.append(ParseError(estr, linenum, is_fatal=0))
	linenum += pieces[i].count('\n')

	#////////////////////////////////////////////////////////////
	# Plaintext Output.
	#////////////////////////////////////////////////////////////

	# Inherit docs from ParsedDocstring. Since we don't define
	# to_latex, this is used when generating latex output.
	def to_plaintext(self, docstring_linker, **options):
	return self._docstring

	_SUMMARY_RE = re.compile(r'(\s[\w\W]?\.)(\s\|$)')

	# Jeff's hack to get summary working
	def summary(self):
	# Drop tags
	doc = "\n".join([ row for row in self._docstring.split('\n')
	if not row.lstrip().startswith('@') ])

	m = self._SUMMARY_RE.match(doc)
	if m:
	other = doc[m.end():]
	return (ParsedJavadocDocstring(m.group(1)),
	other != '' and not other.isspace())

	else:
	parts = doc.strip('\n').split('\n', 1)
	if len(parts) == 1:
	summary = parts[0]
	other = False
	else:
	summary = parts[0] + '...'
	other = True

	return ParsedJavadocDocstring(summary), other

	# def concatenate(self, other):
	# if not isinstance(other, ParsedJavadocDocstring):
	# raise ValueError, 'Could not concatenate docstrings'
	# return ParsedJavadocDocstring(self._docstring+other._docstring)