"""Simple code to extract class & function docstrings from a module.

This code is used as an example in the library reference manual in the
section on using the parser module.  Refer to the manual for a thorough
discussion of the operation of this code.
"""
import os | |
import parser | |
import symbol | |
import token | |
import types | |
from types import ListType, TupleType | |
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree.  The module name passed to ModuleInfo is the base name of
    `fileName' with its extension removed.
    """
    # Read through a context manager so the file handle is closed
    # promptly, even when read() raises.
    with open(fileName) as source_file:
        source = source_file.read()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)
class SuiteInfoBase:
    """Collect the docstring and the inner definitions found in a suite.

    An instance keeps two dictionaries, one mapping class names to
    ClassInfo objects and one mapping function names to FunctionInfo
    objects, plus the docstring extracted from the tree (if any).
    """

    # Class-level defaults, used when nothing is extracted from the tree.
    _docstring = ''
    _name = ''

    def __init__(self, tree=None):
        """Initialize the instance, scanning `tree' when one is given."""
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the class/function tables from `tree'.

        tree
            Parse tree node in tuple form, as produced by totuple().
        """
        # extract docstring
        #
        # A two-element tree has a single child, which is compared with the
        # simple_stmt part of the docstring pattern.  Otherwise the child at
        # index 3 is compared with the whole pattern (presumably the first
        # statement of an indented suite, after NEWLINE and INDENT -- confirm
        # against the grammar).  Trees too short to have an index 3 are
        # skipped instead of raising IndexError.
        found, bindings = 0, {}
        if len(tree) == 2:
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        if found:
            # NOTE(review): eval() turns the source text of the STRING token
            # into its value.  The text comes from the parsed source, so this
            # is only as trustworthy as that source; ast.literal_eval() is a
            # safer alternative where it is available.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if not found:
                continue
            cstmt = bindings['compound']
            kind = cstmt[0]
            # cstmt[2][1] is used as the name of the definition (presumably
            # the NAME token that follows the 'def' or 'class' keyword).
            if kind == symbol.funcdef:
                self._function_info[cstmt[2][1]] = FunctionInfo(cstmt)
            elif kind == symbol.classdef:
                self._class_info[cstmt[2][1]] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the extracted docstring, or '' if none was found."""
        return self._docstring

    def get_name(self):
        """Return the name recorded for this object."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes found in the suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name'; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        Raises KeyError when `name' is in neither table.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
class SuiteFuncInfo:
    """Mixin giving access to the function names and info objects.

    It reads the `_function_info' dictionary of the instance it is mixed
    into; this class does not create that dictionary itself.
    """

    def get_function_names(self):
        """Return the names of the recorded functions."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info object for function `name'; KeyError if unknown."""
        functions = self._function_info
        return functions[name]
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition and its nested definitions."""

    def __init__(self, tree=None):
        """Record the function name and scan the function body.

        tree
            Tuple-form parse tree node of the definition.  The name is read
            from tree[2][1] and the last child is passed to the base class
            as the suite to scan.  With no tree, the name keeps its
            class-level default and nothing is scanned; previously the
            default argument raised TypeError.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)
class ClassInfo(SuiteInfoBase):
    """Information about one class definition, its methods and inner classes."""

    def __init__(self, tree=None):
        """Record the class name and scan the class body.

        tree
            Tuple-form parse tree node of the definition.  The name is read
            from tree[2][1] and the last child is passed to the base class
            as the suite to scan.  With no tree, the name keeps its
            class-level default and nothing is scanned; previously the
            default argument raised TypeError.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name'; KeyError if unknown."""
        return self._function_info[name]
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module: docstring, classes and functions."""

    def __init__(self, tree=None, name="<string>"):
        """Scan a module parse tree.

        tree
            Tuple-form parse tree of the module.
        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree and len(tree) > 1:
            # For a module, only the first child (tree[1]) can be the
            # docstring.  The base class looks at a different child, so its
            # result is replaced here either way.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the token text so the value is the string itself
                # rather than its quoted source form, as SuiteInfoBase does
                # for classes and functions.
                # NOTE(review): eval() is only as trustworthy as the parsed
                # source; ast.literal_eval() is a safer alternative.
                self._docstring = eval(bindings["docstring"])
            else:
                self._docstring = ''
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  The dictionary is updated in place, so it may hold bindings
    made before a failed match was detected.
    """
    if vars is None:
        vars = {}
    if type(pattern) is list:           # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if type(pattern) is not tuple:
        return (pattern == data), vars
    try:
        same_length = len(data) == len(pattern)
    except TypeError:
        # `data' has no length (e.g. an integer), so it cannot match a
        # tuple pattern.
        return False, vars
    if not same_length:
        return False, vars
    # An empty tuple pattern matches empty data; starting from True also
    # keeps `same' bound when the loop body never runs.
    same = True
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
#  Pattern for a 'stmt' node whose only child is a 'compound_stmt' node.
#  It lets compound statements be told apart from simple statements; the
#  ['compound'] variable captures the child of the compound_stmt node.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
#  Pattern for a 'stmt' node which *might* represent a docstring.  A
#  docstring must be the first statement in the class or function; this
#  pattern does not check that, so callers must apply it to the right node.
#
#  The pattern is a single chain of one-child nodes running from small_stmt
#  down to atom, ending in a STRING token whose text is captured by the
#  ['docstring'] variable.  The chain is built from the inside out.
#
_docstring_chain = (token.STRING, ['docstring'])
for _wrapper in (
        symbol.atom,
        symbol.power,
        symbol.factor,
        symbol.term,
        symbol.arith_expr,
        symbol.shift_expr,
        symbol.and_expr,
        symbol.xor_expr,
        symbol.expr,
        symbol.comparison,
        symbol.not_test,
        symbol.and_test,
        symbol.test,
        symbol.testlist,
        symbol.expr_stmt,
        symbol.small_stmt,
        ):
    _docstring_chain = (_wrapper, _docstring_chain)

DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt, _docstring_chain, (token.NEWLINE, '')),
    )

# Drop the construction temporaries so only the pattern stays in the module.
del _docstring_chain, _wrapper