| # -*- test-case-name: twisted.test.test_modules -*- |
| # Copyright (c) 2006-2010 Twisted Matrix Laboratories. |
| # See LICENSE for details. |
| |
| """ |
| This module aims to provide a unified, object-oriented view of Python's |
| runtime hierarchy. |
| |
| Python is a very dynamic language with wide variety of introspection utilities. |
| However, these utilities can be hard to use, because there is no consistent |
| API. The introspection API in python is made up of attributes (__name__, |
| __module__, func_name, etc) on instances, modules, classes and functions which |
| vary between those four types, utility modules such as 'inspect' which provide |
| some functionality, the 'imp' module, the "compiler" module, the semantics of |
| PEP 302 support, and setuptools, among other things. |
| |
| At the top, you have "PythonPath", an abstract representation of sys.path which |
| includes methods to locate top-level modules, with or without loading them. |
| The top-level exposed functions in this module for accessing the system path |
| are "walkModules", "iterModules", and "getModule". |
| |
| From most to least specific, here are the objects provided:: |
| |
| PythonPath # sys.path |
| | |
| v |
| PathEntry # one entry on sys.path: an importer |
| | |
| v |
| PythonModule # a module or package that can be loaded |
| | |
| v |
| PythonAttribute # an attribute of a module (function or class) |
| | |
| v |
| PythonAttribute # an attribute of a function or class |
| | |
| v |
| ... |
| |
| Here's an example of idiomatic usage: this is what you would do to list all of |
| the modules outside the standard library's python-files directory:: |
| |
| import os |
| stdlibdir = os.path.dirname(os.__file__) |
| |
| from twisted.python.modules import iterModules |
| |
| for modinfo in iterModules(): |
| if (modinfo.pathEntry.filePath.path != stdlibdir |
| and not modinfo.isPackage()): |
| print 'unpackaged: %s: %s' % ( |
| modinfo.name, modinfo.filePath.path) |
| """ |
| |
| __metaclass__ = type |
| |
| # let's try to keep path imports to a minimum... |
| from os.path import dirname, split as splitpath |
| |
| import sys |
| import zipimport |
| import inspect |
| import warnings |
| from zope.interface import Interface, implements |
| |
| from twisted.python.components import registerAdapter |
| from twisted.python.filepath import FilePath, UnlistableError |
| from twisted.python.zippath import ZipArchive |
| from twisted.python.reflect import namedAny |
| |
| _nothing = object() |
| |
| PYTHON_EXTENSIONS = ['.py'] |
| OPTIMIZED_MODE = __doc__ is None |
| if OPTIMIZED_MODE: |
| PYTHON_EXTENSIONS.append('.pyo') |
| else: |
| PYTHON_EXTENSIONS.append('.pyc') |
| |
| def _isPythonIdentifier(string): |
| """ |
| cheezy fake test for proper identifier-ness. |
| |
| @param string: a str which might or might not be a valid python identifier. |
| |
| @return: True or False |
| """ |
| return (' ' not in string and |
| '.' not in string and |
| '-' not in string) |
| |
| |
| |
| def _isPackagePath(fpath): |
| # Determine if a FilePath-like object is a Python package. TODO: deal with |
| # __init__module.(so|dll|pyd)? |
| extless = fpath.splitext()[0] |
| basend = splitpath(extless)[1] |
| return basend == "__init__" |
| |
| |
| |
| class _ModuleIteratorHelper: |
| """ |
| This mixin provides common behavior between python module and path entries, |
| since the mechanism for searching sys.path and __path__ attributes is |
| remarkably similar. |
| """ |
| |
| def iterModules(self): |
| """ |
| Loop over the modules present below this entry or package on PYTHONPATH. |
| |
| For modules which are not packages, this will yield nothing. |
| |
| For packages and path entries, this will only yield modules one level |
| down; i.e. if there is a package a.b.c, iterModules on a will only |
| return a.b. If you want to descend deeply, use walkModules. |
| |
| @return: a generator which yields PythonModule instances that describe |
| modules which can be, or have been, imported. |
| """ |
| yielded = {} |
| if not self.filePath.exists(): |
| return |
| |
| for placeToLook in self._packagePaths(): |
| try: |
| children = placeToLook.children() |
| except UnlistableError: |
| continue |
| |
| children.sort() |
| for potentialTopLevel in children: |
| ext = potentialTopLevel.splitext()[1] |
| potentialBasename = potentialTopLevel.basename()[:-len(ext)] |
| if ext in PYTHON_EXTENSIONS: |
| # TODO: this should be a little choosier about which path entry |
| # it selects first, and it should do all the .so checking and |
| # crud |
| if not _isPythonIdentifier(potentialBasename): |
| continue |
| modname = self._subModuleName(potentialBasename) |
| if modname.split(".")[-1] == '__init__': |
| # This marks the directory as a package so it can't be |
| # a module. |
| continue |
| if modname not in yielded: |
| yielded[modname] = True |
| pm = PythonModule(modname, potentialTopLevel, self._getEntry()) |
| assert pm != self |
| yield pm |
| else: |
| if (ext or not _isPythonIdentifier(potentialBasename) |
| or not potentialTopLevel.isdir()): |
| continue |
| modname = self._subModuleName(potentialTopLevel.basename()) |
| for ext in PYTHON_EXTENSIONS: |
| initpy = potentialTopLevel.child("__init__"+ext) |
| if initpy.exists(): |
| yielded[modname] = True |
| pm = PythonModule(modname, initpy, self._getEntry()) |
| assert pm != self |
| yield pm |
| break |
| |
| def walkModules(self, importPackages=False): |
| """ |
| Similar to L{iterModules}, this yields self, and then every module in my |
| package or entry, and every submodule in each package or entry. |
| |
| In other words, this is deep, and L{iterModules} is shallow. |
| """ |
| yield self |
| for package in self.iterModules(): |
| for module in package.walkModules(importPackages=importPackages): |
| yield module |
| |
| def _subModuleName(self, mn): |
| """ |
| This is a hook to provide packages with the ability to specify their names |
| as a prefix to submodules here. |
| """ |
| return mn |
| |
| def _packagePaths(self): |
| """ |
| Implement in subclasses to specify where to look for modules. |
| |
| @return: iterable of FilePath-like objects. |
| """ |
| raise NotImplementedError() |
| |
| def _getEntry(self): |
| """ |
| Implement in subclasses to specify what path entry submodules will come |
| from. |
| |
| @return: a PathEntry instance. |
| """ |
| raise NotImplementedError() |
| |
| |
| def __getitem__(self, modname): |
| """ |
| Retrieve a module from below this path or package. |
| |
| @param modname: a str naming a module to be loaded. For entries, this |
| is a top-level, undotted package name, and for packages it is the name |
| of the module without the package prefix. For example, if you have a |
| PythonModule representing the 'twisted' package, you could use:: |
| |
| twistedPackageObj['python']['modules'] |
| |
| to retrieve this module. |
| |
| @raise: KeyError if the module is not found. |
| |
| @return: a PythonModule. |
| """ |
| for module in self.iterModules(): |
| if module.name == self._subModuleName(modname): |
| return module |
| raise KeyError(modname) |
| |
| def __iter__(self): |
| """ |
| Implemented to raise NotImplementedError for clarity, so that attempting to |
| loop over this object won't call __getitem__. |
| |
| Note: in the future there might be some sensible default for iteration, |
| like 'walkEverything', so this is deliberately untested and undefined |
| behavior. |
| """ |
| raise NotImplementedError() |
| |
| class PythonAttribute: |
| """ |
| I represent a function, class, or other object that is present. |
| |
| @ivar name: the fully-qualified python name of this attribute. |
| |
| @ivar onObject: a reference to a PythonModule or other PythonAttribute that |
| is this attribute's logical parent. |
| |
| @ivar name: the fully qualified python name of the attribute represented by |
| this class. |
| """ |
| def __init__(self, name, onObject, loaded, pythonValue): |
| """ |
| Create a PythonAttribute. This is a private constructor. Do not construct |
| me directly, use PythonModule.iterAttributes. |
| |
| @param name: the FQPN |
| @param onObject: see ivar |
| @param loaded: always True, for now |
| @param pythonValue: the value of the attribute we're pointing to. |
| """ |
| self.name = name |
| self.onObject = onObject |
| self._loaded = loaded |
| self.pythonValue = pythonValue |
| |
| def __repr__(self): |
| return 'PythonAttribute<%r>'%(self.name,) |
| |
| def isLoaded(self): |
| """ |
| Return a boolean describing whether the attribute this describes has |
| actually been loaded into memory by importing its module. |
| |
| Note: this currently always returns true; there is no Python parser |
| support in this module yet. |
| """ |
| return self._loaded |
| |
| def load(self, default=_nothing): |
| """ |
| Load the value associated with this attribute. |
| |
| @return: an arbitrary Python object, or 'default' if there is an error |
| loading it. |
| """ |
| return self.pythonValue |
| |
| def iterAttributes(self): |
| for name, val in inspect.getmembers(self.load()): |
| yield PythonAttribute(self.name+'.'+name, self, True, val) |
| |
| class PythonModule(_ModuleIteratorHelper): |
| """ |
| Representation of a module which could be imported from sys.path. |
| |
| @ivar name: the fully qualified python name of this module. |
| |
| @ivar filePath: a FilePath-like object which points to the location of this |
| module. |
| |
| @ivar pathEntry: a L{PathEntry} instance which this module was located |
| from. |
| """ |
| |
| def __init__(self, name, filePath, pathEntry): |
| """ |
| Create a PythonModule. Do not construct this directly, instead inspect a |
| PythonPath or other PythonModule instances. |
| |
| @param name: see ivar |
| @param filePath: see ivar |
| @param pathEntry: see ivar |
| """ |
| assert not name.endswith(".__init__") |
| self.name = name |
| self.filePath = filePath |
| self.parentPath = filePath.parent() |
| self.pathEntry = pathEntry |
| |
| def _getEntry(self): |
| return self.pathEntry |
| |
| def __repr__(self): |
| """ |
| Return a string representation including the module name. |
| """ |
| return 'PythonModule<%r>' % (self.name,) |
| |
| |
| def isLoaded(self): |
| """ |
| Determine if the module is loaded into sys.modules. |
| |
| @return: a boolean: true if loaded, false if not. |
| """ |
| return self.pathEntry.pythonPath.moduleDict.get(self.name) is not None |
| |
| |
| def iterAttributes(self): |
| """ |
| List all the attributes defined in this module. |
| |
| Note: Future work is planned here to make it possible to list python |
| attributes on a module without loading the module by inspecting ASTs or |
| bytecode, but currently any iteration of PythonModule objects insists |
| they must be loaded, and will use inspect.getmodule. |
| |
| @raise NotImplementedError: if this module is not loaded. |
| |
| @return: a generator yielding PythonAttribute instances describing the |
| attributes of this module. |
| """ |
| if not self.isLoaded(): |
| raise NotImplementedError( |
| "You can't load attributes from non-loaded modules yet.") |
| for name, val in inspect.getmembers(self.load()): |
| yield PythonAttribute(self.name+'.'+name, self, True, val) |
| |
| def isPackage(self): |
| """ |
| Returns true if this module is also a package, and might yield something |
| from iterModules. |
| """ |
| return _isPackagePath(self.filePath) |
| |
| def load(self, default=_nothing): |
| """ |
| Load this module. |
| |
| @param default: if specified, the value to return in case of an error. |
| |
| @return: a genuine python module. |
| |
| @raise: any type of exception. Importing modules is a risky business; |
| the erorrs of any code run at module scope may be raised from here, as |
| well as ImportError if something bizarre happened to the system path |
| between the discovery of this PythonModule object and the attempt to |
| import it. If you specify a default, the error will be swallowed |
| entirely, and not logged. |
| |
| @rtype: types.ModuleType. |
| """ |
| try: |
| return self.pathEntry.pythonPath.moduleLoader(self.name) |
| except: # this needs more thought... |
| if default is not _nothing: |
| return default |
| raise |
| |
| def __eq__(self, other): |
| """ |
| PythonModules with the same name are equal. |
| """ |
| if not isinstance(other, PythonModule): |
| return False |
| return other.name == self.name |
| |
| def __ne__(self, other): |
| """ |
| PythonModules with different names are not equal. |
| """ |
| if not isinstance(other, PythonModule): |
| return True |
| return other.name != self.name |
| |
| def walkModules(self, importPackages=False): |
| if importPackages and self.isPackage(): |
| self.load() |
| return super(PythonModule, self).walkModules(importPackages=importPackages) |
| |
| def _subModuleName(self, mn): |
| """ |
| submodules of this module are prefixed with our name. |
| """ |
| return self.name + '.' + mn |
| |
| def _packagePaths(self): |
| """ |
| Yield a sequence of FilePath-like objects which represent path segments. |
| """ |
| if not self.isPackage(): |
| return |
| if self.isLoaded(): |
| load = self.load() |
| if hasattr(load, '__path__'): |
| for fn in load.__path__: |
| if fn == self.parentPath.path: |
| # this should _really_ exist. |
| assert self.parentPath.exists() |
| yield self.parentPath |
| else: |
| smp = self.pathEntry.pythonPath._smartPath(fn) |
| if smp.exists(): |
| yield smp |
| else: |
| yield self.parentPath |
| |
| |
| class PathEntry(_ModuleIteratorHelper): |
| """ |
| I am a proxy for a single entry on sys.path. |
| |
| @ivar filePath: a FilePath-like object pointing at the filesystem location |
| or archive file where this path entry is stored. |
| |
| @ivar pythonPath: a PythonPath instance. |
| """ |
| def __init__(self, filePath, pythonPath): |
| """ |
| Create a PathEntry. This is a private constructor. |
| """ |
| self.filePath = filePath |
| self.pythonPath = pythonPath |
| |
| def _getEntry(self): |
| return self |
| |
| def __repr__(self): |
| return 'PathEntry<%r>' % (self.filePath,) |
| |
| def _packagePaths(self): |
| yield self.filePath |
| |
| class IPathImportMapper(Interface): |
| """ |
| This is an internal interface, used to map importers to factories for |
| FilePath-like objects. |
| """ |
| def mapPath(self, pathLikeString): |
| """ |
| Return a FilePath-like object. |
| |
| @param pathLikeString: a path-like string, like one that might be |
| passed to an import hook. |
| |
| @return: a L{FilePath}, or something like it (currently only a |
| L{ZipPath}, but more might be added later). |
| """ |
| |
| class _DefaultMapImpl: |
| """ Wrapper for the default importer, i.e. None. """ |
| implements(IPathImportMapper) |
| def mapPath(self, fsPathString): |
| return FilePath(fsPathString) |
| _theDefaultMapper = _DefaultMapImpl() |
| |
| class _ZipMapImpl: |
| """ IPathImportMapper implementation for zipimport.ZipImporter. """ |
| implements(IPathImportMapper) |
| def __init__(self, importer): |
| self.importer = importer |
| |
| def mapPath(self, fsPathString): |
| """ |
| Map the given FS path to a ZipPath, by looking at the ZipImporter's |
| "archive" attribute and using it as our ZipArchive root, then walking |
| down into the archive from there. |
| |
| @return: a L{zippath.ZipPath} or L{zippath.ZipArchive} instance. |
| """ |
| za = ZipArchive(self.importer.archive) |
| myPath = FilePath(self.importer.archive) |
| itsPath = FilePath(fsPathString) |
| if myPath == itsPath: |
| return za |
| # This is NOT a general-purpose rule for sys.path or __file__: |
| # zipimport specifically uses regular OS path syntax in its pathnames, |
| # even though zip files specify that slashes are always the separator, |
| # regardless of platform. |
| segs = itsPath.segmentsFrom(myPath) |
| zp = za |
| for seg in segs: |
| zp = zp.child(seg) |
| return zp |
| |
| registerAdapter(_ZipMapImpl, zipimport.zipimporter, IPathImportMapper) |
| |
| def _defaultSysPathFactory(): |
| """ |
| Provide the default behavior of PythonPath's sys.path factory, which is to |
| return the current value of sys.path. |
| |
| @return: L{sys.path} |
| """ |
| return sys.path |
| |
| |
| class PythonPath: |
| """ |
| I represent the very top of the Python object-space, the module list in |
| sys.path and the modules list in sys.modules. |
| |
| @ivar _sysPath: a sequence of strings like sys.path. This attribute is |
| read-only. |
| |
| @ivar moduleDict: a dictionary mapping string module names to module |
| objects, like sys.modules. |
| |
| @ivar sysPathHooks: a list of PEP-302 path hooks, like sys.path_hooks. |
| |
| @ivar moduleLoader: a function that takes a fully-qualified python name and |
| returns a module, like twisted.python.reflect.namedAny. |
| """ |
| |
| def __init__(self, |
| sysPath=None, |
| moduleDict=sys.modules, |
| sysPathHooks=sys.path_hooks, |
| importerCache=sys.path_importer_cache, |
| moduleLoader=namedAny, |
| sysPathFactory=None): |
| """ |
| Create a PythonPath. You almost certainly want to use |
| modules.theSystemPath, or its aliased methods, rather than creating a |
| new instance yourself, though. |
| |
| All parameters are optional, and if unspecified, will use 'system' |
| equivalents that makes this PythonPath like the global L{theSystemPath} |
| instance. |
| |
| @param sysPath: a sys.path-like list to use for this PythonPath, to |
| specify where to load modules from. |
| |
| @param moduleDict: a sys.modules-like dictionary to use for keeping |
| track of what modules this PythonPath has loaded. |
| |
| @param sysPathHooks: sys.path_hooks-like list of PEP-302 path hooks to |
| be used for this PythonPath, to determie which importers should be |
| used. |
| |
| @param importerCache: a sys.path_importer_cache-like list of PEP-302 |
| importers. This will be used in conjunction with the given |
| sysPathHooks. |
| |
| @param moduleLoader: a module loader function which takes a string and |
| returns a module. That is to say, it is like L{namedAny} - *not* like |
| L{__import__}. |
| |
| @param sysPathFactory: a 0-argument callable which returns the current |
| value of a sys.path-like list of strings. Specify either this, or |
| sysPath, not both. This alternative interface is provided because the |
| way the Python import mechanism works, you can re-bind the 'sys.path' |
| name and that is what is used for current imports, so it must be a |
| factory rather than a value to deal with modification by rebinding |
| rather than modification by mutation. Note: it is not recommended to |
| rebind sys.path. Although this mechanism can deal with that, it is a |
| subtle point which some tools that it is easy for tools which interact |
| with sys.path to miss. |
| """ |
| if sysPath is not None: |
| sysPathFactory = lambda : sysPath |
| elif sysPathFactory is None: |
| sysPathFactory = _defaultSysPathFactory |
| self._sysPathFactory = sysPathFactory |
| self._sysPath = sysPath |
| self.moduleDict = moduleDict |
| self.sysPathHooks = sysPathHooks |
| self.importerCache = importerCache |
| self.moduleLoader = moduleLoader |
| |
| |
| def _getSysPath(self): |
| """ |
| Retrieve the current value of the module search path list. |
| """ |
| return self._sysPathFactory() |
| |
| sysPath = property(_getSysPath) |
| |
| def _findEntryPathString(self, modobj): |
| """ |
| Determine where a given Python module object came from by looking at path |
| entries. |
| """ |
| topPackageObj = modobj |
| while '.' in topPackageObj.__name__: |
| topPackageObj = self.moduleDict['.'.join( |
| topPackageObj.__name__.split('.')[:-1])] |
| if _isPackagePath(FilePath(topPackageObj.__file__)): |
| # if package 'foo' is on sys.path at /a/b/foo, package 'foo's |
| # __file__ will be /a/b/foo/__init__.py, and we are looking for |
| # /a/b here, the path-entry; so go up two steps. |
| rval = dirname(dirname(topPackageObj.__file__)) |
| else: |
| # the module is completely top-level, not within any packages. The |
| # path entry it's on is just its dirname. |
| rval = dirname(topPackageObj.__file__) |
| |
| # There are probably some awful tricks that an importer could pull |
| # which would break this, so let's just make sure... it's a loaded |
| # module after all, which means that its path MUST be in |
| # path_importer_cache according to PEP 302 -glyph |
| if rval not in self.importerCache: |
| warnings.warn( |
| "%s (for module %s) not in path importer cache " |
| "(PEP 302 violation - check your local configuration)." % ( |
| rval, modobj.__name__), |
| stacklevel=3) |
| |
| return rval |
| |
| def _smartPath(self, pathName): |
| """ |
| Given a path entry from sys.path which may refer to an importer, |
| return the appropriate FilePath-like instance. |
| |
| @param pathName: a str describing the path. |
| |
| @return: a FilePath-like object. |
| """ |
| importr = self.importerCache.get(pathName, _nothing) |
| if importr is _nothing: |
| for hook in self.sysPathHooks: |
| try: |
| importr = hook(pathName) |
| except ImportError: |
| pass |
| if importr is _nothing: # still |
| importr = None |
| return IPathImportMapper(importr, _theDefaultMapper).mapPath(pathName) |
| |
| def iterEntries(self): |
| """ |
| Iterate the entries on my sysPath. |
| |
| @return: a generator yielding PathEntry objects |
| """ |
| for pathName in self.sysPath: |
| fp = self._smartPath(pathName) |
| yield PathEntry(fp, self) |
| |
| |
| def __getitem__(self, modname): |
| """ |
| Get a python module by its given fully-qualified name. |
| |
| @param modname: The fully-qualified Python module name to load. |
| |
| @type modname: C{str} |
| |
| @return: an object representing the module identified by C{modname} |
| |
| @rtype: L{PythonModule} |
| |
| @raise KeyError: if the module name is not a valid module name, or no |
| such module can be identified as loadable. |
| """ |
| # See if the module is already somewhere in Python-land. |
| moduleObject = self.moduleDict.get(modname) |
| if moduleObject is not None: |
| # we need 2 paths; one of the path entry and one for the module. |
| pe = PathEntry( |
| self._smartPath( |
| self._findEntryPathString(moduleObject)), |
| self) |
| mp = self._smartPath(moduleObject.__file__) |
| return PythonModule(modname, mp, pe) |
| |
| # Recurse if we're trying to get a submodule. |
| if '.' in modname: |
| pkg = self |
| for name in modname.split('.'): |
| pkg = pkg[name] |
| return pkg |
| |
| # Finally do the slowest possible thing and iterate |
| for module in self.iterModules(): |
| if module.name == modname: |
| return module |
| raise KeyError(modname) |
| |
| |
| def __repr__(self): |
| """ |
| Display my sysPath and moduleDict in a string representation. |
| """ |
| return "PythonPath(%r,%r)" % (self.sysPath, self.moduleDict) |
| |
| def iterModules(self): |
| """ |
| Yield all top-level modules on my sysPath. |
| """ |
| for entry in self.iterEntries(): |
| for module in entry.iterModules(): |
| yield module |
| |
| def walkModules(self, importPackages=False): |
| """ |
| Similar to L{iterModules}, this yields every module on the path, then every |
| submodule in each package or entry. |
| """ |
| for package in self.iterModules(): |
| for module in package.walkModules(importPackages=False): |
| yield module |
| |
| theSystemPath = PythonPath() |
| |
| def walkModules(importPackages=False): |
| """ |
| Deeply iterate all modules on the global python path. |
| |
| @param importPackages: Import packages as they are seen. |
| """ |
| return theSystemPath.walkModules(importPackages=importPackages) |
| |
| def iterModules(): |
| """ |
| Iterate all modules and top-level packages on the global Python path, but |
| do not descend into packages. |
| |
| @param importPackages: Import packages as they are seen. |
| """ |
| return theSystemPath.iterModules() |
| |
| def getModule(moduleName): |
| """ |
| Retrieve a module from the system path. |
| """ |
| return theSystemPath[moduleName] |