root / trunk / twisted / python / modules.py

Revision 24671, 25.4 kB (checked in by exarkun, 10 months ago)

Merge module-loader-3388

Author: exarkun
Reviewer: washort, therve
Fixes: #3388

Remove the error handling code from twisted.python.modules which
attempted to reset sys.modules to its pre-import value when an
import raised an exception. This logic could lead to duplicate
instances of global state (one particular example being the running
processes state when parts of PyCrypto were not installed). The
behavior only had value on Python 2.3, where Python itself did not
remove modules which failed to import with an error from the modules
dictionary. twisted.python.modules will now behave more similarly
to import on all versions of Python.

Line 
1 # -*- test-case-name: twisted.test.test_modules -*-
2 # Copyright (c) 2006-2007 Twisted Matrix Laboratories.
3 # See LICENSE for details.
4
5 """
6 This module aims to provide a unified, object-oriented view of Python's
7 runtime hierarchy.
8
9 Python is a very dynamic language with a wide variety of introspection utilities.
10 However, these utilities can be hard to use, because there is no consistent
11 API.  The introspection API in python is made up of attributes (__name__,
12 __module__, func_name, etc) on instances, modules, classes and functions which
13 vary between those four types, utility modules such as 'inspect' which provide
14 some functionality, the 'imp' module, the "compiler" module, the semantics of
15 PEP 302 support, and setuptools, among other things.
16
17 At the top, you have "PythonPath", an abstract representation of sys.path which
18 includes methods to locate top-level modules, with or without loading them.
19 The top-level exposed functions in this module for accessing the system path
20 are "walkModules", "iterModules", and "getModule".
21
22 From most to least specific, here are the objects provided::
23
24                   PythonPath  # sys.path
25                       |
26                       v
27                   PathEntry   # one entry on sys.path: an importer
28                       |
29                       v
30                  PythonModule # a module or package that can be loaded
31                       |
32                       v
33                  PythonAttribute # an attribute of a module (function or class)
34                       |
35                       v
36                  PythonAttribute # an attribute of a function or class
37                       |
38                       v
39                      ...
40
41 Here's an example of idiomatic usage: this is what you would do to list all of
42 the modules outside the standard library's python-files directory::
43
44     import os
45     stdlibdir = os.path.dirname(os.__file__)
46
47     from twisted.python.modules import iterModules
48
49     for modinfo in iterModules():
50         if (modinfo.pathEntry.filePath.path != stdlibdir
51             and not modinfo.isPackage()):
52             print 'unpackaged: %s: %s' % (
53                 modinfo.name, modinfo.filePath.path)
54 """
55
56 __metaclass__ = type
57
58 # let's try to keep path imports to a minimum...
59 from os.path import dirname, split as splitpath
60
61 import sys
62 import zipimport
63 import inspect
64 from zope.interface import Interface, implements
65
66 from twisted.python.components import registerAdapter
67 from twisted.python.filepath import FilePath, UnlistableError
68 from twisted.python.zippath import ZipArchive
69 from twisted.python.reflect import namedAny
70
# Unique sentinel object: lets callers distinguish "no default supplied" from
# an explicit default of None.
_nothing = object()

# This module is considered to be running in optimized mode when its own
# docstring is missing; that decides which compiled-bytecode extension is
# recognised alongside plain '.py' source files.
OPTIMIZED_MODE = __doc__ is None
PYTHON_EXTENSIONS = ['.py', {True: '.pyo', False: '.pyc'}[OPTIMIZED_MODE]]
79
def _isPythonIdentifier(string):
    """
    Cheap approximation of an identifier check.

    Only three characters are rejected: a space, a dot and a dash.  Anything
    else (including the empty string or a leading digit) is accepted.

    @param string: a str which might or might not be a valid python identifier.

    @return: True if none of the rejected characters occur in C{string},
    False otherwise.
    """
    for forbidden in (' ', '.', '-'):
        if forbidden in string:
            return False
    return True
91
92
93
def _isPackagePath(fpath):
    """
    Determine if a FilePath-like object is a Python package.

    The path counts as a package when, with its extension removed, its final
    segment is exactly C{__init__}.

    @param fpath: a FilePath-like object providing C{splitext()}.

    @return: True or False
    """
    # TODO: deal with __init__module.(so|dll|pyd)?
    pathWithoutExtension = fpath.splitext()[0]
    lastSegment = splitpath(pathWithoutExtension)[1]
    if lastSegment == "__init__":
        return True
    return False
100
101
102
class _ModuleIteratorHelper:
    """
    This mixin provides common behavior between python module and path entries,
    since the mechanism for searching sys.path and __path__ attributes is
    remarkably similar.

    Users of this mixin must provide a C{filePath} attribute and implement
    L{_packagePaths} and L{_getEntry}.
    """

    def iterModules(self):
        """
        Loop over the modules present below this entry or package on PYTHONPATH.

        For modules which are not packages, this will yield nothing.

        For packages and path entries, this will only yield modules one level
        down; i.e. if there is a package a.b.c, iterModules on a will only
        return a.b.  If you want to descend deeply, use walkModules.

        Each module name is yielded at most once: the first location in which
        a name is found wins, whether it is a plain module or a package.

        @return: a generator which yields PythonModule instances that describe
        modules which can be, or have been, imported.
        """
        yielded = {}
        if not self.filePath.exists():
            return

        for placeToLook in self._packagePaths():
            try:
                children = placeToLook.children()
            except UnlistableError:
                continue

            children.sort()
            for potentialTopLevel in children:
                basename = potentialTopLevel.basename()
                ext = potentialTopLevel.splitext()[1]
                if ext:
                    potentialBasename = basename[:-len(ext)]
                else:
                    # Slicing with [:-0] would produce the empty string, which
                    # would let any directory name pass the identifier check.
                    potentialBasename = basename
                if not _isPythonIdentifier(potentialBasename):
                    continue

                if ext in PYTHON_EXTENSIONS:
                    # TODO: this should be a little choosier about which path
                    # entry it selects first, and it should do all the .so
                    # checking and crud
                    modname = self._subModuleName(potentialBasename)
                    if modname.split(".")[-1] == '__init__':
                        # This marks the directory as a package so it can't be
                        # a module.
                        continue
                    if modname in yielded:
                        continue
                    modulePath = potentialTopLevel
                elif ext or not potentialTopLevel.isdir():
                    # Neither a python file nor an extensionless directory.
                    continue
                else:
                    modname = self._subModuleName(potentialBasename)
                    if modname in yielded:
                        continue
                    # A directory is only a package if it holds an __init__
                    # file with one of the recognised extensions.
                    modulePath = None
                    for initExt in PYTHON_EXTENSIONS:
                        initpy = potentialTopLevel.child("__init__" + initExt)
                        if initpy.exists():
                            modulePath = initpy
                            break
                    if modulePath is None:
                        continue

                yielded[modname] = True
                pm = PythonModule(modname, modulePath, self._getEntry())
                assert pm != self
                yield pm

    def walkModules(self, importPackages=False):
        """
        Similar to L{iterModules}, this yields self, and then every module in my
        package or entry, and every submodule in each package or entry.

        In other words, this is deep, and L{iterModules} is shallow.

        @param importPackages: passed unchanged to the C{walkModules} method of
        every module found by L{iterModules}.
        """
        yield self
        for package in self.iterModules():
            for module in package.walkModules(importPackages=importPackages):
                yield module

    def _subModuleName(self, mn):
        """
        This is a hook to provide packages with the ability to specify their names
        as a prefix to submodules here.

        @param mn: the unqualified name of a submodule.

        @return: the name to use for that submodule; by default, C{mn} itself.
        """
        return mn

    def _packagePaths(self):
        """
        Implement in subclasses to specify where to look for modules.

        @return: iterable of FilePath-like objects.
        """
        raise NotImplementedError()

    def _getEntry(self):
        """
        Implement in subclasses to specify what path entry submodules will come
        from.

        @return: a PathEntry instance.
        """
        raise NotImplementedError()


    def __getitem__(self, modname):
        """
        Retrieve a module from below this path or package.

        @param modname: a str naming a module to be loaded.  For entries, this
        is a top-level, undotted package name, and for packages it is the name
        of the module without the package prefix.  For example, if you have a
        PythonModule representing the 'twisted' package, you could use::

            twistedPackageObj['python']['modules']

        to retrieve this module.

        @raise: KeyError if the module is not found.

        @return: a PythonModule.
        """
        # The qualified name does not change while searching, so compute it
        # once rather than for every candidate.
        wanted = self._subModuleName(modname)
        for module in self.iterModules():
            if module.name == wanted:
                return module
        raise KeyError(modname)

    def __iter__(self):
        """
        Implemented to raise NotImplementedError for clarity, so that attempting to
        loop over this object won't call __getitem__.

        Note: in the future there might be some sensible default for iteration,
        like 'walkEverything', so this is deliberately untested and undefined
        behavior.
        """
        raise NotImplementedError()
236
class PythonAttribute:
    """
    I represent a function, class, or other object that is present.

    @ivar name: the fully qualified python name of the attribute represented by
    this class.

    @ivar onObject: a reference to a PythonModule or other PythonAttribute that
    is this attribute's logical parent.

    @ivar pythonValue: the value of the attribute being described.
    """
    def __init__(self, name, onObject, loaded, pythonValue):
        """
        Create a PythonAttribute.  This is a private constructor.  Do not construct
        me directly, use PythonModule.iterAttributes.

        @param name: the FQPN
        @param onObject: see ivar
        @param loaded: always True, for now
        @param pythonValue: the value of the attribute we're pointing to.
        """
        self.pythonValue = pythonValue
        self._loaded = loaded
        self.onObject = onObject
        self.name = name

    def __repr__(self):
        """
        Return a string representation including the attribute name.
        """
        return 'PythonAttribute<%r>' % (self.name,)

    def isLoaded(self):
        """
        Return a boolean describing whether the attribute this describes has
        actually been loaded into memory by importing its module.

        Note: this currently always returns true; there is no Python parser
        support in this module yet.
        """
        return self._loaded

    def load(self, default=_nothing):
        """
        Load the value associated with this attribute.

        @param default: accepted for symmetry with L{PythonModule.load}; it is
        currently never used, because the value is already in memory.

        @return: an arbitrary Python object.
        """
        return self.pythonValue

    def iterAttributes(self):
        """
        List the members of the value this attribute points to.

        @return: a generator yielding one PythonAttribute per member reported
        by C{inspect.getmembers}, each named with my name as a dotted prefix.
        """
        for memberName, memberValue in inspect.getmembers(self.load()):
            qualifiedName = self.name + '.' + memberName
            yield PythonAttribute(qualifiedName, self, True, memberValue)
289
class PythonModule(_ModuleIteratorHelper):
    """
    Representation of a module which could be imported from sys.path.

    @ivar name: the fully qualified python name of this module.

    @ivar filePath: a FilePath-like object which points to the location of this
    module.

    @ivar parentPath: the parent of C{filePath}.

    @ivar pathEntry: a L{PathEntry} instance which this module was located
    from.
    """

    def __init__(self, name, filePath, pathEntry):
        """
        Create a PythonModule.  Do not construct this directly, instead inspect a
        PythonPath or other PythonModule instances.

        @param name: see ivar
        @param filePath: see ivar
        @param pathEntry: see ivar
        """
        assert not name.endswith(".__init__")
        self.name = name
        self.pathEntry = pathEntry
        self.filePath = filePath
        self.parentPath = filePath.parent()

    def _getEntry(self):
        """
        Submodules come from the same path entry as this module.
        """
        return self.pathEntry

    def __repr__(self):
        """
        Return a string representation including the module name.
        """
        return 'PythonModule<%r>' % (self.name,)

    def isLoaded(self):
        """
        Determine if the module is loaded into sys.modules.

        @return: a boolean: true if loaded, false if not.
        """
        loadedModules = self.pathEntry.pythonPath.moduleDict
        return self.name in loadedModules

    def iterAttributes(self):
        """
        List all the attributes defined in this module.

        Note: Future work is planned here to make it possible to list python
        attributes on a module without loading the module by inspecting ASTs or
        bytecode, but currently any iteration of PythonModule objects insists
        they must be loaded, and will use inspect.getmembers.

        @raise NotImplementedError: if this module is not loaded.

        @return: a generator yielding PythonAttribute instances describing the
        attributes of this module.
        """
        if not self.isLoaded():
            raise NotImplementedError(
                "You can't load attributes from non-loaded modules yet.")
        for memberName, memberValue in inspect.getmembers(self.load()):
            qualifiedName = self.name + '.' + memberName
            yield PythonAttribute(qualifiedName, self, True, memberValue)

    def isPackage(self):
        """
        Returns true if this module is also a package, and might yield something
        from iterModules.
        """
        return _isPackagePath(self.filePath)

    def load(self, default=_nothing):
        """
        Load this module.

        @param default: if specified, the value to return in case of an error.

        @return: a genuine python module.

        @raise: any type of exception.  Importing modules is a risky business;
        the errors of any code run at module scope may be raised from here, as
        well as ImportError if something bizarre happened to the system path
        between the discovery of this PythonModule object and the attempt to
        import it.  If you specify a default, the error will be swallowed
        entirely, and not logged.

        @rtype: types.ModuleType.
        """
        loader = self.pathEntry.pythonPath.moduleLoader
        try:
            return loader(self.name)
        except:                 # this needs more thought...
            if default is _nothing:
                raise
            return default

    def __eq__(self, other):
        """
        PythonModules with the same name are equal.
        """
        return isinstance(other, PythonModule) and other.name == self.name

    def __ne__(self, other):
        """
        PythonModules with different names are not equal.
        """
        return not isinstance(other, PythonModule) or other.name != self.name

    def walkModules(self, importPackages=False):
        """
        Walk this module and everything below it, as the inherited
        C{walkModules} does.

        @param importPackages: if true and this module is a package, load it
        before walking.
        """
        if importPackages and self.isPackage():
            self.load()
        return super(PythonModule, self).walkModules(importPackages=importPackages)

    def _subModuleName(self, mn):
        """
        submodules of this module are prefixed with our name.
        """
        return self.name + '.' + mn

    def _packagePaths(self):
        """
        Yield a sequence of FilePath-like objects which represent path segments.

        Nothing is yielded for a non-package.  An unloaded package yields only
        its own directory; a loaded one yields the existing locations named by
        its C{__path__}, if it has one.
        """
        if not self.isPackage():
            return
        if not self.isLoaded():
            yield self.parentPath
            return
        loadedModule = self.load()
        if not hasattr(loadedModule, '__path__'):
            return
        for location in loadedModule.__path__:
            if location == self.parentPath.path:
                # this should _really_ exist.
                assert self.parentPath.exists()
                yield self.parentPath
            else:
                candidate = self.pathEntry.pythonPath._smartPath(location)
                if candidate.exists():
                    yield candidate
433
434
class PathEntry(_ModuleIteratorHelper):
    """
    I am a proxy for a single entry on sys.path.

    @ivar filePath: a FilePath-like object pointing at the filesystem location
    or archive file where this path entry is stored.

    @ivar pythonPath: a PythonPath instance.
    """
    def __init__(self, filePath, pythonPath):
        """
        Create a PathEntry.  This is a private constructor.

        @param filePath: see ivar
        @param pythonPath: see ivar
        """
        self.pythonPath = pythonPath
        self.filePath = filePath

    def _getEntry(self):
        """
        A path entry is its own entry.
        """
        return self

    def __repr__(self):
        """
        Return a string representation including my file path.
        """
        location = self.filePath
        return 'PathEntry<%r>' % (location,)

    def _packagePaths(self):
        """
        The only place to look for modules is my own file path.
        """
        for location in (self.filePath,):
            yield location
459
class IPathImportMapper(Interface):
    """
    This is an internal interface, used to map importers to factories for
    FilePath-like objects.

    Implementations in this module are L{_DefaultMapImpl} and L{_ZipMapImpl}.
    """
    def mapPath(self, pathLikeString):
        """
        Return a FilePath-like object.

        @param pathLikeString: a path-like string, like one that might be
        passed to an import hook.

        @return: a L{FilePath}, or something like it (currently only a
        L{ZipPath}, but more might be added later).
        """
475
class _DefaultMapImpl:
    """ Wrapper for the default importer, i.e. None.  """
    implements(IPathImportMapper)
    def mapPath(self, fsPathString):
        """
        Map a filesystem path string directly to a L{FilePath}.

        @param fsPathString: a str naming a location on the filesystem.

        @return: a L{FilePath} for that location.
        """
        return FilePath(fsPathString)
# Shared instance, passed as the alternate value when adapting an importer to
# IPathImportMapper in PythonPath._smartPath.
_theDefaultMapper = _DefaultMapImpl()
482
class _ZipMapImpl:
    """ IPathImportMapper implementation for zipimport.ZipImporter.  """
    implements(IPathImportMapper)
    def __init__(self, importer):
        """
        @param importer: the importer being adapted; its C{archive} attribute
        is read by L{mapPath}.
        """
        self.importer = importer

    def mapPath(self, fsPathString):
        """
        Map the given FS path to a ZipPath, by looking at the ZipImporter's
        "archive" attribute and using it as our ZipArchive root, then walking
        down into the archive from there.

        @param fsPathString: a str naming the archive itself or a location
        below it.

        @return: a L{zippath.ZipPath} or L{zippath.ZipArchive} instance.
        """
        archiveName = self.importer.archive
        archive = ZipArchive(archiveName)
        archiveLocation = FilePath(archiveName)
        requested = FilePath(fsPathString)
        if archiveLocation == requested:
            return archive
        # This is NOT a general-purpose rule for sys.path or __file__:
        # zipimport specifically uses regular OS path syntax in its pathnames,
        # even though zip files specify that slashes are always the separator,
        # regardless of platform.
        current = archive
        for segment in requested.segmentsFrom(archiveLocation):
            current = current.child(segment)
        return current

# Let IPathImportMapper(zipimporterInstance) produce a _ZipMapImpl.
registerAdapter(_ZipMapImpl, zipimport.zipimporter, IPathImportMapper)
513
def _defaultSysPathFactory():
    """
    Provide the default behavior of PythonPath's sys.path factory, which is to
    return the current value of sys.path.

    The attribute is looked up on every call, so a C{sys.path} that has been
    rebound to a different list is picked up as well as one mutated in place.

    @return: L{sys.path}
    """
    return sys.path
522
523
class PythonPath:
    """
    I represent the very top of the Python object-space, the module list in
    sys.path and the modules list in sys.modules.

    @ivar _sysPath: a sequence of strings like sys.path.  This attribute is
    read-only.

    @ivar sysPath: the current module search path, as returned by the
    sys.path factory this instance was created with.

    @ivar moduleDict: a dictionary mapping string module names to module
    objects, like sys.modules.

    @ivar sysPathHooks: a list of PEP-302 path hooks, like sys.path_hooks.

    @ivar importerCache: a mapping of path strings to PEP-302 importers, like
    sys.path_importer_cache.

    @ivar moduleLoader: a function that takes a fully-qualified python name and
    returns a module, like twisted.python.reflect.namedAny.
    """

    def __init__(self,
                 sysPath=None,
                 moduleDict=sys.modules,
                 sysPathHooks=sys.path_hooks,
                 importerCache=sys.path_importer_cache,
                 moduleLoader=namedAny,
                 sysPathFactory=None):
        """
        Create a PythonPath.  You almost certainly want to use
        modules.theSystemPath, or its aliased methods, rather than creating a
        new instance yourself, though.

        All parameters are optional, and if unspecified, will use 'system'
        equivalents that makes this PythonPath like the global L{theSystemPath}
        instance.

        @param sysPath: a sys.path-like list to use for this PythonPath, to
        specify where to load modules from.

        @param moduleDict: a sys.modules-like dictionary to use for keeping
        track of what modules this PythonPath has loaded.

        @param sysPathHooks: sys.path_hooks-like list of PEP-302 path hooks to
        be used for this PythonPath, to determine which importers should be
        used.

        @param importerCache: a sys.path_importer_cache-like mapping of path
        strings to PEP-302 importers.  This will be used in conjunction with
        the given sysPathHooks.

        @param moduleLoader: a module loader function which takes a string and
        returns a module.  That is to say, it is like L{namedAny} - *not* like
        L{__import__}.

        @param sysPathFactory: a 0-argument callable which returns the current
        value of a sys.path-like list of strings.  Specify either this, or
        sysPath, not both; if both are given, sysPath takes precedence.  This
        alternative interface is provided because the way the Python import
        mechanism works, you can re-bind the 'sys.path' name and that is what
        is used for current imports, so it must be a factory rather than a
        value to deal with modification by rebinding rather than modification
        by mutation.  Note: it is not recommended to rebind sys.path.
        Although this mechanism can deal with that, it is a subtle point
        which is easy for tools which interact with sys.path to miss.
        """
        if sysPath is not None:
            sysPathFactory = lambda : sysPath
        elif sysPathFactory is None:
            sysPathFactory = _defaultSysPathFactory
        self._sysPathFactory = sysPathFactory
        self._sysPath = sysPath
        self.moduleDict = moduleDict
        self.sysPathHooks = sysPathHooks
        self.importerCache = importerCache
        self.moduleLoader = moduleLoader


    def _getSysPath(self):
        """
        Retrieve the current value of the module search path list.
        """
        return self._sysPathFactory()

    sysPath = property(_getSysPath)

    def _findEntryPathString(self, modobj):
        """
        Determine where a given Python module object came from by looking at path
        entries.

        @param modobj: a loaded module object.

        @return: the str path entry which the module's top-level package or
        module was found on.
        """
        # Climb from the module to its outermost package.
        topPackageObj = modobj
        while '.' in topPackageObj.__name__:
            topPackageObj = self.moduleDict['.'.join(
                    topPackageObj.__name__.split('.')[:-1])]
        if _isPackagePath(FilePath(topPackageObj.__file__)):
            # if package 'foo' is on sys.path at /a/b/foo, package 'foo's
            # __file__ will be /a/b/foo/__init__.py, and we are looking for
            # /a/b here, the path-entry; so go up two steps.
            rval = dirname(dirname(topPackageObj.__file__))
        else:
            # the module is completely top-level, not within any packages.  The
            # path entry it's on is just its dirname.
            rval = dirname(topPackageObj.__file__)
        # There are probably some awful tricks that an importer could pull
        # which would break this, so let's just make sure... it's a loaded
        # module after all, which means that its path MUST be in
        # path_importer_cache according to PEP 302 -glyph
        from pprint import pformat
        assert rval in self.importerCache, '%r for %r not in import cache %s' % (
            rval, modobj, pformat(self.importerCache))
        return rval

    def _smartPath(self, pathName):
        """
        Given a path entry from sys.path which may refer to an importer,
        return the appropriate FilePath-like instance.

        The importer is taken from the importer cache when present; otherwise
        the path hooks are tried in order and the first one which does not
        raise ImportError is used.  If none succeeds, the default (None)
        importer is assumed.

        @param pathName: a str describing the path.

        @return: a FilePath-like object.
        """
        importr = self.importerCache.get(pathName, _nothing)
        if importr is _nothing:
            importr = None
            for hook in self.sysPathHooks:
                try:
                    importr = hook(pathName)
                except ImportError:
                    # This hook does not handle the path; try the next one.
                    continue
                else:
                    # The first hook to accept the path wins.
                    break
        return IPathImportMapper(importr, _theDefaultMapper).mapPath(pathName)

    def iterEntries(self):
        """
        Iterate the entries on my sysPath.

        @return: a generator yielding PathEntry objects
        """
        for pathName in self.sysPath:
            fp = self._smartPath(pathName)
            yield PathEntry(fp, self)

    def __getitem__(self, modname):
        """
        Get a python module by a given fully-qualified name.

        @param modname: a str, the fully-qualified name of the module.

        @return: a PythonModule object.

        @raise: KeyError, if the module name is not the name of a module which
        can be found.
        """
        # See if the module is already somewhere in Python-land.  A None value
        # is a placeholder rather than a module, so it is treated the same as a
        # missing entry.
        moduleObject = self.moduleDict.get(modname)
        if moduleObject is not None:
            # we need 2 paths; one of the path entry and one for the module.
            pe = PathEntry(
                self._smartPath(
                    self._findEntryPathString(moduleObject)),
                self)
            mp = self._smartPath(moduleObject.__file__)
            return PythonModule(modname, mp, pe)

        # Recurse if we're trying to get a submodule.
        if '.' in modname:
            pkg = self
            for name in modname.split('.'):
                pkg = pkg[name]
            return pkg

        # Finally do the slowest possible thing and iterate
        for module in self.iterModules():
            if module.name == modname:
                return module
        raise KeyError(modname)

    def __repr__(self):
        """
        Display my sysPath and moduleDict in a string representation.
        """
        return "PythonPath(%r,%r)" % (self.sysPath, self.moduleDict)

    def iterModules(self):
        """
        Yield all top-level modules on my sysPath.
        """
        for entry in self.iterEntries():
            for module in entry.iterModules():
                yield module

    def walkModules(self, importPackages=False):
        """
        Similar to L{iterModules}, this yields every module on the path, then every
        submodule in each package or entry.

        @param importPackages: passed on to the C{walkModules} method of each
        top-level module, which imports packages as they are seen when true.
        """
        for package in self.iterModules():
            for module in package.walkModules(importPackages=importPackages):
                yield module

# The PythonPath describing the running interpreter's own import state.
theSystemPath = PythonPath()
719
def walkModules(importPackages=False):
    """
    Deeply iterate all modules on the global python path.

    This is a convenience wrapper around C{theSystemPath.walkModules}.

    @param importPackages: Import packages as they are seen.  The value is
    passed through unchanged to C{theSystemPath.walkModules}.

    @return: whatever C{theSystemPath.walkModules} returns.
    """
    return theSystemPath.walkModules(importPackages=importPackages)
727
def iterModules():
    """
    Iterate all modules and top-level packages on the global Python path, but
    do not descend into packages.

    This is a convenience wrapper around C{theSystemPath.iterModules}; it
    takes no arguments.

    @return: whatever C{theSystemPath.iterModules} returns.
    """
    return theSystemPath.iterModules()
736
def getModule(moduleName):
    """
    Retrieve a module from the system path.

    This is a convenience wrapper around indexing C{theSystemPath}.

    @param moduleName: a str, the fully-qualified name of the module.

    @return: the result of C{theSystemPath[moduleName]}.
    """
    return theSystemPath[moduleName]
Note: See TracBrowser for help on using the browser.