blob: aadcd23edbaaa7c91eb28ffe56b4cd75ab4d418f [file] [log] [blame]
"""Find modules used by a script, using introspection."""
2
3import dis
4import importlib._bootstrap_external
5import importlib.machinery
6import marshal
7import os
8import io
9import sys
10import types
11import warnings
12
13
# Numeric opcodes of the bytecode instructions the scanner looks for.
LOAD_CONST = dis.opmap['LOAD_CONST']
IMPORT_NAME = dis.opmap['IMPORT_NAME']
STORE_NAME = dis.opmap['STORE_NAME']
STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
# The store opcodes the scanner treats as assigning a global name.
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
EXTENDED_ARG = dis.EXTENDED_ARG

# Module-kind codes, using the values of the old imp constants:

_SEARCH_ERROR = 0
_PY_SOURCE = 1
_PY_COMPILED = 2
_C_EXTENSION = 3
_PKG_DIRECTORY = 5
_C_BUILTIN = 6
_PY_FROZEN = 7
30
# Modulefinder does a good job at simulating Python's import behaviour, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note: this is a mapping of package names to lists of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search path for package *packagename*."""
    extra_paths = packagePathMap.setdefault(packagename, [])
    extra_paths.append(path)
42
# Maps a package name to the replacement name registered for it.
replacePackageMap = {}

# The ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name of
# another package into sys.modules at runtime.  To use it, call
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Register *newname* as the replacement name for package *oldname*."""
    replacePackageMap.update({oldname: newname})
53
54
def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Returns a ``(file, pathname, (suffix, mode, kind))`` triple, where
    *kind* is one of the module-kind constants defined in this file.
    *file* is an open binary file for source, bytecode and extension
    modules (the caller is responsible for closing it) and None otherwise.

    Raises ImportError when no spec can be found for *name*, or when the
    spec found has no loader or no origin (as is the case for a namespace
    package), since there is then no file to inspect.
    """

    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:

    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)

    if spec is None:
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    loader = spec.loader

    # Some special cases:

    if loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)

    if loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    file_path = spec.origin

    # A spec without a loader or an origin would make the calls below fail
    # with AttributeError/TypeError.  Report it as an ImportError instead,
    # which is the exception callers of this function already handle.
    if loader is None or file_path is None:
        raise ImportError(
            "No file found for module {name!r}".format(name=name), name=name)

    if loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE

    elif isinstance(loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION

    elif isinstance(loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED

    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    file = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]

    return file, file_path, (suffix, "rb", kind)
98
99
class Module:
    """Record of a single module found by the module finder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # The global names that are assigned to in the module (stored as
        # dict keys).  This includes those names imported through
        # starimports of Python modules.
        self.globalnames = {}
        # The starimports this module did that could not be resolved,
        # i.e. a starimport from a non-Python module (stored as dict keys).
        self.starimports = {}

    def __repr__(self):
        # The file and path are only shown when they are set.
        shown = [repr(self.__name__)]
        if self.__file__ is not None:
            shown.append(repr(self.__file__))
        if self.__path__ is not None:
            shown.append(repr(self.__path__))
        return "Module(" + ", ".join(shown) + ")"
123
124class ModuleFinder:
125
126 def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
127 if path is None:
128 path = sys.path
129 self.path = path
130 self.modules = {}
131 self.badmodules = {}
132 self.debug = debug
133 self.indent = 0
134 self.excludes = excludes if excludes is not None else []
135 self.replace_paths = replace_paths if replace_paths is not None else []
136 self.processed_paths = [] # Used in debugging only
137
138 def msg(self, level, str, *args):
139 if level <= self.debug:
140 for i in range(self.indent):
141 print(" ", end=' ')
142 print(str, end=' ')
143 for arg in args:
144 print(repr(arg), end=' ')
145 print()
146
147 def msgin(self, *args):
148 level = args[0]
149 if level <= self.debug:
150 self.indent = self.indent + 1
151 self.msg(*args)
152
153 def msgout(self, *args):
154 level = args[0]
155 if level <= self.debug:
156 self.indent = self.indent - 1
157 self.msg(*args)
158
159 def run_script(self, pathname):
160 self.msg(2, "run_script", pathname)
161 with io.open_code(pathname) as fp:
162 stuff = ("", "rb", _PY_SOURCE)
163 self.load_module('__main__', fp, pathname, stuff)
164
165 def load_file(self, pathname):
166 dir, name = os.path.split(pathname)
167 name, ext = os.path.splitext(name)
168 with io.open_code(pathname) as fp:
169 stuff = (ext, "rb", _PY_SOURCE)
170 self.load_module(name, fp, pathname, stuff)
171
172 def import_hook(self, name, caller=None, fromlist=None, level=-1):
173 self.msg(3, "import_hook", name, caller, fromlist, level)
174 parent = self.determine_parent(caller, level=level)
175 q, tail = self.find_head_package(parent, name)
176 m = self.load_tail(q, tail)
177 if not fromlist:
178 return q
179 if m.__path__:
180 self.ensure_fromlist(m, fromlist)
181 return None
182
183 def determine_parent(self, caller, level=-1):
184 self.msgin(4, "determine_parent", caller, level)
185 if not caller or level == 0:
186 self.msgout(4, "determine_parent -> None")
187 return None
188 pname = caller.__name__
189 if level >= 1: # relative import
190 if caller.__path__:
191 level -= 1
192 if level == 0:
193 parent = self.modules[pname]
194 assert parent is caller
195 self.msgout(4, "determine_parent ->", parent)
196 return parent
197 if pname.count(".") < level:
198 raise ImportError("relative importpath too deep")
199 pname = ".".join(pname.split(".")[:-level])
200 parent = self.modules[pname]
201 self.msgout(4, "determine_parent ->", parent)
202 return parent
203 if caller.__path__:
204 parent = self.modules[pname]
205 assert caller is parent
206 self.msgout(4, "determine_parent ->", parent)
207 return parent
208 if '.' in pname:
209 i = pname.rfind('.')
210 pname = pname[:i]
211 parent = self.modules[pname]
212 assert parent.__name__ == pname
213 self.msgout(4, "determine_parent ->", parent)
214 return parent
215 self.msgout(4, "determine_parent -> None")
216 return None
217
218 def find_head_package(self, parent, name):
219 self.msgin(4, "find_head_package", parent, name)
220 if '.' in name:
221 i = name.find('.')
222 head = name[:i]
223 tail = name[i+1:]
224 else:
225 head = name
226 tail = ""
227 if parent:
228 qname = "%s.%s" % (parent.__name__, head)
229 else:
230 qname = head
231 q = self.import_module(head, qname, parent)
232 if q:
233 self.msgout(4, "find_head_package ->", (q, tail))
234 return q, tail
235 if parent:
236 qname = head
237 parent = None
238 q = self.import_module(head, qname, parent)
239 if q:
240 self.msgout(4, "find_head_package ->", (q, tail))
241 return q, tail
242 self.msgout(4, "raise ImportError: No module named", qname)
243 raise ImportError("No module named " + qname)
244
245 def load_tail(self, q, tail):
246 self.msgin(4, "load_tail", q, tail)
247 m = q
248 while tail:
249 i = tail.find('.')
250 if i < 0: i = len(tail)
251 head, tail = tail[:i], tail[i+1:]
252 mname = "%s.%s" % (m.__name__, head)
253 m = self.import_module(head, mname, m)
254 if not m:
255 self.msgout(4, "raise ImportError: No module named", mname)
256 raise ImportError("No module named " + mname)
257 self.msgout(4, "load_tail ->", m)
258 return m
259
260 def ensure_fromlist(self, m, fromlist, recursive=0):
261 self.msg(4, "ensure_fromlist", m, fromlist, recursive)
262 for sub in fromlist:
263 if sub == "*":
264 if not recursive:
265 all = self.find_all_submodules(m)
266 if all:
267 self.ensure_fromlist(m, all, 1)
268 elif not hasattr(m, sub):
269 subname = "%s.%s" % (m.__name__, sub)
270 submod = self.import_module(sub, subname, m)
271 if not submod:
272 raise ImportError("No module named " + subname)
273
274 def find_all_submodules(self, m):
275 if not m.__path__:
276 return
277 modules = {}
278 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
279 # But we must also collect Python extension modules - although
280 # we cannot separate normal dlls from Python extensions.
281 suffixes = []
282 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
283 suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
284 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
285 for dir in m.__path__:
286 try:
287 names = os.listdir(dir)
288 except OSError:
289 self.msg(2, "can't list directory", dir)
290 continue
291 for name in names:
292 mod = None
293 for suff in suffixes:
294 n = len(suff)
295 if name[-n:] == suff:
296 mod = name[:-n]
297 break
298 if mod and mod != "__init__":
299 modules[mod] = mod
300 return modules.keys()
301
302 def import_module(self, partname, fqname, parent):
303 self.msgin(3, "import_module", partname, fqname, parent)
304 try:
305 m = self.modules[fqname]
306 except KeyError:
307 pass
308 else:
309 self.msgout(3, "import_module ->", m)
310 return m
311 if fqname in self.badmodules:
312 self.msgout(3, "import_module -> None")
313 return None
314 if parent and parent.__path__ is None:
315 self.msgout(3, "import_module -> None")
316 return None
317 try:
318 fp, pathname, stuff = self.find_module(partname,
319 parent and parent.__path__, parent)
320 except ImportError:
321 self.msgout(3, "import_module ->", None)
322 return None
323
324 try:
325 m = self.load_module(fqname, fp, pathname, stuff)
326 finally:
327 if fp:
328 fp.close()
329 if parent:
330 setattr(parent, partname, m)
331 self.msgout(3, "import_module ->", m)
332 return m
333
334 def load_module(self, fqname, fp, pathname, file_info):
335 suffix, mode, type = file_info
336 self.msgin(2, "load_module", fqname, fp and "fp", pathname)
337 if type == _PKG_DIRECTORY:
338 m = self.load_package(fqname, pathname)
339 self.msgout(2, "load_module ->", m)
340 return m
341 if type == _PY_SOURCE:
342 co = compile(fp.read(), pathname, 'exec')
343 elif type == _PY_COMPILED:
344 try:
345 data = fp.read()
346 importlib._bootstrap_external._classify_pyc(data, fqname, {})
347 except ImportError as exc:
348 self.msgout(2, "raise ImportError: " + str(exc), pathname)
349 raise
350 co = marshal.loads(memoryview(data)[16:])
351 else:
352 co = None
353 m = self.add_module(fqname)
354 m.__file__ = pathname
355 if co:
356 if self.replace_paths:
357 co = self.replace_paths_in_code(co)
358 m.__code__ = co
359 self.scan_code(co, m)
360 self.msgout(2, "load_module ->", m)
361 return m
362
363 def _add_badmodule(self, name, caller):
364 if name not in self.badmodules:
365 self.badmodules[name] = {}
366 if caller:
367 self.badmodules[name][caller.__name__] = 1
368 else:
369 self.badmodules[name]["-"] = 1
370
371 def _safe_import_hook(self, name, caller, fromlist, level=-1):
372 # wrapper for self.import_hook() that won't raise ImportError
373 if name in self.badmodules:
374 self._add_badmodule(name, caller)
375 return
376 try:
377 self.import_hook(name, caller, level=level)
378 except ImportError as msg:
379 self.msg(2, "ImportError:", str(msg))
380 self._add_badmodule(name, caller)
381 except SyntaxError as msg:
382 self.msg(2, "SyntaxError:", str(msg))
383 self._add_badmodule(name, caller)
384 else:
385 if fromlist:
386 for sub in fromlist:
387 fullname = name + "." + sub
388 if fullname in self.badmodules:
389 self._add_badmodule(fullname, caller)
390 continue
391 try:
392 self.import_hook(name, caller, [sub], level=level)
393 except ImportError as msg:
394 self.msg(2, "ImportError:", str(msg))
395 self._add_badmodule(fullname, caller)
396
397 def scan_opcodes(self, co):
398 # Scan the code, and yield 'interesting' opcode combinations
399 code = co.co_code
400 names = co.co_names
401 consts = co.co_consts
402 opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
403 if op != EXTENDED_ARG]
404 for i, (op, oparg) in enumerate(opargs):
405 if op in STORE_OPS:
406 yield "store", (names[oparg],)
407 continue
408 if (op == IMPORT_NAME and i >= 2
409 and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
410 level = consts[opargs[i-2][1]]
411 fromlist = consts[opargs[i-1][1]]
412 if level == 0: # absolute import
413 yield "absolute_import", (fromlist, names[oparg])
414 else: # relative import
415 yield "relative_import", (level, fromlist, names[oparg])
416 continue
417
418 def scan_code(self, co, m):
419 code = co.co_code
420 scanner = self.scan_opcodes
421 for what, args in scanner(co):
422 if what == "store":
423 name, = args
424 m.globalnames[name] = 1
425 elif what == "absolute_import":
426 fromlist, name = args
427 have_star = 0
428 if fromlist is not None:
429 if "*" in fromlist:
430 have_star = 1
431 fromlist = [f for f in fromlist if f != "*"]
432 self._safe_import_hook(name, m, fromlist, level=0)
433 if have_star:
434 # We've encountered an "import *". If it is a Python module,
435 # the code has already been parsed and we can suck out the
436 # global names.
437 mm = None
438 if m.__path__:
439 # At this point we don't know whether 'name' is a
440 # submodule of 'm' or a global module. Let's just try
441 # the full name first.
442 mm = self.modules.get(m.__name__ + "." + name)
443 if mm is None:
444 mm = self.modules.get(name)
445 if mm is not None:
446 m.globalnames.update(mm.globalnames)
447 m.starimports.update(mm.starimports)
448 if mm.__code__ is None:
449 m.starimports[name] = 1
450 else:
451 m.starimports[name] = 1
452 elif what == "relative_import":
453 level, fromlist, name = args
454 if name:
455 self._safe_import_hook(name, m, fromlist, level=level)
456 else:
457 parent = self.determine_parent(m, level=level)
458 self._safe_import_hook(parent.__name__, None, fromlist, level=0)
459 else:
460 # We don't expect anything else from the generator.
461 raise RuntimeError(what)
462
463 for c in co.co_consts:
464 if isinstance(c, type(co)):
465 self.scan_code(c, m)
466
467 def load_package(self, fqname, pathname):
468 self.msgin(2, "load_package", fqname, pathname)
469 newname = replacePackageMap.get(fqname)
470 if newname:
471 fqname = newname
472 m = self.add_module(fqname)
473 m.__file__ = pathname
474 m.__path__ = [pathname]
475
476 # As per comment at top of file, simulate runtime __path__ additions.
477 m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
478
479 fp, buf, stuff = self.find_module("__init__", m.__path__)
480 try:
481 self.load_module(fqname, fp, buf, stuff)
482 self.msgout(2, "load_package ->", m)
483 return m
484 finally:
485 if fp:
486 fp.close()
487
488 def add_module(self, fqname):
489 if fqname in self.modules:
490 return self.modules[fqname]
491 self.modules[fqname] = m = Module(fqname)
492 return m
493
494 def find_module(self, name, path, parent=None):
495 if parent is not None:
496 # assert path is not None
497 fullname = parent.__name__+'.'+name
498 else:
499 fullname = name
500 if fullname in self.excludes:
501 self.msgout(3, "find_module -> Excluded", fullname)
502 raise ImportError(name)
503
504 if path is None:
505 if name in sys.builtin_module_names:
506 return (None, None, ("", "", _C_BUILTIN))
507
508 path = self.path
509
510 return _find_module(name, path)
511
512 def report(self):
513 """Print a report to stdout, listing the found modules with their
514 paths, as well as modules that are missing, or seem to be missing.
515 """
516 print()
517 print(" %-25s %s" % ("Name", "File"))
518 print(" %-25s %s" % ("----", "----"))
519 # Print modules found
520 keys = sorted(self.modules.keys())
521 for key in keys:
522 m = self.modules[key]
523 if m.__path__:
524 print("P", end=' ')
525 else:
526 print("m", end=' ')
527 print("%-25s" % key, m.__file__ or "")
528
529 # Print missing modules
530 missing, maybe = self.any_missing_maybe()
531 if missing:
532 print()
533 print("Missing modules:")
534 for name in missing:
535 mods = sorted(self.badmodules[name].keys())
536 print("?", name, "imported from", ', '.join(mods))
537 # Print modules that may be missing, but then again, maybe not...
538 if maybe:
539 print()
540 print("Submodules that appear to be missing, but could also be", end=' ')
541 print("global names in the parent package:")
542 for name in maybe:
543 mods = sorted(self.badmodules[name].keys())
544 print("?", name, "imported from", ', '.join(mods))
545
546 def any_missing(self):
547 """Return a list of modules that appear to be missing. Use
548 any_missing_maybe() if you want to know which modules are
549 certain to be missing, and which *may* be missing.
550 """
551 missing, maybe = self.any_missing_maybe()
552 return missing + maybe
553
554 def any_missing_maybe(self):
555 """Return two lists, one with modules that are certainly missing
556 and one with modules that *may* be missing. The latter names could
557 either be submodules *or* just global names in the package.
558
559 The reason it can't always be determined is that it's impossible to
560 tell which names are imported when "from module import *" is done
561 with an extension module, short of actually importing it.
562 """
563 missing = []
564 maybe = []
565 for name in self.badmodules:
566 if name in self.excludes:
567 continue
568 i = name.rfind(".")
569 if i < 0:
570 missing.append(name)
571 continue
572 subname = name[i+1:]
573 pkgname = name[:i]
574 pkg = self.modules.get(pkgname)
575 if pkg is not None:
576 if pkgname in self.badmodules[name]:
577 # The package tried to import this module itself and
578 # failed. It's definitely missing.
579 missing.append(name)
580 elif subname in pkg.globalnames:
581 # It's a global in the package: definitely not missing.
582 pass
583 elif pkg.starimports:
584 # It could be missing, but the package did an "import *"
585 # from a non-Python module, so we simply can't be sure.
586 maybe.append(name)
587 else:
588 # It's not a global in the package, the package didn't
589 # do funny star imports, it's very likely to be missing.
590 # The symbol could be inserted into the package from the
591 # outside, but since that's not good style we simply list
592 # it missing.
593 missing.append(name)
594 else:
595 missing.append(name)
596 missing.sort()
597 maybe.sort()
598 return missing, maybe
599
600 def replace_paths_in_code(self, co):
601 new_filename = original_filename = os.path.normpath(co.co_filename)
602 for f, r in self.replace_paths:
603 if original_filename.startswith(f):
604 new_filename = r + original_filename[len(f):]
605 break
606
607 if self.debug and original_filename not in self.processed_paths:
608 if new_filename != original_filename:
609 self.msgout(2, "co_filename %r changed to %r" \
610 % (original_filename,new_filename,))
611 else:
612 self.msgout(2, "co_filename %r remains unchanged" \
613 % (original_filename,))
614 self.processed_paths.append(original_filename)
615
616 consts = list(co.co_consts)
617 for i in range(len(consts)):
618 if isinstance(consts[i], type(co)):
619 consts[i] = self.replace_paths_in_code(consts[i])
620
621 return co.replace(co_consts=tuple(consts), co_filename=new_filename)
622
623
def test():
    """Command-line driver: analyse a script and print the report.

    Options: -d raises the debug level, -q silences it, -m switches the
    remaining arguments to module names, -p adds os.pathsep-separated search
    directories, -x excludes a module.  Returns the ModuleFinder used, or
    None when the options cannot be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath.extend(a.split(os.pathsep))
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print(" ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
681
682
# Script entry point: run the command-line driver when executed directly.
if __name__ == '__main__':
    try:
        # The result is bound to a module-level name.
        mf = test()
    except KeyboardInterrupt:
        # Report the interruption with a short notice.
        print("\n[interrupted]")