# blob: 17a94bc42832415e46f88c3ffafbd70598342fb7
"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12from distutils.errors import DistutilsPlatformError
13from distutils.dep_util import newer
14from distutils.spawn import spawn
15from distutils import log
16from distutils.errors import DistutilsByteCompileError
17
def get_host_platform():
    """Return a string that identifies the platform the interpreter runs on.

    The result is used mainly to tell apart platform-specific build
    directories and built distributions.  On POSIX it is assembled from the
    fields of 'os.uname()' (OS name, release, machine), with per-OS tweaks.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    On Windows one of the following is returned:
       win-amd64 ('amd64' appears in sys.version)
       win-arm32 ('(arm)' appears in sys.version)
       win-arm64 ('(arm64)' appears in sys.version)
       sys.platform (everything else)

    On non-Windows systems the environment variable '_PYTHON_HOST_PLATFORM',
    when set, is returned verbatim (used for cross builds).  Other non-POSIX
    platforms, and POSIX platforms without 'os.uname', just get
    'sys.platform'.
    """
    if os.name == 'nt':
        # The architecture is sniffed from the interpreter's version banner;
        # the markers are tried in this fixed order.
        banner_markers = (
            ('amd64', 'win-amd64'),
            ('(arm)', 'win-arm32'),
            ('(arm64)', 'win-arm64'),
        )
        for marker, plat_name in banner_markers:
            if marker in sys.version.lower():
                return plat_name
        return sys.platform

    # Set for cross builds explicitly
    cross_platform = os.environ.get("_PYTHON_HOST_PLATFORM")
    if cross_platform is not None:
        return cross_platform

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    (osname, _host, release, version, machine) = os.uname()

    # Lowercase the OS name and drop '/' from it; make the machine name
    # filename-friendly (spaces occur in e.g. "Power Macintosh").
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.  The kernel release is deliberately left out.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)

    if osname.startswith("aix"):
        return "%s-%s.%s" % (osname, version, release)

    if osname.startswith("sunos"):
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # We can't use "platform.architecture()[0]" because of a
            # bootstrap problem.  A dict lookup is used so that an
            # unexpected sys.maxsize raises instead of passing silently.
            bitness = {2147483647: "32bit", 9223372036854775807: "64bit"}
            machine += ".%s" % bitness[sys.maxsize]
        # otherwise: standard osname-release-machine representation
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        leading_version = re.match(r'[\d.]+', release, re.ASCII)
        if leading_version:
            release = leading_version.group()
    elif osname.startswith("darwin"):
        # Imported lazily: only needed (and only meaningful) on macOS.
        import _osx_support, distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
            distutils.sysconfig.get_config_vars(),
            osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
96
def get_platform():
    """Return the platform string to build for.

    On Windows ('os.name == "nt"') the environment variable
    'VSCMD_ARG_TGT_ARCH' is consulted first (presumably set by the Visual
    Studio developer command prompt -- confirm against the MSVC docs); a
    recognised value selects the matching target platform so that cross
    builds are labelled for the target rather than the host.  When the
    variable is unset or unrecognised, and on every other OS, the result of
    'get_host_platform()' is returned.
    """
    if os.name == 'nt':
        TARGET_TO_PLAT = {
            'x86': 'win32',
            'x64': 'win-amd64',
            'arm': 'win-arm32',
            # Keep in step with get_host_platform(), which already reports
            # 'win-arm64' for native ARM64 interpreters.
            'arm64': 'win-arm64',
        }
        target_arch = os.environ.get('VSCMD_ARG_TGT_ARCH')
        return TARGET_TO_PLAT.get(target_arch) or get_host_platform()
    return get_host_platform()
107
def convert_path(pathname):
    """Translate a '/'-separated 'pathname' to the native path convention.

    Setup scripts always spell filenames Unix style; this splits the name
    on '/' and re-joins it with the local directory separator so that it
    can be used on the filesystem.  Where the native separator already is
    '/', and for an empty 'pathname', the argument is returned unchanged.
    '.' components are dropped; if nothing remains 'os.curdir' is returned.

    Raises ValueError on systems whose separator is not '/' if 'pathname'
    starts or ends with a slash.
    """
    if os.sep == '/' or not pathname:
        return pathname

    first_char, last_char = pathname[0], pathname[-1]
    if first_char == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if last_char == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Discard every "current directory" component before re-joining.
    components = [part for part in pathname.split('/') if part != '.']
    if components:
        return os.path.join(*components)
    return os.curdir
132
133# convert_path ()
134
135
def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root, pathname)".  Otherwise 'pathname' is first made
    relative (leading separator, and on Windows the drive, removed) and
    then joined onto 'new_root'.

    Raises DistutilsPlatformError when 'os.name' is neither 'posix' nor
    'nt'.
    """
    if os.name == 'posix':
        if os.path.isabs(pathname):
            # Drop the leading '/' so join() does not discard new_root.
            pathname = pathname[1:]
        return os.path.join(new_root, pathname)

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice instead of indexing so an empty or drive-only 'pathname'
        # does not raise IndexError; accept '/' as well as '\\' because a
        # leading separator of either kind would make os.path.join()
        # throw away the directories of 'new_root'.
        if path[:1] in ('\\', '/'):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
156
157
# Set to 1 once check_environ() has run, so later calls return immediately.
_environ_checked = 0


def check_environ():
    """Make sure 'os.environ' holds the variables distutils promises.

    These are the variables users may rely on in config files,
    command-line options, etc.  Currently:
      HOME - user's home directory (only filled in when 'os.name' is
             'posix' and it is missing)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Existing values are never overwritten.  The work is done once per
    process; subsequent calls are no-ops.
    """
    global _environ_checked
    if _environ_checked:
        return

    home_missing = os.name == 'posix' and 'HOME' not in os.environ
    if home_missing:
        try:
            import pwd
            home_dir = pwd.getpwuid(os.getuid()).pw_dir
        except (ImportError, KeyError):
            # bpo-10496: if the current user identifier doesn't exist in the
            # password database, do nothing
            pass
        else:
            os.environ['HOME'] = home_dir

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
184
185
def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.

    Every occurrence of '$' followed by a name (a letter or underscore,
    then letters, digits or underscores) is treated as a variable and is
    replaced by 'str()' of the value found in the 'local_vars' mapping,
    or by the value in 'os.environ' if the name is not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.

    Returns the substituted string.  Raises ValueError for any variable
    found in neither 'local_vars' nor 'os.environ'.
    """
    check_environ()

    def _subst(match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as exc:
        # str(KeyError) is the repr of the key, which used to yield a
        # doubly quoted name ("'$'FOO''"); report the bare name instead.
        missing = exc.args[0] if exc.args else exc
        raise ValueError("invalid variable '$%s'" % (missing,)) from exc
207
208# subst_vars ()
209
210
def grok_environment_error(exc, prefix="error: "):
    """Return 'prefix' followed by the string form of the exception 'exc'.

    Function kept for backward compatibility.  It used to try clever
    things with EnvironmentErrors, but nowadays str(exception) produces
    good messages on its own.
    """
    message = str(exc)
    return prefix + message
216
217
218# Needed by 'split_quoted()'
# Compiled lazily by _init_regex() on the first split_quoted() call.
_wordchars_re = _squote_re = _dquote_re = None


def _init_regex():
    """Compile the three patterns used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # A run of characters that are neither backslash, quote nor whitespace.
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete single- / double-quoted string, honouring backslash escapes.
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.

    Words are delimited by whitespace that is neither escaped by a
    backslash nor inside a quoted string.  Single and double quotes are
    equivalent, and quote characters can be backslash-escaped.  The
    backslash is stripped from any two-character escape sequence, leaving
    only the escaped character, and the quote characters are stripped from
    any quoted string.

    Returns a list of words.  Raises ValueError when a quote is never
    closed.
    """
    # The string is consumed word by word: 'remaining' always starts with
    # the word being built, and 'scan_from' is how far into it has already
    # been examined (escapes and quotes are spliced out in place).
    if _wordchars_re is None:
        _init_regex()

    remaining = s.strip()
    words = []
    scan_from = 0

    while remaining:
        stop = _wordchars_re.match(remaining, scan_from).end()
        if stop == len(remaining):
            # Plain characters all the way to the end: last word.
            words.append(remaining[:stop])
            break

        char = remaining[stop]
        if char in string.whitespace:
            # Unescaped, unquoted whitespace: definitely a word delimiter.
            words.append(remaining[:stop])
            remaining = remaining[stop:].lstrip()
            scan_from = 0

        elif char == '\\':
            # Drop the backslash; the character it protected stays in the
            # current word and is skipped by the next scan.
            remaining = remaining[:stop] + remaining[stop + 1:]
            scan_from = stop + 1

        else:
            if char == "'":         # slurp singly-quoted string
                quoted = _squote_re.match(remaining, stop)
            elif char == '"':       # slurp doubly-quoted string
                quoted = _dquote_re.match(remaining, stop)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % char)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % char)

            (q_start, q_end) = quoted.span()
            # Splice the quoted text in without its two quote characters
            # and resume scanning right after it.
            remaining = (remaining[:q_start]
                         + remaining[q_start + 1:q_end - 1]
                         + remaining[q_end:])
            scan_from = q_end - 2

        if scan_from >= len(remaining):
            words.append(remaining)
            break

    return words
283
284# split_quoted ()
285
286
def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg. by
    writing to the filesystem).

    Such actions are special because they are disabled by the 'dry_run'
    flag.  Supply the function to call and an argument tuple for it (to
    embody the "external action" being performed), plus an optional
    message to log; when 'msg' is None a message is built from the
    function name and the arguments.  'verbose' is accepted but not used
    by this function.  The return value of 'func' is discarded.
    """
    if msg is None:
        description = "%s%r" % (func.__name__, args)
        if description.endswith(',)'):      # correct for singleton tuple
            description = description[:-2] + ')'
        msg = description

    log.info(msg)
    if dry_run:
        return
    func(*args)
304
305
def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison is case-insensitive.  True values are 'y', 'yes', 't',
    'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false',
    'off', and '0'.  Raises ValueError if 'val' is anything else.
    """
    true_words = ('y', 'yes', 't', 'true', 'on', '1')
    false_words = ('n', 'no', 'f', 'false', 'off', '0')

    lowered = val.lower()
    if lowered in true_words:
        return 1
    if lowered in false_words:
        return 0
    raise ValueError("invalid truth value %r" % (lowered,))
320
321
def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.  'verbose' is only forwarded to the helper
    script generated in indirect mode.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError when 'sys.dont_write_bytecode' is
    set, and ValueError when a file does not start with 'prefix'.
    """

    # Late import to fix a bootstrap issue: _posixsubprocess is built by
    # setup.py, but setup.py uses distutils.
    import subprocess

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created and opened the file; since
            # nothing will be written or run, release both right away
            # instead of leaking the descriptor and the empty file.
            os.close(script_fd)
            os.remove(script_name)
        else:
            # Python source files are decoded as UTF-8 by default, so the
            # script is written as UTF-8 regardless of the locale.
            with os.fdopen(script_fd, "w", encoding="utf-8") as script:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                #py_files = map(os.path.abspath, py_files)
                #if prefix:
                #    prefix = os.path.abspath(prefix)

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

        # Run the script with the same optimization flags as this
        # interpreter.
        cmd = [sys.executable]
        cmd.extend(subprocess._optim_args_from_interpreter_flags())
        cmd.append(script_name)
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Remove the helper script even when the child process failed.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for file in py_files:
            if file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
            else:
                cfile = importlib.util.cache_from_source(file)
            dfile = file
            if prefix:
                if file[:len(prefix)] != prefix:
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                                     % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    compile(file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)
468
469# byte_compile ()
470
def rfc822_escape(header):
    """Return 'header' escaped for inclusion in an RFC-822 header.

    Every newline is followed by 8 spaces, so that the lines after the
    first are indented as continuation lines.
    """
    continuation = '\n' + 8 * ' '
    return header.replace('\n', continuation)
478
479# 2to3 support
480
def run_2to3(files, fixer_names=None, options=None, explicit=None):
    """Invoke 2to3 on a list of Python files.

    The files should all come from the build area, as the
    modification is done in-place. To reduce the build time,
    only files modified since the last invocation of this
    function should be passed in the files argument.

    'fixer_names' defaults to every fixer in the 'lib2to3.fixes' package.
    'options' and 'explicit' are handed to lib2to3's RefactoringTool;
    'explicit' lists fixers to run even though they are marked as
    explicit.  Nothing is done (and lib2to3 is not imported) when 'files'
    is empty.
    """

    if not files:
        return

    # Make this class local, to delay import of 2to3
    from lib2to3.refactor import RefactoringTool, get_fixers_from_package

    class DistutilsRefactoringTool(RefactoringTool):
        # Route lib2to3's messages through the distutils log.
        def log_error(self, msg, *args, **kw):
            log.error(msg, *args)

        def log_message(self, msg, *args):
            log.info(msg, *args)

        def log_debug(self, msg, *args):
            log.debug(msg, *args)

    if fixer_names is None:
        fixer_names = get_fixers_from_package('lib2to3.fixes')
    # 'explicit' used to be accepted but silently dropped; forward it so
    # that explicitly requested fixers are actually enabled.
    r = DistutilsRefactoringTool(fixer_names, options=options,
                                 explicit=explicit)
    r.refactor(files, write=True)
507
def copydir_run_2to3(src, dest, template=None, fixer_names=None,
                     options=None, explicit=None):
    """Recursively copy the directory 'src' to 'dest', copying only new
    and changed files, then run 'run_2to3' over the Python modules that
    were just copied.

    If you give a template string, it's parsed like a MANIFEST.in: each
    non-blank line is applied to the file list before copying.

    Returns the list of destination names that were actually copied.
    """
    from distutils.dir_util import mkpath
    from distutils.file_util import copy_file
    from distutils.filelist import FileList

    filelist = FileList()

    # findall() is run from inside 'src'; always restore the previous
    # working directory afterwards.
    previous_dir = os.getcwd()
    os.chdir(src)
    try:
        filelist.findall()
    finally:
        os.chdir(previous_dir)
    filelist.files[:] = filelist.allfiles

    if template:
        for raw_line in template.splitlines():
            directive = raw_line.strip()
            if directive:
                filelist.process_template_line(directive)

    copied = []
    for filename in filelist.files:
        outname = os.path.join(dest, filename)
        mkpath(os.path.dirname(outname))
        outcome = copy_file(os.path.join(src, filename), outname, update=1)
        # The second item of copy_file()'s result tells whether the file
        # was really copied.
        if outcome[1]:
            copied.append(outname)

    python_sources = [fn for fn in copied if fn.lower().endswith('.py')]
    run_2to3(python_sources,
             fixer_names=fixer_names, options=options, explicit=explicit)
    return copied
540
class Mixin2to3:
    """Mixin class for commands that run 2to3.

    To configure 2to3, setup scripts may either change the class
    variables, or inherit from individual commands to override how
    2to3 is invoked.
    """

    # List of fixers to run; None means the default set chosen by the
    # module-level run_2to3().
    fixer_names = None

    # Options dictionary handed to 2to3.
    options = None

    # List of fixers to invoke even though they are marked as explicit.
    explicit = None

    def run_2to3(self, files):
        """Run the module-level 'run_2to3' on 'files' using this class's
        configuration and return its result."""
        return run_2to3(
            files,
            fixer_names=self.fixer_names,
            options=self.options,
            explicit=self.explicit,
        )