blob: ddd1b97dcb1a41e751427d6b98fe6a93456a4b7b [file] [log] [blame]
"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
19
20import ast
21import sys
22import codegen
23import logging
24import pickle
25import bb.pysh as pysh
26import os.path
27import bb.utils, bb.data
28import hashlib
29from itertools import chain
30from bb.pysh import pyshyacc, pyshlex, sherrors
31from bb.cache import MultiProcessCache
32
logger = logging.getLogger('BitBake.CodeParser')

def bbhash(s):
    """Return the hex MD5 digest of string *s* (cache keying, not security)."""
    digest = hashlib.md5(s.encode("utf-8"))
    return digest.hexdigest()
37
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation"""

    i = 0
    # Bounds check fixes an IndexError when codestr is entirely whitespace
    while i < len(codestr) and codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    if i == 0:
        return codestr

    if i == len(codestr):
        # Nothing but whitespace: no code to re-indent
        return codestr

    if codestr[i-1] == "\t" or codestr[i-1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
56
57
58# Basically pickle, in python 2.7.3 at least, does badly with data duplication
59# upon pickling and unpickling. Combine this with duplicate objects and things
60# are a mess.
61#
62# When the sets are originally created, python calls intern() on the set keys
63# which significantly improves memory usage. Sadly the pickle/unpickle process
64# doesn't call intern() on the keys and results in the same strings being duplicated
65# in memory. This also means pickle will save the same string multiple times in
66# the cache file.
67#
68# By having shell and python cacheline objects with setstate/getstate, we force
69# the object creation through our own routine where we can call intern (via internSet).
70#
71# We also use hashable frozensets and ensure we use references to these so that
72# duplicates can be removed, both in memory and in the resulting pickled data.
73#
74# By playing these games, the size of the cache file shrinks dramatically
75# meaning faster load times and the reloaded cache files also consume much less
76# memory. Smaller cache files, faster load times and lower memory usage is good.
77#
78# A custom getstate/setstate using tuples is actually worth 15% cachesize by
79# avoiding duplication of the attribute names!
80
class SetCache(object):
    """Deduplicate frozensets of interned strings.

    Equal sets of keys share a single frozenset object, which keeps both
    in-memory usage and the pickled cache file small (see the comment block
    above for the full rationale).
    """
    def __init__(self):
        # Canonical frozenset -> itself; acts as a dedup table
        self.setcache = {}

    def internSet(self, items):
        """Return a canonical frozenset of sys.intern()ed copies of *items*."""
        new = []
        for i in items:
            new.append(sys.intern(i))
        s = frozenset(new)
        # Key on the frozenset itself rather than hash(s): two distinct sets
        # can share a hash value, and keying on the raw hash could hand back
        # the wrong set on such a collision.
        if s in self.setcache:
            return self.setcache[s]
        self.setcache[s] = s
        return s

codecache = SetCache()
98
class pythonCacheLine(object):
    """One cached python-parse result: referenced variables, executed
    functions, and contains-check values, all interned via codecache."""

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {key: codecache.internSet(vals)
                         for key, vals in contains.items()}

    def __getstate__(self):
        # Tuple form keeps attribute names out of the pickle stream
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        refs, execs, contains = state
        # Re-run __init__ so the sets get re-interned on unpickle
        self.__init__(refs, execs, contains)

    def __hash__(self):
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains.keys()):
            parts.extend((key, hash(self.contains[key])))
        return hash(tuple(parts))

    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])
120
121
class shellCacheLine(object):
    """One cached shell-parse result: the set of executed commands,
    interned via codecache."""

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # Deliberately a bare value, not a tuple, matching __setstate__
        return self.execs

    def __setstate__(self, state):
        # Re-run __init__ so the set gets re-interned on unpickle
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
136
class CodeParserCache(MultiProcessCache):
    """Persistent cache of python/shell parse results, shared between
    bitbake worker processes via MultiProcessCache."""

    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 10

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a shared pythonCacheLine for these parse results."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shared shellCacheLine for these executed commands."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        # Nothing to do if the caches were already loaded
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        # One dict for python results, one for shell results
        return [{}, {}]
187
# Module-level singleton used by the parser classes below
codeparsercache = CodeParserCache()

def parser_cache_init(d):
    """Load (or create) the codeparser cache for datastore *d*."""
    codeparsercache.init_cache(d)

def parser_cache_save():
    """Write this process's new cache entries to its extras file."""
    codeparsercache.save_extras()

def parser_cache_savemerge():
    """Merge all per-process extras files into the main cache file."""
    codeparsercache.save_merge()
198
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """A logger that queues every record and only forwards the queued
    records to *target* when flush() is called."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Queue instead of emitting immediately
        self.buffer.append(record)

    def flush(self):
        # Forward only records the target's level would accept, then reset
        for queued in self.buffer:
            if self.target.isEnabledFor(queued.levelno):
                self.target.handle(queued)
        self.buffer = []
215
class PythonParser():
    """Parse a fragment of python code, recording the datastore variables
    it references, the functions it executes and any contains-style checks
    it makes.  Results are memoized in the codeparser cache keyed by a hash
    of the code text."""

    # Call-name suffixes/names whose first argument names a datastore variable
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    # Calls performing "does VAR contain value" checks, tracked per-value
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    # Calls that execute another metadata function by (string) name
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Inspect one ast.Call node and record what it references/executes.

        Branch order matters: variable accessors first, then .expand,
        then exec_func-style calls, and finally any other direct call.
        """
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                elif name in self.containsanyfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    # contains_any/filter take a whitespace-separated list
                    self.contains[varname].update(node.args[1].s.split())
                elif name.endswith(self.getvarflags):
                    if isinstance(node.args[1], ast.Str):
                        # Flag accesses are recorded as VAR[flag]
                        self.references.add('%s[%s]' % (varname, node.args[1].s))
                    else:
                        self.warn(node.func, node.args[1])
                else:
                    self.references.add(varname)
            else:
                self.warn(node.func, node.args[0])
        elif name and name.endswith(".expand"):
            if isinstance(node.args[0], ast.Str):
                # Expand the literal with the datastore expansion parser and
                # merge whatever it references/executes into our results
                value = node.args[0].s
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            if isinstance(node.args[0], ast.Str):
                # Indirect execution via bb.build.exec_func/exec_task
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            # Any other direct call is recorded as an executed function
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        # Walk Attribute chains down to the root Name, e.g.
        # bb.utils.contains -> "bb.utils.contains"; returns None for
        # anything that is not a plain dotted name (e.g. a subscript)
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        # name: the function/variable being parsed, used in messages and
        # as the expansion context for .expand() literals
        self.name = name
        # Functions executed indirectly via bb.build.exec_func/exec_task
        self.var_execs = set()
        # varname -> set of values checked with contains-style calls
        self.contains = {}
        # Names of directly called functions
        self.execs = set()
        # Names of referenced datastore variables (VAR or VAR[flag])
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse python code *node*, populating self.references, self.execs
        and self.contains.  Previously seen code is served from the
        codeparser cache (main cache first, then unmerged extras)."""
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
            ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
336
class ShellParser():
    """Parse shell code with pysh, recording the external commands it
    executes (commands defined as functions within the code itself are
    excluded).  Results are memoized in the codeparser cache."""

    def __init__(self, name, log):
        # Shell functions defined within the parsed code
        self.funcdefs = set()
        # Every command name seen, including locally defined functions
        self.allexecs = set()
        # allexecs minus funcdefs: the external command dependencies
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # Check the main cache first, then the unmerged extras
        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the pysh parser over *value* and walk the resulting tokens."""
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each handler returns (more_tokens, words): nested tokens to recurse
        into and words to scan for command names.
        """

        def function_definition(value):
            # Record the function name so it isn't counted as an external
            # command, then descend into its body
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            # Flattens elif chains recursively into one command stream
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            # Assignment values may contain $() substitutions, so scan them too
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Dispatch table: pysh token type -> handler
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    # Nested token lists are flattened recursively
                    process_token_list(token)
                    continue
                name, value = token
                try:
                    more_tokens, words = token_handlers[name](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # First pass: recurse into `...` / $(...) substitutions, and drop
        # cmd_name/cmd_word entries whose substitutions we just handled
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

            if word[0] in ("cmd_name", "cmd_word"):
                if word in words:
                    words.remove(word)

        # Second pass: find the actual command name, skipping VAR=value
        # assignment prefixes (after one, the command arrives as a TOKEN)
        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    # Command name comes from a variable; can't be tracked
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # Re-parse the evaluated string as shell code
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break