M7350/oe-core/bitbake/lib/bb/codeparser.py
2024-09-09 08:52:07 +00:00

376 lines
12 KiB
Python

import ast
import codegen
import logging
import os.path
import bb.utils, bb.data
from itertools import chain
from pysh import pyshyacc, pyshlex, sherrors
logger = logging.getLogger('BitBake.CodeParser')
PARSERCACHE_VERSION = 2
try:
import cPickle as pickle
except ImportError:
import pickle
logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
def check_indent(codestr):
"""If the code is indented, add a top level piece of code to 'remove' the indentation"""
i = 0
while codestr[i] in ["\n", "\t", " "]:
i = i + 1
if i == 0:
return codestr
if codestr[i-1] == "\t" or codestr[i-1] == " ":
return "if 1:\n" + codestr
return codestr
pythonparsecache = {}
shellparsecache = {}
def parser_cachefile(d):
cachedir = (d.getVar("PERSISTENT_DIR", True) or
d.getVar("CACHE", True))
if cachedir in [None, '']:
return None
bb.utils.mkdirhier(cachedir)
cachefile = os.path.join(cachedir, "bb_codeparser.dat")
logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile)
return cachefile
def parser_cache_init(d):
global pythonparsecache
global shellparsecache
cachefile = parser_cachefile(d)
if not cachefile:
return
try:
p = pickle.Unpickler(file(cachefile, "rb"))
data, version = p.load()
except:
return
if version != PARSERCACHE_VERSION:
return
pythonparsecache = data[0]
shellparsecache = data[1]
def parser_cache_save(d):
cachefile = parser_cachefile(d)
if not cachefile:
return
glf = bb.utils.lockfile(cachefile + ".lock", shared=True)
i = os.getpid()
lf = None
while not lf:
shellcache = {}
pythoncache = {}
lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False)
if not lf or os.path.exists(cachefile + "-" + str(i)):
if lf:
bb.utils.unlockfile(lf)
lf = None
i = i + 1
continue
try:
p = pickle.Unpickler(file(cachefile, "rb"))
data, version = p.load()
except (IOError, EOFError, ValueError):
data, version = None, None
if version != PARSERCACHE_VERSION:
shellcache = shellparsecache
pythoncache = pythonparsecache
else:
for h in pythonparsecache:
if h not in data[0]:
pythoncache[h] = pythonparsecache[h]
for h in shellparsecache:
if h not in data[1]:
shellcache[h] = shellparsecache[h]
p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1)
p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION])
bb.utils.unlockfile(lf)
bb.utils.unlockfile(glf)
def parser_cache_savemerge(d):
cachefile = parser_cachefile(d)
if not cachefile:
return
glf = bb.utils.lockfile(cachefile + ".lock")
try:
p = pickle.Unpickler(file(cachefile, "rb"))
data, version = p.load()
except (IOError, EOFError):
data, version = None, None
if version != PARSERCACHE_VERSION:
data = [{}, {}]
for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]:
f = os.path.join(os.path.dirname(cachefile), f)
try:
p = pickle.Unpickler(file(f, "rb"))
extradata, version = p.load()
except (IOError, EOFError):
extradata, version = [{}, {}], None
if version != PARSERCACHE_VERSION:
continue
for h in extradata[0]:
if h not in data[0]:
data[0][h] = extradata[0][h]
for h in extradata[1]:
if h not in data[1]:
data[1][h] = extradata[1][h]
os.unlink(f)
p = pickle.Pickler(file(cachefile, "wb"), -1)
p.dump([data, PARSERCACHE_VERSION])
bb.utils.unlockfile(glf)
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
def __init__(self, name, level=0, target=None):
Logger.__init__(self, name)
self.setLevel(level)
self.buffer = []
self.target = target
def handle(self, record):
self.buffer.append(record)
def flush(self):
for record in self.buffer:
self.target.handle(record)
self.buffer = []
class PythonParser():
getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
execfuncs = ("bb.build.exec_func", "bb.build.exec_task")
def warn(self, func, arg):
"""Warn about calls of bitbake APIs which pass a non-literal
argument for the variable name, as we're not able to track such
a reference.
"""
try:
funcstr = codegen.to_source(func)
argstr = codegen.to_source(arg)
except TypeError:
self.log.debug(2, 'Failed to convert function and argument to source form')
else:
self.log.debug(1, self.unhandled_message % (funcstr, argstr))
def visit_Call(self, node):
name = self.called_node_name(node.func)
if name in self.getvars:
if isinstance(node.args[0], ast.Str):
self.var_references.add(node.args[0].s)
else:
self.warn(node.func, node.args[0])
elif name in self.execfuncs:
if isinstance(node.args[0], ast.Str):
self.var_execs.add(node.args[0].s)
else:
self.warn(node.func, node.args[0])
elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
self.execs.add(name)
def called_node_name(self, node):
"""Given a called node, return its original string form"""
components = []
while node:
if isinstance(node, ast.Attribute):
components.append(node.attr)
node = node.value
elif isinstance(node, ast.Name):
components.append(node.id)
return '.'.join(reversed(components))
else:
break
def __init__(self, name, log):
self.var_references = set()
self.var_execs = set()
self.execs = set()
self.references = set()
self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)
def parse_python(self, node):
h = hash(str(node))
if h in pythonparsecache:
self.references = pythonparsecache[h]["refs"]
self.execs = pythonparsecache[h]["execs"]
return
code = compile(check_indent(str(node)), "<string>", "exec",
ast.PyCF_ONLY_AST)
for n in ast.walk(code):
if n.__class__.__name__ == "Call":
self.visit_Call(n)
self.references.update(self.var_references)
self.references.update(self.var_execs)
pythonparsecache[h] = {}
pythonparsecache[h]["refs"] = self.references
pythonparsecache[h]["execs"] = self.execs
class ShellParser():
def __init__(self, name, log):
self.funcdefs = set()
self.allexecs = set()
self.execs = set()
self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
self.unhandled_template = "unable to handle non-literal command '%s'"
self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)
def parse_shell(self, value):
"""Parse the supplied shell code in a string, returning the external
commands it executes.
"""
h = hash(str(value))
if h in shellparsecache:
self.execs = shellparsecache[h]["execs"]
return self.execs
try:
tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
except pyshlex.NeedMore:
raise sherrors.ShellSyntaxError("Unexpected EOF")
for token in tokens:
self.process_tokens(token)
self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
shellparsecache[h] = {}
shellparsecache[h]["execs"] = self.execs
return self.execs
def process_tokens(self, tokens):
"""Process a supplied portion of the syntax tree as returned by
pyshyacc.parse.
"""
def function_definition(value):
self.funcdefs.add(value.name)
return [value.body], None
def case_clause(value):
# Element 0 of each item in the case is the list of patterns, and
# Element 1 of each item in the case is the list of commands to be
# executed when that pattern matches.
words = chain(*[item[0] for item in value.items])
cmds = chain(*[item[1] for item in value.items])
return cmds, words
def if_clause(value):
main = chain(value.cond, value.if_cmds)
rest = value.else_cmds
if isinstance(rest, tuple) and rest[0] == "elif":
return chain(main, if_clause(rest[1]))
else:
return chain(main, rest)
def simple_command(value):
return None, chain(value.words, (assign[1] for assign in value.assigns))
token_handlers = {
"and_or": lambda x: ((x.left, x.right), None),
"async": lambda x: ([x], None),
"brace_group": lambda x: (x.cmds, None),
"for_clause": lambda x: (x.cmds, x.items),
"function_definition": function_definition,
"if_clause": lambda x: (if_clause(x), None),
"pipeline": lambda x: (x.commands, None),
"redirect_list": lambda x: ([x.cmd], None),
"subshell": lambda x: (x.cmds, None),
"while_clause": lambda x: (chain(x.condition, x.cmds), None),
"until_clause": lambda x: (chain(x.condition, x.cmds), None),
"simple_command": simple_command,
"case_clause": case_clause,
}
for token in tokens:
name, value = token
try:
more_tokens, words = token_handlers[name](value)
except KeyError:
raise NotImplementedError("Unsupported token type " + name)
if more_tokens:
self.process_tokens(more_tokens)
if words:
self.process_words(words)
def process_words(self, words):
"""Process a set of 'words' in pyshyacc parlance, which includes
extraction of executed commands from $() blocks, as well as grabbing
the command name argument.
"""
words = list(words)
for word in list(words):
wtree = pyshlex.make_wordtree(word[1])
for part in wtree:
if not isinstance(part, list):
continue
if part[0] in ('`', '$('):
command = pyshlex.wordtree_as_string(part[1:-1])
self.parse_shell(command)
if word[0] in ("cmd_name", "cmd_word"):
if word in words:
words.remove(word)
usetoken = False
for word in words:
if word[0] in ("cmd_name", "cmd_word") or \
(usetoken and word[0] == "TOKEN"):
if "=" in word[1]:
usetoken = True
continue
cmd = word[1]
if cmd.startswith("$"):
self.log.debug(1, self.unhandled_template % cmd)
elif cmd == "eval":
command = " ".join(word for _, word in words[1:])
self.parse_shell(command)
else:
self.allexecs.add(cmd)
break