M7350/oe-core/bitbake/lib/bb/codeparser.py

import ast
import codegen
import logging
import os.path
import bb.utils, bb.data
from itertools import chain
from pysh import pyshyacc, pyshlex, sherrors
from bb.cache import MultiProcessCache


# Module-level logger used for code parser diagnostics.
logger = logging.getLogger('BitBake.CodeParser')

# Prefer cPickle when it can be imported; otherwise fall back to the plain
# pickle module and record the (slower) fallback in the log.
try:
    import cPickle as pickle
except ImportError:
    import pickle
    logger.info('Importing cPickle failed.  Falling back to a very slow implementation.')


def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The leading run of newlines, tabs and spaces is scanned.  When that run
    ends in a tab or space (i.e. the first real line is indented), the code
    is wrapped in an ``if 1:`` block so that it can be compiled as-is.

    Empty or whitespace-only input is returned unchanged: there is no code
    to re-indent, and scanning past the end of the string previously raised
    IndexError.
    """

    whitespace = ("\n", "\t", " ")
    length = len(codestr)

    # Index of the first non-whitespace character (== length if none).
    i = 0
    while i < length and codestr[i] in whitespace:
        i = i + 1

    # No leading whitespace at all, or nothing but whitespace.
    if i == 0 or i == length:
        return codestr

    # Only horizontal whitespace directly before the code means the first
    # statement is indented; a trailing newline means it starts at column 0.
    if codestr[i-1] in ("\t", " "):
        return "if 1:\n" + codestr

    return codestr


# Basically pickle, in python 2.7.3 at least, does badly with data duplication 
# upon pickling and unpickling. Combine this with duplicate objects and things
# are a mess.
#
# When the sets are originally created, python calls intern() on the set keys
# which significantly improves memory usage. Sadly the pickle/unpickle process
# doesn't call intern() on the keys and results in the same strings being duplicated
# in memory. This also means pickle will save the same string multiple times in
# the cache file.
#
# By having shell and python cacheline objects with setstate/getstate, we force
# the object creation through our own routine where we can call intern (via internSet).
#
# We also use hashable frozensets and ensure we use references to these so that
# duplicates can be removed, both in memory and in the resulting pickled data.
#
# By playing these games, the size of the cache file shrinks dramatically
# meaning faster load times and the reloaded cache files also consume much less
# memory. Smaller cache files, faster load times and lower memory usage are all good.
#
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!

class SetCache(object):
    """Pool of shared frozensets of interned strings.

    internSet() returns one canonical frozenset object per distinct set of
    strings, so equal sets built at different times share a single object.
    """

    def __init__(self):
        # Maps each canonical frozenset to itself.  Keying on the set (not
        # on its integer hash) means two different sets whose hashes happen
        # to collide are still kept apart by the dict's equality check.
        self.setcache = {}

    def internSet(self, items):
        """Return the canonical frozenset holding the interned `items`.

        Every element of `items` is passed through intern(); the resulting
        frozenset is looked up in the pool and the previously stored equal
        set is returned if there is one, otherwise the new set is stored
        and returned.
        """
        s = frozenset(intern(i) for i in items)
        return self.setcache.setdefault(s, s)

codecache = SetCache()

class pythonCacheLine(object):
    """Cache entry for one parsed python fragment.

    Holds three pieces of data, all routed through codecache.internSet():
    `refs`, `execs`, and `contains` (a dict mapping a key to a set).
    The pickled state is the plain tuple (refs, execs, contains).
    """

    def __init__(self, refs, execs, contains):
        pool = codecache.internSet
        self.refs = pool(refs)
        self.execs = pool(execs)
        self.contains = dict((key, pool(contains[key])) for key in contains)

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Rebuild through __init__ so the unpickled sets are pooled again.
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Hash over the two sets followed by each contains key (in sorted
        # order) paired with the hash of its set.
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

class shellCacheLine(object):
    """Cache entry for one parsed shell fragment.

    Stores a single set, `execs`, obtained from codecache.internSet().
    The pickled state is that set itself (not wrapped in a tuple).
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return self.execs

    def __setstate__(self, state):
        # Rebuild through __init__ so the unpickled set is pooled again.
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

class CodeParserCache(MultiProcessCache):
    """MultiProcessCache specialisation holding python and shell parse results.

    Slot 0 of the cache data holds the python entries and slot 1 the shell
    entries; the same layout is used for the "extras" data.
    """
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 7

    def __init__(self):
        MultiProcessCache.__init__(self)
        main, extras = self.cachedata, self.cachedata_extras
        self.pythoncache = main[0]
        self.shellcache = main[1]
        self.pythoncacheextras = extras[0]
        self.shellcacheextras = extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the data, reusing one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the data, reusing one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        main = self.cachedata
        self.pythoncache = main[0]
        self.shellcache = main[1]

    def create_cachedata(self):
        """Return a fresh, empty [python, shell] pair of dicts."""
        return [{}, {}]

codeparsercache = CodeParserCache()

def parser_cache_init(d):
    """Initialise the module-level codeparsercache by calling its init_cache(d)."""
    codeparsercache.init_cache(d)

def parser_cache_save(d):
    """Delegate to save_extras(d) on the module-level codeparsercache."""
    codeparsercache.save_extras(d)

def parser_cache_savemerge(d):
    """Delegate to save_merge(d) on the module-level codeparsercache."""
    codeparsercache.save_merge(d)

Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger that queues records and forwards them to `target` on flush().

    name   -- logger name passed to the base logger class
    level  -- level applied with setLevel()
    target -- object whose handle(record) receives the records on flush();
              may be None, in which case records simply accumulate
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Queue the record instead of dispatching it to handlers.
        self.buffer.append(record)

    def flush(self):
        """Forward every buffered record to the target and empty the buffer.

        With no target set there is nowhere to deliver to, so the records
        are left in the buffer (rather than failing with AttributeError)
        and can be flushed once a target has been assigned.
        """
        if self.target is None:
            return
        for record in self.buffer:
            self.target.handle(record)
        self.buffer = []

class PythonParser():
    """Extract variable references and called functions from python code.

    After parse_python() the results are available as:
      references -- names passed as a string literal to calls whose name
                    ends with one of `getvars` (or to a `containsfuncs`
                    call whose second argument is not a string literal)
      contains   -- dict: variable name -> set of literal second arguments
                    seen in `containsfuncs` calls
      execs      -- names of other called functions, plus the literal
                    first arguments of `execfuncs` calls
    """
    getvars = (".getVar", ".appendVar", ".prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes.

        Calls without any positional argument, and contains-style calls
        with only one argument, are handled gracefully instead of raising
        IndexError.
        """
        name = self.called_node_name(node.func)
        if not name:
            # Not a plain dotted name (e.g. result of another call).
            return

        is_contains = name in self.containsfuncs
        if name.endswith(self.getvars) or is_contains or name in self.execfuncs:
            if not node.args:
                # Nothing to inspect: the variable/function name was not
                # passed positionally, so it cannot be tracked.
                self.log.debug(2, "in call of %s, no positional argument to inspect" % name)
                return

            first = node.args[0]
            if not isinstance(first, ast.Str):
                self.warn(node.func, first)
                return

            if name.endswith(self.getvars) or is_contains:
                varname = first.s
                if is_contains and len(node.args) > 1 and isinstance(node.args[1], ast.Str):
                    self.contains.setdefault(varname, set()).add(node.args[1].s)
                else:
                    self.references.add(varname)
            else:
                self.var_execs.add(first.s)
        elif isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns the dotted name for a chain of attribute accesses rooted
        at a plain name, or None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def parse_python(self, node):
        """Parse the python code in str(node) and populate the result sets.

        Results are looked up by hash(str(node)) in the main code parser
        cache and then in the extras cache; on a miss the code is compiled
        to an AST, every Call node is visited, and the outcome is stored
        in the extras cache.
        """
        h = hash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cached = cache[h]
                # Copy into mutable containers owned by this parser.
                self.references = set(cached.refs)
                self.execs = set(cached.execs)
                self.contains = {}
                for i in cached.contains:
                    self.contains[i] = set(cached.contains[i])
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)

class ShellParser():
    """Collect the commands executed by a piece of shell code.

    funcdefs holds names of shell functions defined in the code, allexecs
    every command name encountered, and execs the commands that are not
    locally defined functions.
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, template)

    def parse_shell(self, value):
        """Return the set of external commands run by the shell code in
        `value`, consulting the code parser caches before parsing.
        """

        h = hash(str(value))

        # Main cache first, then the extras cache.
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        self._parse_shell(value)
        # Commands that are shell functions defined in the code itself
        # are not reported.
        self.execs = self.allexecs - self.funcdefs

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run pyshyacc over `value` and walk every resulting token list."""
        try:
            parsed, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for group in parsed:
            self.process_tokens(group)

    def process_tokens(self, tokens):
        """Walk one portion of the syntax tree produced by pyshyacc.parse.

        Each handler maps a token value to a pair (nested tokens, words);
        nested tokens are walked recursively and words are handed to
        process_words().
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Each case item is a pair: item[0] is the list of patterns,
            # item[1] the list of commands run when a pattern matches.
            patterns = chain(*[item[0] for item in value.items])
            commands = chain(*[item[1] for item in value.items])
            return commands, patterns

        def if_clause(value):
            head = chain(value.cond, value.if_cmds)
            tail = value.else_cmds
            if isinstance(tail, tuple) and tail[0] == "elif":
                # An elif branch is itself an if-clause; flatten it too.
                tail = if_clause(tail[1])
            return chain(head, tail)

        def simple_command(value):
            assigned = (assign[1] for assign in value.assigns)
            return None, chain(value.words, assigned)

        def loop_clause(value):
            # Shared by while and until loops.
            return chain(value.condition, value.cmds), None

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": loop_clause,
            "until_clause": loop_clause,
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for name, value in tokens:
            try:
                subtokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if subtokens:
                self.process_tokens(subtokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance: parse the commands
        found inside backtick / $() substitutions, then record the command
        name of the word list.
        """

        command_kinds = ("cmd_name", "cmd_word")

        words = list(words)
        # Iterate over a snapshot, since entries may be removed below.
        for word in words[:]:
            for part in pyshlex.make_wordtree(word[1]):
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    self._parse_shell(pyshlex.wordtree_as_string(part[1:-1]))

                    # A command word that is itself a substitution is
                    # dropped from the list examined below.
                    if word[0] in command_kinds and word in words:
                        words.remove(word)

        usetoken = False
        for word in words:
            is_command = word[0] in command_kinds
            if not (is_command or (usetoken and word[0] == "TOKEN")):
                continue

            if "=" in word[1]:
                # Looks like an assignment; allow a following TOKEN to be
                # taken as the command.
                usetoken = True
                continue

            cmd = word[1]
            if cmd.startswith("$"):
                self.log.debug(1, self.unhandled_template % cmd)
            elif cmd == "eval":
                # Re-parse the remaining words as shell code.
                self._parse_shell(" ".join(text for _, text in words[1:]))
            else:
                self.allexecs.add(cmd)
            break
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`import ast`
			`import codegen`
			`import logging`
			`import os.path`
			`import bb.utils, bb.data`
			`from itertools import chain`
			`from pysh import pyshyacc, pyshlex, sherrors`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`from bb.cache import MultiProcessCache`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00

			`logger = logging.getLogger('BitBake.CodeParser')`

			`try:`
			`import cPickle as pickle`
			`except ImportError:`
			`import pickle`
			`logger.info('Importing cPickle failed. Falling back to a very slow implementation.')`


			`def check_indent(codestr):`
			`"""If the code is indented, add a top level piece of code to 'remove' the indentation"""`

			`i = 0`
			`while codestr[i] in ["\n", "\t", " "]:`
			`i = i + 1`

			`if i == 0:`
			`return codestr`

			`if codestr[i-1] == "\t" or codestr[i-1] == " ":`
			`return "if 1:\n" + codestr`

			`return codestr`


M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`# Basically pickle, in python 2.7.3 at least, does badly with data duplication`
			`# upon pickling and unpickling. Combine this with duplicate objects and things`
			`# are a mess.`
			`#`
			`# When the sets are originally created, python calls intern() on the set keys`
			`# which significantly improves memory usage. Sadly the pickle/unpickle process`
			`# doesn't call intern() on the keys and results in the same strings being duplicated`
			`# in memory. This also means pickle will save the same string multiple times in`
			`# the cache file.`
			`#`
			`# By having shell and python cacheline objects with setstate/getstate, we force`
			`# the object creation through our own routine where we can call intern (via internSet).`
			`#`
			`# We also use hashable frozensets and ensure we use references to these so that`
			`# duplicates can be removed, both in memory and in the resulting pickled data.`
			`#`
			`# By playing these games, the size of the cache file shrinks dramatically`
			`# meaning faster load times and the reloaded cache files also consume much less`
			`# memory. Smaller cache files, faster load times and lower memory usage is good.`
			`#`
			`# A custom getstate/setstate using tuples is actually worth 15% cachesize by`
			`# avoiding duplication of the attribute names!`

			`class SetCache(object):`
			`def __init__(self):`
			`self.setcache = {}`

			`def internSet(self, items):`

			`new = []`
			`for i in items:`
			`new.append(intern(i))`
			`s = frozenset(new)`
			`if hash(s) in self.setcache:`
			`return self.setcache[hash(s)]`
			`self.setcache[hash(s)] = s`
			`return s`

			`codecache = SetCache()`

			`class pythonCacheLine(object):`
			`def __init__(self, refs, execs, contains):`
			`self.refs = codecache.internSet(refs)`
			`self.execs = codecache.internSet(execs)`
			`self.contains = {}`
			`for c in contains:`
			`self.contains[c] = codecache.internSet(contains[c])`

			`def __getstate__(self):`
			`return (self.refs, self.execs, self.contains)`

			`def __setstate__(self, state):`
			`(refs, execs, contains) = state`
			`self.__init__(refs, execs, contains)`
			`def __hash__(self):`
			`l = (hash(self.refs), hash(self.execs))`
			`for c in sorted(self.contains.keys()):`
			`l = l + (c, hash(self.contains[c]))`
			`return hash(l)`

			`class shellCacheLine(object):`
			`def __init__(self, execs):`
			`self.execs = codecache.internSet(execs)`

			`def __getstate__(self):`
			`return (self.execs)`

			`def __setstate__(self, state):`
			`(execs) = state`
			`self.__init__(execs)`
			`def __hash__(self):`
			`return hash(self.execs)`

			`class CodeParserCache(MultiProcessCache):`
			`cache_file_name = "bb_codeparser.dat"`
			`CACHE_VERSION = 7`

			`def __init__(self):`
			`MultiProcessCache.__init__(self)`
			`self.pythoncache = self.cachedata[0]`
			`self.shellcache = self.cachedata[1]`
			`self.pythoncacheextras = self.cachedata_extras[0]`
			`self.shellcacheextras = self.cachedata_extras[1]`

			`# To avoid duplication in the codeparser cache, keep`
			`# a lookup of hashes of objects we already have`
			`self.pythoncachelines = {}`
			`self.shellcachelines = {}`

			`def newPythonCacheLine(self, refs, execs, contains):`
			`cacheline = pythonCacheLine(refs, execs, contains)`
			`h = hash(cacheline)`
			`if h in self.pythoncachelines:`
			`return self.pythoncachelines[h]`
			`self.pythoncachelines[h] = cacheline`
			`return cacheline`

			`def newShellCacheLine(self, execs):`
			`cacheline = shellCacheLine(execs)`
			`h = hash(cacheline)`
			`if h in self.shellcachelines:`
			`return self.shellcachelines[h]`
			`self.shellcachelines[h] = cacheline`
			`return cacheline`

			`def init_cache(self, d):`
			`MultiProcessCache.init_cache(self, d)`

			`# cachedata gets re-assigned in the parent`
			`self.pythoncache = self.cachedata[0]`
			`self.shellcache = self.cachedata[1]`

			`def create_cachedata(self):`
			`data = [{}, {}]`
			`return data`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`codeparsercache = CodeParserCache()`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`def parser_cache_init(d):`
			`codeparsercache.init_cache(d)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`def parser_cache_save(d):`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`codeparsercache.save_extras(d)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`def parser_cache_savemerge(d):`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`codeparsercache.save_merge(d)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`Logger = logging.getLoggerClass()`
			`class BufferedLogger(Logger):`
			`def __init__(self, name, level=0, target=None):`
			`Logger.__init__(self, name)`
			`self.setLevel(level)`
			`self.buffer = []`
			`self.target = target`

			`def handle(self, record):`
			`self.buffer.append(record)`

			`def flush(self):`
			`for record in self.buffer:`
			`self.target.handle(record)`
			`self.buffer = []`

			`class PythonParser():`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`getvars = (".getVar", ".appendVar", ".prependVar")`
			`containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`execfuncs = ("bb.build.exec_func", "bb.build.exec_task")`

			`def warn(self, func, arg):`
			`"""Warn about calls of bitbake APIs which pass a non-literal`
			`argument for the variable name, as we're not able to track such`
			`a reference.`
			`"""`

			`try:`
			`funcstr = codegen.to_source(func)`
			`argstr = codegen.to_source(arg)`
			`except TypeError:`
			`self.log.debug(2, 'Failed to convert function and argument to source form')`
			`else:`
			`self.log.debug(1, self.unhandled_message % (funcstr, argstr))`

			`def visit_Call(self, node):`
			`name = self.called_node_name(node.func)`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`if name and name.endswith(self.getvars) or name in self.containsfuncs:`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`if isinstance(node.args[0], ast.Str):`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`varname = node.args[0].s`
			`if name in self.containsfuncs and isinstance(node.args[1], ast.Str):`
			`if varname not in self.contains:`
			`self.contains[varname] = set()`
			`self.contains[varname].add(node.args[1].s)`
			`else:`
			`self.references.add(node.args[0].s)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`else:`
			`self.warn(node.func, node.args[0])`
			`elif name in self.execfuncs:`
			`if isinstance(node.args[0], ast.Str):`
			`self.var_execs.add(node.args[0].s)`
			`else:`
			`self.warn(node.func, node.args[0])`
			`elif name and isinstance(node.func, (ast.Name, ast.Attribute)):`
			`self.execs.add(name)`

			`def called_node_name(self, node):`
			`"""Given a called node, return its original string form"""`
			`components = []`
			`while node:`
			`if isinstance(node, ast.Attribute):`
			`components.append(node.attr)`
			`node = node.value`
			`elif isinstance(node, ast.Name):`
			`components.append(node.id)`
			`return '.'.join(reversed(components))`
			`else:`
			`break`

			`def __init__(self, name, log):`
			`self.var_execs = set()`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self.contains = {}`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`self.execs = set()`
			`self.references = set()`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`self.unhandled_message = "in call of %s, argument '%s' is not a string literal"`
			`self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)`

			`def parse_python(self, node):`
			`h = hash(str(node))`

M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`if h in codeparsercache.pythoncache:`
			`self.references = set(codeparsercache.pythoncache[h].refs)`
			`self.execs = set(codeparsercache.pythoncache[h].execs)`
			`self.contains = {}`
			`for i in codeparsercache.pythoncache[h].contains:`
			`self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])`
			`return`

			`if h in codeparsercache.pythoncacheextras:`
			`self.references = set(codeparsercache.pythoncacheextras[h].refs)`
			`self.execs = set(codeparsercache.pythoncacheextras[h].execs)`
			`self.contains = {}`
			`for i in codeparsercache.pythoncacheextras[h].contains:`
			`self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`return`

			`code = compile(check_indent(str(node)), "<string>", "exec",`
			`ast.PyCF_ONLY_AST)`

			`for n in ast.walk(code):`
			`if n.__class__.__name__ == "Call":`
			`self.visit_Call(n)`

M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self.execs.update(self.var_execs)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`class ShellParser():`
			`def __init__(self, name, log):`
			`self.funcdefs = set()`
			`self.allexecs = set()`
			`self.execs = set()`
			`self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)`
			`self.unhandled_template = "unable to handle non-literal command '%s'"`
			`self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)`

			`def parse_shell(self, value):`
			`"""Parse the supplied shell code in a string, returning the external`
			`commands it executes.`
			`"""`

			`h = hash(str(value))`

M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`if h in codeparsercache.shellcache:`
			`self.execs = set(codeparsercache.shellcache[h].execs)`
			`return self.execs`

			`if h in codeparsercache.shellcacheextras:`
			`self.execs = set(codeparsercache.shellcacheextras[h].execs)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`return self.execs`

M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self._parse_shell(value)`
			`self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)`

			`codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)`

			`return self.execs`

			`def _parse_shell(self, value):`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`try:`
			`tokens, _ = pyshyacc.parse(value, eof=True, debug=False)`
			`except pyshlex.NeedMore:`
			`raise sherrors.ShellSyntaxError("Unexpected EOF")`

			`for token in tokens:`
			`self.process_tokens(token)`

			`def process_tokens(self, tokens):`
			`"""Process a supplied portion of the syntax tree as returned by`
			`pyshyacc.parse.`
			`"""`

			`def function_definition(value):`
			`self.funcdefs.add(value.name)`
			`return [value.body], None`

			`def case_clause(value):`
			`# Element 0 of each item in the case is the list of patterns, and`
			`# Element 1 of each item in the case is the list of commands to be`
			`# executed when that pattern matches.`
			`words = chain(*[item[0] for item in value.items])`
			`cmds = chain(*[item[1] for item in value.items])`
			`return cmds, words`

			`def if_clause(value):`
			`main = chain(value.cond, value.if_cmds)`
			`rest = value.else_cmds`
			`if isinstance(rest, tuple) and rest[0] == "elif":`
			`return chain(main, if_clause(rest[1]))`
			`else:`
			`return chain(main, rest)`

			`def simple_command(value):`
			`return None, chain(value.words, (assign[1] for assign in value.assigns))`

			`token_handlers = {`
			`"and_or": lambda x: ((x.left, x.right), None),`
			`"async": lambda x: ([x], None),`
			`"brace_group": lambda x: (x.cmds, None),`
			`"for_clause": lambda x: (x.cmds, x.items),`
			`"function_definition": function_definition,`
			`"if_clause": lambda x: (if_clause(x), None),`
			`"pipeline": lambda x: (x.commands, None),`
			`"redirect_list": lambda x: ([x.cmd], None),`
			`"subshell": lambda x: (x.cmds, None),`
			`"while_clause": lambda x: (chain(x.condition, x.cmds), None),`
			`"until_clause": lambda x: (chain(x.condition, x.cmds), None),`
			`"simple_command": simple_command,`
			`"case_clause": case_clause,`
			`}`

			`for token in tokens:`
			`name, value = token`
			`try:`
			`more_tokens, words = token_handlers[name](value)`
			`except KeyError:`
			`raise NotImplementedError("Unsupported token type " + name)`

			`if more_tokens:`
			`self.process_tokens(more_tokens)`

			`if words:`
			`self.process_words(words)`

			`def process_words(self, words):`
			`"""Process a set of 'words' in pyshyacc parlance, which includes`
			`extraction of executed commands from $() blocks, as well as grabbing`
			`the command name argument.`
			`"""`

			`words = list(words)`
			`for word in list(words):`
			`wtree = pyshlex.make_wordtree(word[1])`
			`for part in wtree:`
			`if not isinstance(part, list):`
			`continue`

			if part[0] in ('`', '$('):
			`command = pyshlex.wordtree_as_string(part[1:-1])`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self._parse_shell(command)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00
			`if word[0] in ("cmd_name", "cmd_word"):`
			`if word in words:`
			`words.remove(word)`

			`usetoken = False`
			`for word in words:`
			`if word[0] in ("cmd_name", "cmd_word") or \`
			`(usetoken and word[0] == "TOKEN"):`
			`if "=" in word[1]:`
			`usetoken = True`
			`continue`

			`cmd = word[1]`
			`if cmd.startswith("$"):`
			`self.log.debug(1, self.unhandled_template % cmd)`
			`elif cmd == "eval":`
			`command = " ".join(word for _, word in words[1:])`
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`self._parse_shell(command)`
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`else:`
			`self.allexecs.add(cmd)`
			`break`