#!/usr/bin/env python3

"""Find Kconfig symbols that are referenced but not defined."""

# (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#
# Licensed under the terms of the GNU GPL License version 2


import argparse
import difflib
import os
import re
import signal
import subprocess
import sys
from multiprocessing import Pool, cpu_count


# regex expressions
#
# These are plain pattern strings; they are concatenated into the larger
# patterns below and compiled once in the "regex objects" section.

# operators that may appear in a Kconfig expression: & ( ) | !
OPERATORS = r"&|\(|\)|\||\!"
# a word containing at least two upper-case letters or digits
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# "config FOO" / "menuconfig FOO" line; group 1 is the symbol name
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
# a run of operators, whitespace and symbols
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
# "default <value>" optionally followed by "if <condition>"
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# statement that can reference symbols: if / select / depends on / default
STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# "CONFIG_FOO" with an optional leading "D" (presumably for "-DCONFIG_FOO"
# compiler flags); group 1 is the symbol name without the CONFIG_ prefix
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
# used with search(): keeps only symbols whose last character is alphanumeric
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
# decimal or hexadecimal literal
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# double-quoted string (non-greedy)
REGEX_QUOTES = re.compile("(\"(.*?)\")")


def parse_options():
    """Parse and validate the command-line options of this tool.

    Returns the argparse namespace with the attributes commit, diff, find,
    ignore, sim, force and color.

    Exits through sys.exit() with an explanatory message when
      * --commit and --diff are both given,
      * --diff is not of the form 'commit1..commit2',
      * a commit or diff is requested on a dirty tree without --force, or
      * --ignore is not a valid Python regular expression.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig.  If no option is specified, "             \
            "checkkconfigsymbols defaults to check your current tree.  "       \
            "Please note that specifying commits will 'git reset --hard' "     \
            "your current tree!  You may save uncommitted changes to avoid "   \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "'commit1..commit2'")

    if args.commit or args.diff:
        # both modes run 'git reset --hard', which discards local changes
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status').  "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        # --find is only meaningful together with --diff
        args.find = False

    if args.ignore:
        # Try the pattern once so that a malformed regex is reported here
        # rather than deep inside a worker process.  Only re.error is caught
        # so that Ctrl-C and SystemExit are not swallowed.
        try:
            re.match(args.ignore, "this/is/just/a/test.c")
        except re.error:
            sys.exit("Please specify a valid Python regex.")

    return args


def main():
    """Run the check selected on the command line and print the report.

    Depending on the options this either
      * prints the symbols that are string-similar to --sim and exits,
      * reports undefined symbols that are new in --commit or between the
        two commits of --diff (the tree is reset to each commit in turn and
        finally back to the original HEAD), or
      * reports all undefined symbols of the current tree.
    """
    args = parse_options()

    # yel() and red() read this module-level flag; color is only used when
    # it was not disabled and stdout is a terminal
    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # {symbol: referencing files} that will be reported
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        else:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        # get undefined items before the commit
        reset(commit_a)
        undefined_a, _ = check_symbols(args.ignore)

        # get undefined items for the commit
        reset(commit_b)
        undefined_b, defined = check_symbols(args.ignore)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                undefined[symbol] = sorted(undefined_b[symbol])
                continue
            # check if there are new files that reference the undefined symbol
            files = sorted(undefined_b[symbol] - undefined_a[symbol])
            if files:
                undefined[symbol] = files

        # reset to head
        reset(head)

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined[symbol])
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # "<abbreviated hash> <subject>"; the subject may be empty
                    abbrev, _, subject = commit.partition(" ")
                    print("\t- %s (\"%s\")" % (yel(abbrev), subject))
            else:
                print("\t- no commit found")
        print()  # new line


def reset(commit):
    """Hard-reset the current git tree to @commit.

    Runs 'git reset --hard <commit>' through execute(), which exits the
    program if the command fails.
    """
    execute(["git", "reset", "--hard", commit])


def yel(string):
    """
    Color %string yellow.

    The ANSI escape sequence is only added when the module-level COLOR flag
    (set in main()) is true; otherwise %string is returned unchanged.
    """
    if COLOR:
        return "\033[33m%s\033[0m" % string
    return string


def red(string):
    """
    Color %string red.

    The ANSI escape sequence is only added when the module-level COLOR flag
    (set in main()) is true; otherwise %string is returned unchanged.
    """
    if COLOR:
        return "\033[31m%s\033[0m" % string
    return string


def execute(cmd):
    """Execute %cmd and return its output as a string.

    %cmd is an argument list that is run without a shell.  stderr is merged
    into the returned output, and bytes that cannot be decoded are replaced.
    If the command returns a non-zero exit status the program exits with the
    CalledProcessError as exit message.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # sys.exit() rather than the builtin exit(): the latter is injected
        # by the site module and is not guaranteed to exist
        sys.exit(fail)
    return stdout.decode(errors='replace')


def find_commits(symbol, diff):
    """Find commits changing %symbol in the given range of %diff.

    Returns the non-empty lines of 'git log --pretty=oneline
    --abbrev-commit -G <symbol> <diff>', i.e. one
    "<abbreviated hash> <subject>" string per commit.
    """
    commits = execute(["git", "log", "--pretty=oneline",
                       "--abbrev-commit", "-G",
                       symbol, diff])
    return [line for line in commits.split("\n") if line]


def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    Only the two status columns at the start of each line of
    'git status --porcelain' are inspected, so entries with other codes
    (such as '??' for untracked files) do not count as dirty.
    """
    stdout = execute(["git", "status", "--porcelain"])
    # Iterate over lines: iterating over the string itself would yield single
    # characters and match status letters inside file names.
    for line in stdout.splitlines():
        if re.search(r"[URMADC]", line[:2]):
            return True
    return False


def get_head():
    """Return commit hash of current HEAD.

    This is the output of 'git rev-parse HEAD' with newlines stripped from
    both ends.
    """
    stdout = execute(["git", "rev-parse", "HEAD"])
    return stdout.strip('\n')


def partition(lst, size):
    """Distribute the items of @lst round-robin over @size lists.

    Returns a list of @size lists whose lengths differ by at most one; list
    i holds the items lst[i], lst[i + size], lst[i + 2 * size], ...
    """
    return [lst[i::size] for i in range(size)]


def init_worker():
    """Make the calling process ignore SIGINT.

    Passed to Pool() as the worker initializer in this file.
    """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)


def find_sims(symbol, ignore, defined=None):
    """Return a sorted list of max. ten Kconfig symbols that are
    string-similar to @symbol.

    If @defined is a non-empty collection of symbol names, the candidates
    are taken from it.  Otherwise all Kconfig files of the tree are parsed
    in a worker pool to collect the defined symbols first; @ignore is passed
    through to parse_kconfig_files().  @defined is never modified.
    """
    if defined:
        return sorted(difflib.get_close_matches(symbol, set(defined), 10))

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a local set: a shared default list would grow from call to
    # call, and the caller may pass an (empty) set that cannot be extended.
    candidates = set()
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            candidates.update(res[0])

    return sorted(difflib.get_close_matches(symbol, candidates, 10))


def get_files():
    """Return a list of all files in the current git directory.

    The list is taken from 'git ls-files'.  Paths are dropped when they
    contain ".git", "ChangeLog" or ".log", when they are directories, or
    when they start with "tools/".
    """
    # use 'git ls-files' to get the worklist
    stdout = execute(["git", "ls-files"])

    files = []
    for gitfile in stdout.split("\n"):
        if not gitfile:
            # trailing newline or empty output
            continue
        if ".git" in gitfile or "ChangeLog" in gitfile or      \
                ".log" in gitfile or os.path.isdir(gitfile) or \
                gitfile.startswith("tools/"):
            continue
        files.append(gitfile)
    return files


def check_symbols(ignore):
    """Find undefined Kconfig symbols in the current tree.

    Returns the tuple (undefined, defined) produced by
    check_symbols_helper(): a dict with each undefined symbol as key and the
    set of referencing files as value, and the set of all defined symbols.
    Files matching %ignore are not checked for undefined symbols.

    The worker pool is always shut down before returning; on a keyboard
    interrupt the program exits with status 1.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        return check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # runs on success, on Ctrl-C and on errors, so that no worker
        # processes are left behind (main() may call this function twice)
        pool.terminate()
        pool.join()


def check_symbols_helper(pool, ignore):
    """Helper method for check_symbols().  Used to catch keyboard interrupts in
    check_symbols() in order to properly terminate running worker processes.

    Parses all files returned by get_files() using @pool and returns the
    tuple (undefined, defined_symbols): a dict mapping each undefined symbol
    to the set of files referencing it, and the set of all symbols defined
    in Kconfig files.  Source files matching @ignore are not parsed.
    """
    source_files = []
    kconfig_files = []

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        elif ignore and re.match(ignore, gitfile):
            # skip source files that match the ignore pattern (same test as
            # in parse_kconfig_files())
            continue
        else:
            source_files.append(gitfile)

    referenced_symbols = {}  # {file: [symbols]}

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_symbols.update(res)

    # parse kconfig files
    defined_symbols = set()
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_symbols.update(res[0])
        referenced_symbols.update(res[1])

    # inverse mapping of referenced_symbols to dict(symbol: {files})
    inv_map = {}
    for _file, symbols in referenced_symbols.items():
        for symbol in symbols:
            inv_map.setdefault(symbol, set()).add(_file)

    # placeholder names that are known false positives
    placeholders = ("FOO", "BAR", "FOO_BAR", "XXX")

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(inv_map):
        if symbol in placeholders or symbol in defined_symbols:
            continue
        # avoid false positives for kernel modules: FOO_MODULE is fine as
        # long as FOO is defined
        if symbol.endswith("_MODULE") and \
                symbol[:-len("_MODULE")] in defined_symbols:
            continue
        undefined[symbol] = inv_map[symbol]
    return undefined, defined_symbols


def parse_source_files(source_files):
    """Parse each source file in @source_files and return a dictionary with
    the source files as keys and lists of referenced Kconfig symbols as
    values."""
    return {sfile: parse_source_file(sfile) for sfile in source_files}


def parse_source_file(sfile):
    """Parse @sfile and return a list of referenced Kconfig symbols.

    The symbols are returned without the CONFIG_ prefix and may contain
    duplicates.  An empty list is returned if @sfile does not exist.
    """
    references = []

    if not os.path.exists(sfile):
        return references

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        for line in stream:
            # cheap pre-filter before running the regex
            if "CONFIG_" not in line:
                continue
            for symbol in REGEX_SOURCE_SYMBOL.findall(line):
                # keep only symbols that end in a letter or digit
                if REGEX_FILTER_SYMBOLS.search(symbol):
                    references.append(symbol)

    return references


def get_symbols_in_line(line):
    """Return mentioned Kconfig symbols in @line.

    The result is the list of all non-overlapping matches of REGEX_SYMBOL,
    in order of appearance.
    """
    return REGEX_SYMBOL.findall(line)


def parse_kconfig_files(args):
    """Parse kconfig files and return tuple of defined and referenced Kconfig
    symbols.  Note, @args is a tuple of a list of files and the @ignore
    pattern.

    The first element of the result is a list with the symbols defined in
    any of the files.  The second is a dict {file: [referenced symbols]}
    that has no entry for files matching the @ignore pattern.
    """
    kconfig_files, ignore = args[0], args[1]
    defined_symbols = []
    referenced_symbols = {}

    for kfile in kconfig_files:
        defined, references = parse_kconfig_file(kfile)
        # definitions always count, even in ignored files
        defined_symbols.extend(defined)
        if ignore and re.match(ignore, kfile):
            # do not collect references for files that match the ignore pattern
            continue
        referenced_symbols[kfile] = references
    return (defined_symbols, referenced_symbols)


def parse_kconfig_file(kfile):
    """Parse @kfile and return its symbol definitions and references.

    Returns the tuple (defined, references): the symbols defined by
    "config"/"menuconfig" entries, and the symbols referenced by statements
    matching REGEX_KCONFIG_STMT.  Help texts are skipped and numeric values
    are not reported as references.  Both lists are empty if @kfile does not
    exist.
    """
    defined = []
    references = []

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    skip = False  # true while inside the help text of an entry
    for index, raw_line in enumerate(lines):
        line = raw_line.strip('\n')
        line = line.split("#")[0]  # ignore comments

        if REGEX_KCONFIG_DEF.match(line):
            symbol_def = REGEX_KCONFIG_DEF.findall(line)
            defined.append(symbol_def[0])
            # a new entry ends the help text of the previous one
            skip = False
        elif REGEX_KCONFIG_HELP.match(line):
            skip = True
        elif skip:
            # ignore content of help messages
            pass
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements: follow trailing backslashes, but never
            # read past the end of the file
            following = index + 1
            while line.endswith("\\") and following < len(lines):
                line = lines[following].strip('\n')
                symbols.extend(get_symbols_in_line(line))
                following += 1
            # dict.fromkeys() removes duplicates and keeps the order
            for symbol in dict.fromkeys(symbols):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references


# Run the checker only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()