Index: cfe/trunk/utils/creduce-clang-crash.py =================================================================== --- cfe/trunk/utils/creduce-clang-crash.py +++ cfe/trunk/utils/creduce-clang-crash.py @@ -1,8 +1,14 @@ #!/usr/bin/env python """Calls C-Reduce to create a minimal reproducer for clang crashes. + +Output files: + *.reduced.sh -- crash reproducer with minimal arguments + *.reduced.cpp -- the reduced file + *.test.sh -- interestingness test for C-Reduce """ -from argparse import ArgumentParser +from __future__ import print_function +from argparse import ArgumentParser, RawTextHelpFormatter import os import re import stat @@ -15,10 +21,14 @@ from distutils.spawn import find_executable verbose = False -llvm_bin = None creduce_cmd = None +clang_cmd = None not_cmd = None +def verbose_print(*args, **kwargs): + if verbose: + print(*args, **kwargs) + def check_file(fname): if not os.path.isfile(fname): sys.exit("ERROR: %s does not exist" % (fname)) @@ -33,166 +43,339 @@ cmd = find_executable(cmd_path) if cmd: return cmd - sys.exit("ERROR: executable %s not found" % (cmd_path)) + sys.exit("ERROR: executable `%s` not found" % (cmd_path)) cmd = find_executable(cmd_name, path=cmd_dir) if cmd: return cmd - sys.exit("ERROR: %s not found in %s" % (cmd_name, cmd_dir)) -def quote_cmd(cmd): - return ' '.join(arg if arg.startswith('$') else pipes.quote(arg) - for arg in cmd) + if not cmd_dir: + cmd_dir = "$PATH" + sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir)) -def get_crash_cmd(crash_script): - with open(crash_script) as f: - # Assume clang call is on the last line of the script - line = f.readlines()[-1] - cmd = shlex.split(line) - - # Overwrite the script's clang with the user's clang path - new_clang = check_cmd('clang', llvm_bin) - cmd[0] = pipes.quote(new_clang) - return cmd +def quote_cmd(cmd): + return ' '.join(pipes.quote(arg) for arg in cmd) -def has_expected_output(crash_cmd, expected_output): - p = subprocess.Popen(crash_cmd, - 
stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - crash_output, _ = p.communicate() - return all(msg in crash_output for msg in expected_output) - -def get_expected_output(crash_cmd): - p = subprocess.Popen(crash_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - crash_output, _ = p.communicate() - - # If there is an assertion failure, use that; - # otherwise use the last five stack trace functions - assertion_re = r'Assertion `([^\']+)\' failed' - assertion_match = re.search(assertion_re, crash_output) - if assertion_match: - return [assertion_match.group(1)] - else: - stacktrace_re = r'#[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\(' - matches = re.findall(stacktrace_re, crash_output) - return matches[-5:] - -def write_interestingness_test(testfile, crash_cmd, expected_output, - file_to_reduce): - filename = os.path.basename(file_to_reduce) - if filename not in crash_cmd: - sys.exit("ERROR: expected %s to be in the crash command" % filename) - - # Replace all instances of file_to_reduce with a command line variable - output = ['#!/bin/bash', - 'if [ -z "$1" ] ; then', - ' f=%s' % (pipes.quote(filename)), - 'else', - ' f="$1"', - 'fi'] - cmd = ['$f' if s == filename else s for s in crash_cmd] - - output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(not_cmd), - quote_cmd(cmd))) - - for msg in expected_output: - output.append('grep %s t.log || exit 1' % pipes.quote(msg)) - - with open(testfile, 'w') as f: - f.write('\n'.join(output)) - os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC) - -def check_interestingness(testfile, file_to_reduce): - testfile = os.path.abspath(testfile) - - # Check that the test considers the original file interesting - with open(os.devnull, 'w') as devnull: - returncode = subprocess.call(testfile, stdout=devnull) - if returncode: - sys.exit("The interestingness test does not pass for the original file.") - - # Check that an empty file is not interesting - _, empty_file = tempfile.mkstemp() - with open(os.devnull, 
'w') as devnull: - returncode = subprocess.call([testfile, empty_file], stdout=devnull) - os.remove(empty_file) - if not returncode: - sys.exit("The interestingness test passes for an empty file.") - -def clang_preprocess(file_to_reduce, crash_cmd, expected_output): - _, tmpfile = tempfile.mkstemp() - shutil.copy(file_to_reduce, tmpfile) - - cmd = crash_cmd + ['-E', '-P'] - p = subprocess.Popen(cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - preprocessed, _ = p.communicate() - - with open(file_to_reduce, 'w') as f: - f.write(preprocessed) - - if has_expected_output(crash_cmd, expected_output): - if verbose: - print("Successfuly preprocessed with %s" % (quote_cmd(cmd))) - os.remove(tmpfile) - else: - if verbose: - print("Failed to preprocess with %s" % (quote_cmd(cmd))) - shutil.move(tmpfile, file_to_reduce) - - -def filter_args(args, opts_startswith=[]): - result = [arg for arg in args if all(not arg.startswith(a) for a in - opts_startswith)] - return result - -def try_remove_args(cmd, expected_output, msg=None, extra_arg=None, **kwargs): - new_cmd = filter_args(cmd, **kwargs) - if extra_arg and extra_arg not in new_cmd: - new_cmd.append(extra_arg) - if new_cmd != cmd and has_expected_output(new_cmd, expected_output): - if msg and verbose: - print(msg) - return new_cmd - return cmd - -def simplify_crash_cmd(crash_cmd, expected_output): - new_cmd = try_remove_args(crash_cmd, expected_output, - msg="Removed debug info options", - opts_startswith=["-gcodeview", - "-dwarf-column-info", - "-debug-info-kind=", - "-debugger-tuning=", - "-gdwarf"]) - new_cmd = try_remove_args(new_cmd, expected_output, - msg="Replaced -W options with -w", - extra_arg='-w', - opts_startswith=["-W"]) - new_cmd = try_remove_args(new_cmd, expected_output, - msg="Replaced optimization level with -O0", - extra_arg="-O0", - opts_startswith=["-O"]) - return new_cmd +def write_to_script(text, filename): + with open(filename, 'w') as f: + f.write(text) + os.chmod(filename, 
os.stat(filename).st_mode | stat.S_IEXEC) + +class Reduce(object): + def __init__(self, crash_script, file_to_reduce): + crash_script_name, crash_script_ext = os.path.splitext(crash_script) + file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce) + + self.testfile = file_reduce_name + '.test.sh' + self.crash_script = crash_script_name + '.reduced' + crash_script_ext + self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext + shutil.copy(file_to_reduce, self.file_to_reduce) + + self.clang = clang_cmd + self.clang_args = [] + self.expected_output = [] + self.is_crash = True + self.creduce_flags = ["--tidy"] + + self.read_clang_args(crash_script, file_to_reduce) + self.read_expected_output() + + def get_crash_cmd(self, cmd=None, args=None, filename=None): + if not cmd: + cmd = self.clang + if not args: + args = self.clang_args + if not filename: + filename = self.file_to_reduce + + return [cmd] + args + [filename] + + def read_clang_args(self, crash_script, filename): + print("\nReading arguments from crash script...") + with open(crash_script) as f: + # Assume clang call is on the last line of the script + line = f.readlines()[-1] + cmd = shlex.split(line) + + # Remove clang and filename from the command + # Assume the last occurrence of the filename is the clang input file + del cmd[0] + for i in range(len(cmd)-1, -1, -1): + if cmd[i] == filename: + del cmd[i] + break + self.clang_args = cmd + verbose_print("Clang arguments:", quote_cmd(self.clang_args)) + + def read_expected_output(self): + print("\nGetting expected crash output...") + p = subprocess.Popen(self.get_crash_cmd(), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + crash_output, _ = p.communicate() + result = [] + + # Remove color codes + ansi_escape = r'\x1b\[[0-?]*m' + crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8')) + + # Look for specific error messages + regexes = [r"Assertion `(.+)' failed", # Linux assert() + r"Assertion failed: (.+),", # 
FreeBSD/Mac assert() + r"fatal error: backend error: (.+)", + r"LLVM ERROR: (.+)", + r"UNREACHABLE executed (at .+)?!", + r"LLVM IR generation of declaration '(.+)'", + r"Generating code for declaration '(.+)'", + r"\*\*\* Bad machine code: (.+) \*\*\*"] + for msg_re in regexes: + match = re.search(msg_re, crash_output) + if match: + msg = match.group(1) + result = [msg] + print("Found message:", msg) + + if "fatal error:" in msg_re: + self.is_crash = False + break + + # If no message was found, use the top five stack trace functions, + # ignoring some common functions + # Five is a somewhat arbitrary number; the goal is to get a small number + # of identifying functions with some leeway for common functions + if not result: + stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\(' + filters = ["PrintStackTraceSignalHandler", + "llvm::sys::RunSignalHandlers", + "SignalHandler", "__restore_rt", "gsignal", "abort"] + matches = re.findall(stacktrace_re, crash_output) + result = [x for x in matches if x and x.strip() not in filters][:5] + for msg in result: + print("Found stack trace function:", msg) + + if not result: + print("ERROR: no crash was found") + print("The crash output was:\n========\n%s========" % crash_output) + sys.exit(1) + + self.expected_output = result + + def check_expected_output(self, args=None, filename=None): + if not args: + args = self.clang_args + if not filename: + filename = self.file_to_reduce + + p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + crash_output, _ = p.communicate() + return all(msg in crash_output.decode('utf-8') for msg in + self.expected_output) + + def write_interestingness_test(self): + print("\nCreating the interestingness test...") + + crash_flag = "--crash" if self.is_crash else "" + + output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \ + (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd())) + + for msg in 
self.expected_output: + output += 'grep %s t.log || exit 1\n' % pipes.quote(msg) + + write_to_script(output, self.testfile) + self.check_interestingness() + + def check_interestingness(self): + testfile = os.path.abspath(self.testfile) + + # Check that the test considers the original file interesting + with open(os.devnull, 'w') as devnull: + returncode = subprocess.call(testfile, stdout=devnull) + if returncode: + sys.exit("The interestingness test does not pass for the original file.") + + # Check that an empty file is not interesting + # Instead of modifying the filename in the test file, just run the command + with tempfile.NamedTemporaryFile() as empty_file: + is_interesting = self.check_expected_output(filename=empty_file.name) + if is_interesting: + sys.exit("The interestingness test passes for an empty file.") + + def clang_preprocess(self): + print("\nTrying to preprocess the source file...") + with tempfile.NamedTemporaryFile() as tmpfile: + cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name] + cmd_preprocess_no_lines = cmd_preprocess + ['-P'] + try: + subprocess.check_call(cmd_preprocess_no_lines) + if self.check_expected_output(filename=tmpfile.name): + print("Successfully preprocessed with line markers removed") + shutil.copy(tmpfile.name, self.file_to_reduce) + else: + subprocess.check_call(cmd_preprocess) + if self.check_expected_output(filename=tmpfile.name): + print("Successfully preprocessed without removing line markers") + shutil.copy(tmpfile.name, self.file_to_reduce) + else: + print("No longer crashes after preprocessing -- " + "using original source") + except subprocess.CalledProcessError: + print("Preprocessing failed") + + @staticmethod + def filter_args(args, opts_equal=[], opts_startswith=[], + opts_one_arg_startswith=[]): + result = [] + skip_next = False + for arg in args: + if skip_next: + skip_next = False + continue + if any(arg == a for a in opts_equal): + continue + if any(arg.startswith(a) for a in 
opts_startswith): + continue + if any(arg.startswith(a) for a in opts_one_arg_startswith): + skip_next = True + continue + result.append(arg) + return result + + def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs): + new_args = self.filter_args(args, **kwargs) + + if extra_arg: + if extra_arg in new_args: + new_args.remove(extra_arg) + new_args.append(extra_arg) + + if (new_args != args and + self.check_expected_output(args=new_args)): + if msg: + verbose_print(msg) + return new_args + return args + + def try_remove_arg_by_index(self, args, index): + new_args = args[:index] + args[index+1:] + removed_arg = args[index] + + # Heuristic for grouping arguments: + # remove next argument if it doesn't start with "-" + if index < len(new_args) and not new_args[index].startswith('-'): + del new_args[index] + removed_arg += ' ' + args[index+1] + + if self.check_expected_output(args=new_args): + verbose_print("Removed", removed_arg) + return new_args, index + return args, index+1 + + def simplify_clang_args(self): + """Simplify clang arguments before running C-Reduce to reduce the time the + interestingness test takes to run. 
+ """ + print("\nSimplifying the clang command...") + + # Remove some clang arguments to speed up the interestingness test + new_args = self.clang_args + new_args = self.try_remove_args(new_args, + msg="Removed debug info options", + opts_startswith=["-gcodeview", + "-debug-info-kind=", + "-debugger-tuning="]) + # Not suppressing warnings (-w) sometimes prevents the crash from occurring + # after preprocessing + new_args = self.try_remove_args(new_args, + msg="Replaced -W options with -w", + extra_arg='-w', + opts_startswith=["-W"]) + new_args = self.try_remove_args(new_args, + msg="Replaced optimization level with -O0", + extra_arg="-O0", + opts_startswith=["-O"]) + + # Try to remove compilation steps + new_args = self.try_remove_args(new_args, msg="Added -emit-llvm", + extra_arg="-emit-llvm") + new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only", + extra_arg="-fsyntax-only") + + # Try to make implicit int an error for more sensible test output + new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int", + opts_equal=["-w"], + extra_arg="-Werror=implicit-int") + + self.clang_args = new_args + verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd())) + + def reduce_clang_args(self): + """Minimize the clang arguments after running C-Reduce, to get the smallest + command that reproduces the crash on the reduced file. 
+ """ + print("\nReducing the clang crash command...") + + new_args = self.clang_args + + # Remove some often occurring args + new_args = self.try_remove_args(new_args, msg="Removed -D options", + opts_startswith=["-D"]) + new_args = self.try_remove_args(new_args, msg="Removed -D options", + opts_one_arg_startswith=["-D"]) + new_args = self.try_remove_args(new_args, msg="Removed -I options", + opts_startswith=["-I"]) + new_args = self.try_remove_args(new_args, msg="Removed -I options", + opts_one_arg_startswith=["-I"]) + new_args = self.try_remove_args(new_args, msg="Removed -W options", + opts_startswith=["-W"]) + + # Remove other cases that aren't covered by the heuristic + new_args = self.try_remove_args(new_args, msg="Removed -mllvm", + opts_one_arg_startswith=["-mllvm"]) + + i = 0 + while i < len(new_args): + new_args, i = self.try_remove_arg_by_index(new_args, i) + + self.clang_args = new_args + + reduced_cmd = quote_cmd(self.get_crash_cmd()) + write_to_script(reduced_cmd, self.crash_script) + print("Reduced command:", reduced_cmd) + + def run_creduce(self): + print("\nRunning C-Reduce...") + try: + p = subprocess.Popen([creduce_cmd] + self.creduce_flags + + [self.testfile, self.file_to_reduce]) + p.communicate() + except KeyboardInterrupt: + # Hack to kill C-Reduce because it jumps into its own pgid + print('\n\nctrl-c detected, killed creduce') + p.kill() def main(): global verbose - global llvm_bin global creduce_cmd + global clang_cmd global not_cmd - parser = ArgumentParser(description=__doc__) + parser = ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) parser.add_argument('crash_script', type=str, nargs=1, help="Name of the script that generates the crash.") parser.add_argument('file_to_reduce', type=str, nargs=1, help="Name of the file to be reduced.") parser.add_argument('--llvm-bin', dest='llvm_bin', type=str, - required=True, help="Path to the LLVM bin directory.") + help="Path to the LLVM bin directory.") 
parser.add_argument('--llvm-not', dest='llvm_not', type=str, help="The path to the `not` executable. " "By default uses the llvm-bin directory.") + parser.add_argument('--clang', dest='clang', type=str, + help="The path to the `clang` executable. " + "By default uses the llvm-bin directory.") parser.add_argument('--creduce', dest='creduce', type=str, help="The path to the `creduce` executable. " "Required if `creduce` is not in PATH environment.") @@ -200,41 +383,21 @@ args = parser.parse_args() verbose = args.verbose - llvm_bin = os.path.abspath(args.llvm_bin) + llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None creduce_cmd = check_cmd('creduce', None, args.creduce) + clang_cmd = check_cmd('clang', llvm_bin, args.clang) not_cmd = check_cmd('not', llvm_bin, args.llvm_not) + crash_script = check_file(args.crash_script[0]) file_to_reduce = check_file(args.file_to_reduce[0]) - print("\nParsing the crash script and getting expected output...") - crash_cmd = get_crash_cmd(crash_script) - - expected_output = get_expected_output(crash_cmd) - if len(expected_output) < 1: - sys.exit("ERROR: no crash was found") - - print("\nSimplifying the crash command...") - crash_cmd = simplify_crash_cmd(crash_cmd, expected_output) - - print("\nWriting interestingness test to file...") - testfile = os.path.splitext(file_to_reduce)[0] + '.test.sh' - write_interestingness_test(testfile, crash_cmd, expected_output, - file_to_reduce) - check_interestingness(testfile, file_to_reduce) - - print("\nPreprocessing the file to reduce...") - clang_preprocess(file_to_reduce, crash_cmd, expected_output) - - print("\nRunning C-Reduce...") - try: - p = subprocess.Popen([creduce_cmd, testfile, file_to_reduce]) - p.communicate() - except KeyboardInterrupt: - # Hack to kill C-Reduce because it jumps into its own pgid - print('\n\nctrl-c detected, killed creduce') - p.kill() + r = Reduce(crash_script, file_to_reduce) - # FIXME: reduce the clang crash command + r.simplify_clang_args() + 
r.write_interestingness_test() + r.clang_preprocess() + r.run_creduce() + r.reduce_clang_args() if __name__ == '__main__': main()