Index: utils/UpdateTestChecks/asm.py =================================================================== --- utils/UpdateTestChecks/asm.py +++ utils/UpdateTestChecks/asm.py @@ -1,10 +1,18 @@ import re -import string +import sys from . import common +if sys.version_info[0] > 2: + class string: + expandtabs = str.expandtabs +else: + import string + # RegEx: this is where the magic happens. +##### Assembly parser + ASM_FUNCTION_X86_RE = re.compile( r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*' @@ -197,3 +205,29 @@ common.build_function_body_dictionary( function_re, scrubber, [args], raw_tool_output, prefixes, func_dict, args.verbose) + +##### Generator of assembly CHECK lines + +def add_asm_checks(output_lines, comment_marker, run_list, func_dict, func_name): + printed_prefixes = [] + for p in run_list: + checkprefixes = p[0] + for checkprefix in checkprefixes: + if checkprefix in printed_prefixes: + break + # TODO func_dict[checkprefix] may be None, '' or not exist. + # Fix the call sites. + if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]: + continue + # Add some space between different check prefixes. + if len(printed_prefixes) != 0: + output_lines.append(comment_marker) + printed_prefixes.append(checkprefix) + output_lines.append('%s %s-LABEL: %s:' % (comment_marker, checkprefix, func_name)) + func_body = func_dict[checkprefix][func_name].splitlines() + output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) + for func_line in func_body[1:]: + output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line)) + # Add space between different check prefixes and the first line of code. 
+ # output_lines.append(';') + break Index: utils/UpdateTestChecks/common.py =================================================================== --- utils/UpdateTestChecks/common.py +++ utils/UpdateTestChecks/common.py @@ -1,22 +1,16 @@ from __future__ import print_function import re +import string import subprocess import sys -RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$') -CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)') -CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') +if sys.version_info[0] > 2: + class string: + expandtabs = str.expandtabs +else: + import string -IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(') -TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$') -TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)') - -SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)') -SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) -SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) -SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') -SCRUB_LOOP_COMMENT_RE = re.compile( - r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) +##### Common utilities for update_*test_checks.py def should_add_line_to_output(input_line, prefix_set): # Skip any blank comment lines in the IR. @@ -42,6 +36,38 @@ # Fix line endings to unix CR style. 
return stdout.replace('\r\n', '\n') +##### LLVM IR parser + +RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$') +CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)') +CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') + +OPT_FUNCTION_RE = re.compile( + r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\(' + r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$', + flags=(re.M | re.S)) + +IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(') +TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$') +TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)') + +SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)') +SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) +SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) +SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') +SCRUB_LOOP_COMMENT_RE = re.compile( + r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) + +def scrub_body(body): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + body = SCRUB_WHITESPACE_RE.sub(r' ', body) + # Expand the tabs used for indentation. + body = string.expandtabs(body, 2) + # Strip trailing whitespace. + body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body) + return body + # Build up a dictionary of all the function bodies. def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose): for m in function_re.finditer(raw_tool_output): @@ -66,3 +92,114 @@ continue func_dict[prefix][func] = scrubbed_body + +##### Generator of LLVM IR CHECK lines + +SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') + +# Match things that look like identifiers, but only if they are followed by +# spaces, commas, paren, or end of the string +IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)') + +# Create a FileCheck variable name based on an IR name. 
+def get_value_name(var): + if var.isdigit(): + var = 'TMP' + var + var = var.replace('.', '_') + return var.upper() + + +# Create a FileCheck variable from regex. +def get_value_definition(var): + return '[[' + get_value_name(var) + ':%.*]]' + + +# Use a FileCheck variable. +def get_value_use(var): + return '[[' + get_value_name(var) + ']]' + +# Replace IR value defs and uses with FileCheck variables. +def genericize_check_lines(lines): + # This gets called for each match that occurs in + # a line. We transform variables we haven't seen + # into defs, and variables we have seen into uses. + def transform_line_vars(match): + var = match.group(2) + if var in vars_seen: + rv = get_value_use(var) + else: + vars_seen.add(var) + rv = get_value_definition(var) + # re.sub replaces the entire regex match + # with whatever you return, so we have + # to make sure to hand it back everything + # including the commas and spaces. + return match.group(1) + rv + match.group(3) + + vars_seen = set() + lines_with_def = [] + + for i, line in enumerate(lines): + # An IR variable named '%.' matches the FileCheck regex string. + line = line.replace('%.', '%dot') + # Ignore any comments, since the check lines will too. + scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line) + lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line) + return lines + + +def add_ir_checks(output_lines, prefix_list, func_dict, func_name, opt_basename): + # Label format is based on IR string. + check_label_format = "; %s-LABEL: @%s(" + + printed_prefixes = [] + for checkprefixes, _ in prefix_list: + for checkprefix in checkprefixes: + if checkprefix in printed_prefixes: + break + if not func_dict[checkprefix][func_name]: + continue + # Add some space between different check prefixes, but not after the last + # check line (before the test code). 
+ #if len(printed_prefixes) != 0: + # output_lines.append(';') + printed_prefixes.append(checkprefix) + output_lines.append(check_label_format % (checkprefix, func_name)) + func_body = func_dict[checkprefix][func_name].splitlines() + + # For IR output, change all defs to FileCheck variables, so we're immune + # to variable naming fashions. + func_body = genericize_check_lines(func_body) + + # This could be selectively enabled with an optional invocation argument. + # Disabled for now: better to check everything. Be safe rather than sorry. + + # Handle the first line of the function body as a special case because + # it's often just noise (a useless asm comment or entry label). + #if func_body[0].startswith("#") or func_body[0].startswith("entry:"): + # is_blank_line = True + #else: + # output_lines.append('; %s: %s' % (checkprefix, func_body[0])) + # is_blank_line = False + + is_blank_line = False + + for func_line in func_body: + if func_line.strip() == '': + is_blank_line = True + continue + # Do not waste time checking IR comments. + func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line) + + # Skip blank lines instead of checking them. + if is_blank_line == True: + output_lines.append('; %s: %s' % (checkprefix, func_line)) + else: + output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line)) + is_blank_line = False + + # Add space between different check prefixes and also before the first + # line of code in the test function. + output_lines.append(';') + break + return output_lines Index: utils/update_cc_test_checks.py =================================================================== --- /dev/null +++ utils/update_cc_test_checks.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +'''A utility to update LLVM IR or assembly CHECK lines in C/C++ FileCheck test files. 
+ +Example RUN lines in .c/.cc test files: + +// RUN: %clang -S %s -o - -O2 | FileCheck %s +// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s + +Usage: + +% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc +% utils/update_cc_test_checks.py --c-index-test-exe=release/bin/c-index-test \ + --clang=release/bin/clang /tmp/c/a.cc +''' + +import argparse +import distutils.spawn +import os # Used to advertise this file's name ("autogenerated_note"). +import shlex +import string +import subprocess +import sys +import re +import tempfile + +from UpdateTestChecks import asm, common + +ADVERT = '// NOTE: Assertions have been autogenerated by ' + +CHECK_RE = re.compile(r'^\s*//\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') +RUN_LINE_RE = re.compile('^//\s*RUN:\s*(.*)$') + +def get_line2spell_and_mangled(args, filename): + ret = {} + with tempfile.NamedTemporaryFile() as f: + # TODO Make c-index-test print mangled names without circumventing through precompiled headers + subprocess.check_call([args.c_index_test_exe, + '-write-pch', f.name, filename] + args.c_index_test_args) + output = subprocess.check_output([args.c_index_test_exe, + '-test-print-mangle', f.name]) + if sys.version_info[0] > 2: + output = output.decode() + + RE = re.compile(r'^FunctionDecl=(\w+):(\d+):\d+ \(Definition\) \[mangled=([^]]+)\]') + for line in output.splitlines(): + m = RE.match(line) + if not m: continue + spell, line, mangled = m.groups() + ret[int(line)-1] = (spell, mangled) + if args.verbose: + for line, func_name in ret.items(): + print('line {}: found function {}'.format(line+1, func_name), file=sys.stderr) + return ret + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('-v', '--verbose', action='store_true') + parser.add_argument('--llvm-bin', help='llvm $prefix/bin path') + parser.add_argument('--clang-exe', + help='"clang" executable, defaults to $llvm_bin/clang') + 
parser.add_argument('--c-index-test-exe', + help='"c-index-test" executable, defaults to $llvm_bin/c-index-test') + parser.add_argument('--c-index-test-args', + help='Space-separated extra args to c-index-test, e.g. -resource-dir clang/7.0.0') + parser.add_argument( + '--functions', nargs='+', help='A list of function name regexes. ' + 'If specified, update CHECK lines for functions matching at least one regex') + parser.add_argument( + '--x86_extra_scrub', action='store_true', + help='Use more regex for x86 matching to reduce diffs between various subtargets') + parser.add_argument('tests', nargs='+') + args = parser.parse_args() + args.c_index_test_args = shlex.split(args.c_index_test_args or '') + + if args.clang_exe is None: + if args.llvm_bin is None: + args.clang_exe = 'clang' + else: + args.clang_exe = os.path.join(args.llvm_bin, 'clang') + if not distutils.spawn.find_executable(args.clang_exe): + print('Please specify --llvm-bin or --clang-exe', file=sys.stderr) + return 1 + if args.c_index_test_exe is None: + if args.llvm_bin is None: + args.c_index_test_exe = 'c-index-test' + else: + args.c_index_test_exe = os.path.join(args.llvm_bin, 'c-index-test') + if not distutils.spawn.find_executable(args.c_index_test_exe): + print('Please specify --llvm-bin or --c-index-test-exe', file=sys.stderr) + return 1 + + autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__)) + + for test in args.tests: + with open(test) as f: + input_lines = [l.rstrip() for l in f] + + # Parse RUN lines and get `prefix_set`. 
+ raw_lines = [m.group(1) + for m in [RUN_LINE_RE.match(l) for l in input_lines] if m] + run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] + for l in raw_lines[1:]: + if run_lines[-1].endswith("\\"): + run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l + else: + run_lines.append(l) + + if args.verbose: + print('Found {} RUN lines:'.format(len(run_lines)), file=sys.stderr) + for l in run_lines: + print(' RUN: ' + l, file=sys.stderr) + + # Build a list of clang command lines from RUN lines. + run_list = [] + for l in run_lines: + commands = [cmd.strip() for cmd in l.split('|', 1)] + clang_cmd = commands[0] + + triple_in_cmd = None + m = common.TRIPLE_ARG_RE.search(clang_cmd) + if m: + triple_in_cmd = m.groups()[0] + + filecheck_cmd = commands[-1] + if clang_cmd.startswith('%clang '): + clang_args = clang_cmd[len('%clang '):].replace('%s', test).strip() + elif clang_cmd.startswith('%clang_cc1 '): + clang_args = clang_cmd.replace('%clang_cc1 ', '-cc1 ').replace('%s', test).strip() + else: + print('WARNING: Skipping non-clang RUN line: ' + l, file=sys.stderr) + continue + + if not filecheck_cmd.startswith('FileCheck '): + print('WARNING: Skipping non-FileChecked RUN line: ' + l, file=sys.stderr) + continue + + check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd) + for item in m.group(1).split(',')] + if not check_prefixes: + check_prefixes = ['CHECK'] + run_list.append((check_prefixes, clang_args, triple_in_cmd)) + + # Execute clang, generate assembly, and extract functions. 
+ func_dict = {} + for p in run_list: + prefixes = p[0] + for prefix in prefixes: + func_dict.update({prefix: dict()}) + for prefixes, clang_args, triple_in_cmd in run_list: + if args.verbose: + print('Extracted clang cmd: clang {}'.format(clang_args), file=sys.stderr) + print('Extracted FileCheck prefixes: {}'.format(prefixes), file=sys.stderr) + + raw_tool_output = common.invoke_tool(args.clang_exe, + clang_args, + test) + + if '-emit-llvm' in clang_args: + common.build_function_body_dictionary( + common.OPT_FUNCTION_RE, common.scrub_body, [], + raw_tool_output, prefixes, func_dict, args.verbose) + else: + asm.build_function_body_dictionary_for_triple(args, raw_tool_output, + triple_in_cmd or 'x86', prefixes, func_dict) + + # Strip CHECK lines which are in `prefix_set`, update test file. + prefix_set = set([prefix for p in run_list for prefix in p[0]]) + input_lines = [] + with open(test, 'r+') as f: + for line in f: + m = CHECK_RE.match(line) + if not (m and m.group(1) in prefix_set) and line != '//\n': + input_lines.append(line) + f.seek(0) + f.writelines(input_lines) + f.truncate() + + # Invoke c-index-test to get mapping from start lines to mangled names. + line2spell_and_mangled = get_line2spell_and_mangled(args, test) + output_lines = [autogenerated_note] + for idx, line in enumerate(input_lines): + # Discard any previous script advertising. + if line.startswith(ADVERT): + continue + if idx in line2spell_and_mangled: + spell, mangled = line2spell_and_mangled[idx] + # If --functions is unspecified or one regex matches the spelling name. + if args.functions is None or any(re.search(regex, spell) + for regex in args.functions): + asm.add_asm_checks(output_lines, '//', run_list, func_dict, mangled) + output_lines.append(line.rstrip('\n')) + + # Update the test file. 
+ with open(test, 'w') as f: + for line in output_lines: + f.write(line + '\n') + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) Index: utils/update_llc_test_checks.py =================================================================== --- utils/update_llc_test_checks.py +++ utils/update_llc_test_checks.py @@ -19,30 +19,6 @@ ADVERT = '; NOTE: Assertions have been autogenerated by ' -def add_checks(output_lines, run_list, func_dict, func_name): - printed_prefixes = [] - for p in run_list: - checkprefixes = p[0] - for checkprefix in checkprefixes: - if checkprefix in printed_prefixes: - break - if not func_dict[checkprefix][func_name]: - continue - # Add some space between different check prefixes. - if len(printed_prefixes) != 0: - output_lines.append(';') - printed_prefixes.append(checkprefix) - output_lines.append('; %s-LABEL: %s:' % (checkprefix, func_name)) - func_body = func_dict[checkprefix][func_name].splitlines() - output_lines.append('; %s: %s' % (checkprefix, func_body[0])) - for func_line in func_body[1:]: - output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line)) - # Add space between different check prefixes and the first line of code. - # output_lines.append(';') - break - return output_lines - - def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-v', '--verbose', action='store_true', @@ -156,7 +132,7 @@ continue # Print out the various check lines here. - output_lines = add_checks(output_lines, run_list, func_dict, func_name) + asm.add_asm_checks(output_lines, ';', run_list, func_dict, func_name) is_in_function_start = False if is_in_function: Index: utils/update_test_checks.py =================================================================== --- utils/update_test_checks.py +++ utils/update_test_checks.py @@ -44,133 +44,10 @@ # RegEx: this is where the magic happens. 
-SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') - IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(') -OPT_FUNCTION_RE = re.compile( - r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\(' - r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$', - flags=(re.M | re.S)) -# Match things that look at identifiers, but only if they are followed by -# spaces, commas, paren, or end of the string -IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)') -def scrub_body(body, opt_basename): - # Scrub runs of whitespace out of the assembly, but leave the leading - # whitespace in place. - body = common.SCRUB_WHITESPACE_RE.sub(r' ', body) - # Expand the tabs used for indentation. - body = string.expandtabs(body, 2) - # Strip trailing whitespace. - body = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body) - return body - - - -# Create a FileCheck variable name based on an IR name. -def get_value_name(var): - if var.isdigit(): - var = 'TMP' + var - var = var.replace('.', '_') - return var.upper() - - -# Create a FileCheck variable from regex. -def get_value_definition(var): - return '[[' + get_value_name(var) + ':%.*]]' - - -# Use a FileCheck variable. -def get_value_use(var): - return '[[' + get_value_name(var) + ']]' - -# Replace IR value defs and uses with FileCheck variables. -def genericize_check_lines(lines): - # This gets called for each match that occurs in - # a line. We transform variables we haven't seen - # into defs, and variables we have seen into uses. - def transform_line_vars(match): - var = match.group(2) - if var in vars_seen: - rv = get_value_use(var) - else: - vars_seen.add(var) - rv = get_value_definition(var) - # re.sub replaces the entire regex match - # with whatever you return, so we have - # to make sure to hand it back everything - # including the commas and spaces. - return match.group(1) + rv + match.group(3) - - vars_seen = set() - lines_with_def = [] - - for i, line in enumerate(lines): - # An IR variable named '%.' 
matches the FileCheck regex string. - line = line.replace('%.', '%dot') - # Ignore any comments, since the check lines will too. - scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line) - lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line) - return lines - - -def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename): - # Label format is based on IR string. - check_label_format = "; %s-LABEL: @%s(" - - printed_prefixes = [] - for checkprefixes, _ in prefix_list: - for checkprefix in checkprefixes: - if checkprefix in printed_prefixes: - break - if not func_dict[checkprefix][func_name]: - continue - # Add some space between different check prefixes, but not after the last - # check line (before the test code). - #if len(printed_prefixes) != 0: - # output_lines.append(';') - printed_prefixes.append(checkprefix) - output_lines.append(check_label_format % (checkprefix, func_name)) - func_body = func_dict[checkprefix][func_name].splitlines() - - # For IR output, change all defs to FileCheck variables, so we're immune - # to variable naming fashions. - func_body = genericize_check_lines(func_body) - - # This could be selectively enabled with an optional invocation argument. - # Disabled for now: better to check everything. Be safe rather than sorry. - - # Handle the first line of the function body as a special case because - # it's often just noise (a useless asm comment or entry label). - #if func_body[0].startswith("#") or func_body[0].startswith("entry:"): - # is_blank_line = True - #else: - # output_lines.append('; %s: %s' % (checkprefix, func_body[0])) - # is_blank_line = False - - is_blank_line = False - - for func_line in func_body: - if func_line.strip() == '': - is_blank_line = True - continue - # Do not waste time checking IR comments. - func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line) - - # Skip blank lines instead of checking them. 
- if is_blank_line == True: - output_lines.append('; %s: %s' % (checkprefix, func_line)) - else: - output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line)) - is_blank_line = False - - # Add space between different check prefixes and also before the first - # line of code in the test function. - output_lines.append(';') - break - return output_lines def main(): @@ -247,8 +124,8 @@ raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test) common.build_function_body_dictionary( - OPT_FUNCTION_RE, scrub_body, [opt_basename], raw_tool_output, - prefixes, func_dict, args.verbose) + common.OPT_FUNCTION_RE, common.scrub_body, [], + raw_tool_output, prefixes, func_dict, args.verbose) is_in_function = False is_in_function_start = False @@ -269,7 +146,7 @@ continue # Print out the various check lines here. - output_lines = add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename) + output_lines = common.add_ir_checks(output_lines, prefix_list, func_dict, func_name, opt_basename) is_in_function_start = False if is_in_function: