Index: cfe/trunk/utils/perf-training/CMakeLists.txt
===================================================================
--- cfe/trunk/utils/perf-training/CMakeLists.txt
+++ cfe/trunk/utils/perf-training/CMakeLists.txt
@@ -1,24 +1,24 @@
-if(LLVM_BUILD_INSTRUMENTED)
-  if (CMAKE_CFG_INTDIR STREQUAL ".")
-    set(LLVM_BUILD_MODE ".")
-  else ()
-    set(LLVM_BUILD_MODE "%(build_mode)s")
-  endif ()
+if (CMAKE_CFG_INTDIR STREQUAL ".")
+  set(LLVM_BUILD_MODE ".")
+else ()
+  set(LLVM_BUILD_MODE "%(build_mode)s")
+endif ()
 
-  string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
+string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
 
+if(LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-    ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+    ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg
     )
 
   add_lit_testsuite(generate-profraw "Generating clang PGO data"
-    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
    DEPENDS clang clear-profraw
    )
 
   add_custom_target(clear-profraw
-    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw
     COMMENT "Clearing old profraw data")
 
   if(NOT LLVM_PROFDATA)
@@ -34,3 +34,26 @@
     COMMENT "Merging profdata"
     DEPENDS generate-profraw)
 endif()
+
+find_program(DTRACE dtrace)
+if(DTRACE)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/order-files.lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/order-files/lit.site.cfg
+    )
+
+  add_lit_testsuite(generate-dtrace-logs "Generating clang dtrace data"
+    ${CMAKE_CURRENT_BINARY_DIR}/order-files/
+    DEPENDS clang clear-dtrace-logs
+    )
+
+  add_custom_target(clear-dtrace-logs
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} dtrace
+    COMMENT "Clearing old dtrace data")
+
+
+  add_custom_target(generate-order-file
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py gen-order-file --binary $<TARGET_FILE:clang> --output ${CMAKE_CURRENT_BINARY_DIR}/clang.order ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Generating order file"
+    DEPENDS generate-dtrace-logs)
+endif()
Index: cfe/trunk/utils/perf-training/order-files.lit.cfg
===================================================================
--- cfe/trunk/utils/perf-training/order-files.lit.cfg
+++ cfe/trunk/utils/perf-training/order-files.lit.cfg
@@ -0,0 +1,39 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+
+def getSysrootFlagsOnDarwin(config, lit_config):
+  # On Darwin, support relocatable SDKs by providing Clang with a
+  # default system root path.
+  if 'darwin' in config.target_triple:
+    try:
+      out = lit.util.capture(['xcrun', '--show-sdk-path']).strip()
+      res = 0
+    except OSError:
+      res = -1
+    if res == 0 and out:
+      sdk_path = out
+      lit_config.note('using SDKROOT: %r' % sdk_path)
+      return '-isysroot %s' % sdk_path
+  return ''
+
+sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config)
+
+config.clang = os.path.realpath(lit.util.which('clang', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap']
+
+dtrace_wrapper = '%s %s/perf-helper.py dtrace' % (config.python_exe, config.test_source_root)
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp', ' %s %s --driver-mode=cpp %s ' % (dtrace_wrapper, config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_cc1', ' %s %s -cc1 %s ' % (dtrace_wrapper, config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang', ' %s %s %s ' % (dtrace_wrapper, config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
+
+
Index: cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
===================================================================
--- cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
+++ cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
@@ -0,0 +1,21 @@
+import sys
+
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+config.clang_tools_dir = "@CLANG_TOOLS_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.target_triple = "@TARGET_TRIPLE@"
+config.python_exe = "@PYTHON_EXECUTABLE@"
+
+# Support substitution of the tools and libs dirs with user parameters. This is
+# used when we can't determine the tool dir at configuration time.
+try:
+  config.clang_tools_dir = config.clang_tools_dir % lit_config.params
+except KeyError:
+  e = sys.exc_info()[1]
+  key, = e.args
+  lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/order-files.lit.cfg")
Index: cfe/trunk/utils/perf-training/perf-helper.py
===================================================================
--- cfe/trunk/utils/perf-training/perf-helper.py
+++ cfe/trunk/utils/perf-training/perf-helper.py
@@ -10,33 +10,336 @@
 import sys
 import os
 import subprocess
+import argparse
+import time
+import bisect
+import random
 
-def findProfrawFiles(path):
-  profraw_files = []
+def findFilesWithExtension(path, extension):
+  filenames = []
   for root, dirs, files in os.walk(path):
     for filename in files:
-      if filename.endswith(".profraw"):
-        profraw_files.append(os.path.join(root, filename))
-  return profraw_files
+      if filename.endswith(extension):
+        filenames.append(os.path.join(root, filename))
+  return filenames
 
 def clean(args):
-  if len(args) != 1:
-    print 'Usage: %s clean <path>\n\tRemoves all *.profraw files from <path>.' % __file__
+  if len(args) != 2:
+    print 'Usage: %s clean <path> <extension>' % __file__
+    print '\tRemoves all files with <extension> from <path>.'
     return 1
-  for profraw in findProfrawFiles(args[0]):
-    os.remove(profraw)
+  for filename in findFilesWithExtension(args[0], args[1]):
+    os.remove(filename)
   return 0
 
 def merge(args):
   if len(args) != 3:
-    print 'Usage: %s clean <llvm-profdata> <output> <path>\n\tMerges all profraw files from path into output.' % __file__
+    print 'Usage: %s merge <llvm-profdata> <output> <path>' % __file__
+    print '\tMerges all profraw files from path into output.'
     return 1
   cmd = [args[0], 'merge', '-o', args[1]]
-  cmd.extend(findProfrawFiles(args[2]))
+  cmd.extend(findFilesWithExtension(args[2], "profraw"))
   subprocess.check_call(cmd)
   return 0
 
-commands = {'clean' : clean, 'merge' : merge}
+def dtrace(args):
+  parser = argparse.ArgumentParser(prog='perf-helper dtrace',
+    description='dtrace wrapper for order file generation')
+  parser.add_argument('--buffer-size', metavar='size', type=int, required=False,
+    default=1, help='dtrace buffer size in MB (default 1)')
+  parser.add_argument('--use-oneshot', required=False, action='store_true',
+    help='Use dtrace\'s oneshot probes')
+  parser.add_argument('--use-ustack', required=False, action='store_true',
+    help='Use dtrace\'s ustack to print function names')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to dtrace.
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  cmd = args[last_arg_idx:]
+
+  if opts.use_oneshot:
+    target = "oneshot$target:::entry"
+  else:
+    target = "pid$target:::entry"
+  predicate = '%s/probemod=="%s"/' % (target, os.path.basename(args[0]))
+  log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
+  if opts.use_ustack:
+    action = 'ustack(1);'
+  else:
+    action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
+  dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)
+
+  dtrace_args = []
+  if not os.geteuid() == 0:
+    print 'Script must be run as root, or you must add the following to your sudoers:'
+    print '%%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace'
+    dtrace_args.append("sudo")
+
+  dtrace_args.extend((
+      'dtrace', '-xevaltime=exec',
+      '-xbufsize=%dm' % (opts.buffer_size),
+      '-q', '-n', dtrace_script,
+      '-c', ' '.join(cmd)))
+
+  if sys.platform == "darwin":
+    dtrace_args.append('-xmangled')
+
+  f = open("%d.dtrace" % os.getpid(), "w")
+  start_time = time.time()
+  subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)
+  elapsed = time.time() - start_time
+  print "... data collection took %.4fs" % elapsed
+
+  return 0
+
+def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
+                             missing_symbols, opts):
+  def fix_mangling(symbol):
+    if sys.platform == "darwin":
+      if symbol[0] != '_' and symbol != 'start':
+        symbol = '_' + symbol
+    return symbol
+
+  def get_symbols_with_prefix(symbol):
+    start_index = bisect.bisect_left(all_symbols, symbol)
+    for s in all_symbols[start_index:]:
+      if not s.startswith(symbol):
+        break
+      yield s
+
+  # Extract the list of symbols from the given file, which is assumed to be
+  # the output of a dtrace run logging either probefunc or ustack(1) and
+  # nothing else. The dtrace -xdemangle option needs to be used.
+  #
+  # This is particular to OS X at the moment, because of the '_' handling.
+  with open(path) as f:
+    current_timestamp = None
+    for ln in f:
+      # Drop leading and trailing whitespace.
+      ln = ln.strip()
+      if not ln.startswith("dtrace-"):
+        continue
+
+      # If this is a timestamp specifier, extract it.
+      if ln.startswith("dtrace-TS: "):
+        _,data = ln.split(': ', 1)
+        if not data.isdigit():
+          print >>sys.stderr, (
+            "warning: unrecognized timestamp line %r, ignoring" % ln)
+          continue
+        current_timestamp = int(data)
+        continue
+      elif ln.startswith("dtrace-Symbol: "):
+
+        _,ln = ln.split(': ', 1)
+        if not ln:
+          continue
+
+        # If there is a '`' in the line, assume it is a ustack(1) entry in
+        # the form of <module>`<symbol>, where <symbol> is never
+        # truncated (but does need the mangling patched).
+        if '`' in ln:
+          yield (current_timestamp, fix_mangling(ln.split('`',1)[1]))
+          continue
+
+        # Otherwise, assume this is a probefunc printout. DTrace on OS X
+        # seems to have a bug where it prints the mangled version of symbols
+        # which aren't C++ mangled. We just add a '_' to anything but start
+        # which doesn't already have a '_'.
+        symbol = fix_mangling(ln)
+
+        # If we don't know all the symbols, or the symbol is one of them,
+        # just return it.
+        if not all_symbols_set or symbol in all_symbols_set:
+          yield (current_timestamp, symbol)
+          continue
+
+        # Otherwise, we have a symbol name which isn't present in the
+        # binary. We assume it is truncated, and try to extend it.
+
+        # Get all the symbols with this prefix.
+        possible_symbols = list(get_symbols_with_prefix(symbol))
+        if not possible_symbols:
+          continue
+
+        # If we found too many possible symbols, ignore this as a prefix.
+        if len(possible_symbols) > 100:
+          print >>sys.stderr, (
+            "warning: ignoring symbol %r " % symbol +
+            "(no match and too many possible suffixes)")
+          continue
+
+        # Report that we resolved a missing symbol.
+        if opts.show_missing_symbols and symbol not in missing_symbols:
+          print >>sys.stderr, ( "warning: resolved missing symbol %r" % symbol)
+          missing_symbols.add(symbol)
+
+        # Otherwise, treat all the possible matches as having occurred. This
+        # is an over-approximation, but it should be ok in practice.
+        for s in possible_symbols:
+          yield (current_timestamp, s)
+
+def check_output(*popen_args, **popen_kwargs):
+  p = subprocess.Popen(stdout=subprocess.PIPE, *popen_args, **popen_kwargs)
+  stdout,stderr = p.communicate()
+  if p.wait() != 0:
+    raise RuntimeError("process failed")
+  return stdout
+
+def uniq(list):
+  seen = set()
+  for item in list:
+    if item not in seen:
+      yield item
+      seen.add(item)
+
+def form_by_call_order(symbol_lists):
+  # Simple strategy: just return symbols in order of occurrence, even across
+  # multiple runs.
+  return uniq(s for symbols in symbol_lists for s in symbols)
+
+def form_by_call_order_fair(symbol_lists):
+  # More complicated strategy that tries to respect the call order across all
+  # of the test cases, instead of giving a huge preference to the first test
+  # case.
+
+  # First, uniq all the lists.
+  uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]
+
+  # Compute the successors for each list.
+  succs = {}
+  for symbols in uniq_lists:
+    for a,b in zip(symbols[:-1], symbols[1:]):
+      succs[a] = items = succs.get(a, [])
+      if b not in items:
+        items.append(b)
+
+  # Emit all the symbols, but make sure to always emit all successors from any
+  # call list whenever we see a symbol.
+  #
+  # There isn't much science here, but this sometimes works better than the
+  # more naive strategy. Then again, sometimes it doesn't, so more research is
+  # probably needed.
+  return uniq(s
+    for symbols in symbol_lists
+    for node in symbols
+    for s in ([node] + succs.get(node,[])))
+
+def form_by_frequency(symbol_lists):
+  # Form the order file by just putting the most commonly occurring symbols
+  # first. This assumes the data files didn't use the oneshot dtrace method.
+
+  counts = {}
+  for symbols in symbol_lists:
+    for a in symbols:
+      counts[a] = counts.get(a,0) + 1
+
+  by_count = counts.items()
+  by_count.sort(key = lambda (_,n): -n)
+  return [s for s,n in by_count]
+
+def form_by_random(symbol_lists):
+  # Randomize the symbols.
+  merged_symbols = list(uniq(s for symbols in symbol_lists
+                             for s in symbols))
+  random.shuffle(merged_symbols)
+  return merged_symbols
+
+def form_by_alphabetical(symbol_lists):
+  # Alphabetize the symbols.
+  merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
+  merged_symbols.sort()
+  return merged_symbols
+
+methods = dict((name[len("form_by_"):],value)
+    for name,value in locals().items() if name.startswith("form_by_"))
+
+def genOrderFile(args):
+  parser = argparse.ArgumentParser(
+    "%prog [options] <input files>")
+  parser.add_argument('input', nargs='+', help='')
+  parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path",
+    help="Path to the binary being ordered (for getting all symbols)",
+    default=None)
+  parser.add_argument("--output", dest="output_path",
+    help="path to output order file to write", default=None, required=True,
+    metavar="PATH")
+  parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
+    help="show symbols which are 'fixed up' to a valid name (requires --binary)",
+    action="store_true", default=None)
+  parser.add_argument("--output-unordered-symbols",
+    dest="output_unordered_symbols_path",
+    help="write a list of the unordered symbols to PATH (requires --binary)",
+    default=None, metavar="PATH")
+  parser.add_argument("--method", dest="method",
+    help="order file generation method to use", choices=methods.keys(),
+    default='call_order')
+  opts = parser.parse_args(args)
+
+  # If the user gave us a binary, get all the symbols in the binary by
+  # snarfing 'nm' output.
+  if opts.binary_path is not None:
+    output = check_output(['nm', '-P', opts.binary_path])
+    lines = output.split("\n")
+    all_symbols = [ln.split(' ',1)[0]
+                   for ln in lines
+                   if ln.strip()]
+    print "found %d symbols in binary" % len(all_symbols)
+    all_symbols.sort()
+  else:
+    all_symbols = []
+  all_symbols_set = set(all_symbols)
+
+  # Compute the list of input files.
+  input_files = []
+  for dirname in opts.input:
+    input_files.extend(findFilesWithExtension(dirname, "dtrace"))
+
+  # Load all of the input files.
+  print "loading from %d data files" % len(input_files)
+  missing_symbols = set()
+  timestamped_symbol_lists = [
+      list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
+                                    missing_symbols, opts))
+      for path in input_files]
+
+  # Reorder each symbol list.
+  symbol_lists = []
+  for timestamped_symbols_list in timestamped_symbol_lists:
+    timestamped_symbols_list.sort()
+    symbol_lists.append([symbol for _,symbol in timestamped_symbols_list])
+
+  # Execute the desired order file generation method.
+  method = methods.get(opts.method)
+  result = list(method(symbol_lists))
+
+  # Report to the user on what percentage of symbols are present in the order
+  # file.
+  num_ordered_symbols = len(result)
+  if all_symbols:
+    print >>sys.stderr, "note: order file contains %d/%d symbols (%.2f%%)" % (
+      num_ordered_symbols, len(all_symbols),
+      100.*num_ordered_symbols/len(all_symbols))
+
+  if opts.output_unordered_symbols_path:
+    ordered_symbols_set = set(result)
+    with open(opts.output_unordered_symbols_path, 'w') as f:
+      f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))
+
+  # Write the order file.
+  with open(opts.output_path, 'w') as f:
+    f.write("\n".join(result))
+    f.write("\n")
+
+  return 0
+
+commands = {'clean' : clean,
+            'merge' : merge,
+            'dtrace' : dtrace,
+            'gen-order-file' : genOrderFile}
 
 def main():
   f = commands[sys.argv[1]]
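
Note on the data flow in this patch: the dtrace subcommand logs one timestamp record per probe hit, followed by either the probefunc name or a ustack(1) frame, and gen-order-file keys off the "dtrace-TS: " and "dtrace-Symbol: " prefixes when reading the *.dtrace files back. A minimal sketch of what a log is expected to look like (timestamps and symbol names below are illustrative, not from a real run):

  dtrace-TS: 1234567890123
  dtrace-Symbol: _main
  dtrace-TS: 1234567890456
  dtrace-Symbol: libclang.dylib`_clang_parseFile

Each record becomes a (timestamp, symbol) pair; ustack entries of the form module`symbol keep only the symbol part. The pairs from each input file are sorted by timestamp, and one of the form_by_* strategies then merges the per-file symbol lists into the final order file.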
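As a usage sketch (the paths here are illustrative; the canonical invocation is the generate-order-file custom target added to CMakeLists.txt above): once the generate-dtrace-logs suite has written *.dtrace files under the build tree, the helper can also be driven by hand with

  python perf-helper.py gen-order-file --binary /path/to/bin/clang \
    --output clang.order /path/to/build/tools/clang/utils/perf-training

where each positional argument is searched recursively for *.dtrace files and --method selects one of call_order (default), call_order_fair, frequency, random, or alphabetical.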