Index: cfe/trunk/utils/perf-training/CMakeLists.txt
===================================================================
--- cfe/trunk/utils/perf-training/CMakeLists.txt
+++ cfe/trunk/utils/perf-training/CMakeLists.txt
@@ -1,24 +1,24 @@
-if(LLVM_BUILD_INSTRUMENTED)
-  if (CMAKE_CFG_INTDIR STREQUAL ".")
-    set(LLVM_BUILD_MODE ".")
-  else ()
-    set(LLVM_BUILD_MODE "%(build_mode)s")
-  endif ()
+if (CMAKE_CFG_INTDIR STREQUAL ".")
+  set(LLVM_BUILD_MODE ".")
+else ()
+  set(LLVM_BUILD_MODE "%(build_mode)s")
+endif ()
 
-  string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
+string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
 
+if(LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-    ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+    ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg
     )
 
   add_lit_testsuite(generate-profraw "Generating clang PGO data"
-    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
    DEPENDS clang clear-profraw
    )
 
   add_custom_target(clear-profraw
-    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw
     COMMENT "Clearing old profraw data")
 
   if(NOT LLVM_PROFDATA)
@@ -34,3 +34,26 @@
     COMMENT "Merging profdata"
     DEPENDS generate-profraw)
 endif()
+
+find_program(DTRACE dtrace)
+if(DTRACE)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/order-files.lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/order-files/lit.site.cfg
+    )
+
+  add_lit_testsuite(generate-dtrace-logs "Generating clang dtrace data"
+    ${CMAKE_CURRENT_BINARY_DIR}/order-files/
+    DEPENDS clang clear-dtrace-logs
+    )
+
+  add_custom_target(clear-dtrace-logs
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} dtrace
+    COMMENT "Clearing old dtrace data")
+
+
+  add_custom_target(generate-order-file
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py gen-order-file --binary $<TARGET_FILE:clang> --output ${CMAKE_CURRENT_BINARY_DIR}/clang.order ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Generating order file"
+    DEPENDS generate-dtrace-logs)
+endif()
Index: cfe/trunk/utils/perf-training/order-files.lit.cfg
===================================================================
--- cfe/trunk/utils/perf-training/order-files.lit.cfg
+++ cfe/trunk/utils/perf-training/order-files.lit.cfg
@@ -0,0 +1,39 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+
+def getSysrootFlagsOnDarwin(config, lit_config):
+  # On Darwin, support relocatable SDKs by providing Clang with a
+  # default system root path.
+  if 'darwin' in config.target_triple:
+    try:
+      out = lit.util.capture(['xcrun', '--show-sdk-path']).strip()
+      res = 0
+    except OSError:
+      res = -1
+    if res == 0 and out:
+      sdk_path = out
+      lit_config.note('using SDKROOT: %r' % sdk_path)
+      return '-isysroot %s' % sdk_path
+  return ''
+
+sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config)
+
+config.clang = os.path.realpath(lit.util.which('clang', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap']
+
+dtrace_wrapper = '%s %s/perf-helper.py dtrace' % (config.python_exe, config.test_source_root)
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp', ' %s %s --driver-mode=cpp %s ' % (dtrace_wrapper, config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang_cc1', ' %s %s -cc1 %s ' % (dtrace_wrapper, config.clang, sysroot_flags)))
+config.substitutions.append( ('%clang', ' %s %s %s ' % (dtrace_wrapper, config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
+
+
Index: cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
===================================================================
--- cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
+++ cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
@@ -0,0 +1,21 @@
+import sys
+
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+config.clang_tools_dir = "@CLANG_TOOLS_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.target_triple = "@TARGET_TRIPLE@"
+config.python_exe = "@PYTHON_EXECUTABLE@"
+
+# Support substitution of the tools and libs dirs with user parameters. This is
+# used when we can't determine the tool dir at configuration time.
+try:
+  config.clang_tools_dir = config.clang_tools_dir % lit_config.params
+except KeyError:
+  e = sys.exc_info()[1]
+  key, = e.args
+  lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/order-files.lit.cfg")
Index: cfe/trunk/utils/perf-training/perf-helper.py
===================================================================
--- cfe/trunk/utils/perf-training/perf-helper.py
+++ cfe/trunk/utils/perf-training/perf-helper.py
@@ -10,33 +10,336 @@
 import sys
 import os
 import subprocess
+import argparse
+import time
+import bisect
+import random
 
-def findProfrawFiles(path):
-  profraw_files = []
+def findFilesWithExtension(path, extension):
+  filenames = []
   for root, dirs, files in os.walk(path):
     for filename in files:
-      if filename.endswith(".profraw"):
-        profraw_files.append(os.path.join(root, filename))
-  return profraw_files
+      if filename.endswith(extension):
+        filenames.append(os.path.join(root, filename))
+  return filenames
 
 def clean(args):
-  if len(args) != 1:
-    print 'Usage: %s clean <path>\n\tRemoves all *.profraw files from <path>.' % __file__
+  if len(args) != 2:
+    print 'Usage: %s clean <path> <extension>' % __file__
+    print '\tRemoves all files with <extension> from <path>.'
     return 1
-  for profraw in findProfrawFiles(args[0]):
-    os.remove(profraw)
+  for filename in findFilesWithExtension(args[0], args[1]):
+    os.remove(filename)
   return 0
 
 def merge(args):
   if len(args) != 3:
-    print 'Usage: %s clean <llvm-profdata> <output> <path>\n\tMerges all profraw files from path into output.' % __file__
+    print 'Usage: %s merge <llvm-profdata> <output> <path>' % __file__
+    print '\tMerges all profraw files from path into output.'
     return 1
   cmd = [args[0], 'merge', '-o', args[1]]
-  cmd.extend(findProfrawFiles(args[2]))
+  cmd.extend(findFilesWithExtension(args[2], "profraw"))
   subprocess.check_call(cmd)
   return 0
 
-commands = {'clean' : clean, 'merge' : merge}
+def dtrace(args):
+  parser = argparse.ArgumentParser(prog='perf-helper dtrace',
+    description='dtrace wrapper for order file generation')
+  parser.add_argument('--buffer-size', metavar='size', type=int, required=False,
+    default=1, help='dtrace buffer size in MB (default 1)')
+  parser.add_argument('--use-oneshot', required=False, action='store_true',
+    help='Use dtrace\'s oneshot probes')
+  parser.add_argument('--use-ustack', required=False, action='store_true',
+    help='Use dtrace\'s ustack to print function names')
+  parser.add_argument('cmd', nargs='*', help='')
+
+  # Use python's arg parser to handle all leading option arguments, but pass
+  # everything else through to dtrace.
+  first_cmd = next(arg for arg in args if not arg.startswith("--"))
+  last_arg_idx = args.index(first_cmd)
+
+  opts = parser.parse_args(args[:last_arg_idx])
+  cmd = args[last_arg_idx:]
+
+  if opts.use_oneshot:
+    target = "oneshot$target:::entry"
+  else:
+    target = "pid$target:::entry"
+  predicate = '%s/probemod=="%s"/' % (target, os.path.basename(args[0]))
+  log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
+  if opts.use_ustack:
+    action = 'ustack(1);'
+  else:
+    action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
+  dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)
+
+  dtrace_args = []
+  if not os.geteuid() == 0:
+    print 'Script must be run as root, or you must add the following to your sudoers:'
+    print '%%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace'
+    dtrace_args.append("sudo")
+
+  dtrace_args.extend((
+      'dtrace', '-xevaltime=exec',
+      '-xbufsize=%dm' % (opts.buffer_size),
+      '-q', '-n', dtrace_script,
+      '-c', ' '.join(cmd)))
+
+  if sys.platform == "darwin":
+    dtrace_args.append('-xmangled')
+
+  f = open("%d.dtrace" % os.getpid(), "w")
+  start_time = time.time()
+  subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)
+  elapsed = time.time() - start_time
+  print "... data collection took %.4fs" % elapsed
+
+  return 0
+
+def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
+                             missing_symbols, opts):
+  def fix_mangling(symbol):
+    if sys.platform == "darwin":
+      if symbol[0] != '_' and symbol != 'start':
+        symbol = '_' + symbol
+    return symbol
+
+  def get_symbols_with_prefix(symbol):
+    start_index = bisect.bisect_left(all_symbols, symbol)
+    for s in all_symbols[start_index:]:
+      if not s.startswith(symbol):
+        break
+      yield s
+
+  # Extract the list of symbols from the given file, which is assumed to be
+  # the output of a dtrace run logging either probefunc or ustack(1) and
+  # nothing else. The dtrace -xdemangle option needs to be used.
+  #
+  # This is particular to OS X at the moment, because of the '_' handling.
+  with open(path) as f:
+    current_timestamp = None
+    for ln in f:
+      # Drop leading and trailing whitespace.
+      ln = ln.strip()
+      if not ln.startswith("dtrace-"):
+        continue
+
+      # If this is a timestamp specifier, extract it.
+      if ln.startswith("dtrace-TS: "):
+        _,data = ln.split(': ', 1)
+        if not data.isdigit():
+          print >>sys.stderr, (
+            "warning: unrecognized timestamp line %r, ignoring" % ln)
+          continue
+        current_timestamp = int(data)
+        continue
+      elif ln.startswith("dtrace-Symbol: "):
+
+        _,ln = ln.split(': ', 1)
+        if not ln:
+          continue
+
+        # If there is a '`' in the line, assume it is a ustack(1) entry in
+        # the form of <module>`<symbol>, where <symbol> is never
+        # truncated (but does need the mangling patched).
+        if '`' in ln:
+          yield (current_timestamp, fix_mangling(ln.split('`',1)[1]))
+          continue
+
+        # Otherwise, assume this is a probefunc printout. DTrace on OS X
+        # seems to have a bug where it prints the mangled version of symbols
+        # which aren't C++ mangled. We just add a '_' to anything but start
+        # which doesn't already have a '_'.
+        symbol = fix_mangling(ln)
+
+        # If we don't know all the symbols, or the symbol is one of them,
+        # just return it.
+        if not all_symbols_set or symbol in all_symbols_set:
+          yield (current_timestamp, symbol)
+          continue
+
+        # Otherwise, we have a symbol name which isn't present in the
+        # binary. We assume it is truncated, and try to extend it.
+
+        # Get all the symbols with this prefix.
+        possible_symbols = list(get_symbols_with_prefix(symbol))
+        if not possible_symbols:
+          continue
+
+        # If we found too many possible symbols, ignore this as a prefix.
+        if len(possible_symbols) > 100:
+          print >>sys.stderr, (
+            "warning: ignoring symbol %r " % symbol +
+            "(no match and too many possible suffixes)")
+          continue
+
+        # Report that we resolved a missing symbol.
+        if opts.show_missing_symbols and symbol not in missing_symbols:
+          print >>sys.stderr, ( "warning: resolved missing symbol %r" % symbol)
+          missing_symbols.add(symbol)
+
+        # Otherwise, treat all the possible matches as having occurred. This
+        # is an over-approximation, but it should be ok in practice.
+        for s in possible_symbols:
+          yield (current_timestamp, s)
+
+def check_output(*popen_args, **popen_kwargs):
+  p = subprocess.Popen(stdout=subprocess.PIPE, *popen_args, **popen_kwargs)
+  stdout,stderr = p.communicate()
+  if p.wait() != 0:
+    raise RuntimeError("process failed")
+  return stdout
+
+def uniq(list):
+  seen = set()
+  for item in list:
+    if item not in seen:
+      yield item
+      seen.add(item)
+
+def form_by_call_order(symbol_lists):
+  # Simple strategy: just return symbols in order of occurrence, even across
+  # multiple runs.
+  return uniq(s for symbols in symbol_lists for s in symbols)
+
+def form_by_call_order_fair(symbol_lists):
+  # More complicated strategy that tries to respect the call order across all
+  # of the test cases, instead of giving a huge preference to the first test
+  # case.
+
+  # First, uniq all the lists.
+  uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]
+
+  # Compute the successors for each list.
+  succs = {}
+  for symbols in uniq_lists:
+    for a,b in zip(symbols[:-1], symbols[1:]):
+      succs[a] = items = succs.get(a, [])
+      if b not in items:
+        items.append(b)
+
+  # Emit all the symbols, but make sure to always emit all successors from any
+  # call list whenever we see a symbol.
+  #
+  # There isn't much science here, but this sometimes works better than the
+  # more naive strategy. Then again, sometimes it doesn't, so more research is
+  # probably needed.
+  return uniq(s
+    for symbols in symbol_lists
+    for node in symbols
+    for s in ([node] + succs.get(node,[])))
+
+def form_by_frequency(symbol_lists):
+  # Form the order file by just putting the most commonly occurring symbols
+  # first. This assumes the data files didn't use the oneshot dtrace method.
+
+  counts = {}
+  for symbols in symbol_lists:
+    for a in symbols:
+      counts[a] = counts.get(a,0) + 1
+
+  by_count = counts.items()
+  by_count.sort(key = lambda (_,n): -n)
+  return [s for s,n in by_count]
+
+def form_by_random(symbol_lists):
+  # Randomize the symbols.
+  merged_symbols = list(uniq(s for symbols in symbol_lists
+                             for s in symbols))
+  random.shuffle(merged_symbols)
+  return merged_symbols
+
+def form_by_alphabetical(symbol_lists):
+  # Alphabetize the symbols.
+  merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
+  merged_symbols.sort()
+  return merged_symbols
+
+methods = dict((name[len("form_by_"):],value)
+    for name,value in locals().items() if name.startswith("form_by_"))
+
+def genOrderFile(args):
+  parser = argparse.ArgumentParser(
+    "%prog [options] <input files>")
+  parser.add_argument('input', nargs='+', help='')
+  parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path",
+    help="Path to the binary being ordered (for getting all symbols)",
+    default=None)
+  parser.add_argument("--output", dest="output_path",
+    help="path to output order file to write", default=None, required=True,
+    metavar="PATH")
+  parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
+    help="show symbols which are 'fixed up' to a valid name (requires --binary)",
+    action="store_true", default=None)
+  parser.add_argument("--output-unordered-symbols",
+    dest="output_unordered_symbols_path",
+    help="write a list of the unordered symbols to PATH (requires --binary)",
+    default=None, metavar="PATH")
+  parser.add_argument("--method", dest="method",
+    help="order file generation method to use", choices=methods.keys(),
+    default='call_order')
+  opts = parser.parse_args(args)
+
+  # If the user gave us a binary, get all the symbols in the binary by
+  # snarfing 'nm' output.
+  if opts.binary_path is not None:
+    output = check_output(['nm', '-P', opts.binary_path])
+    lines = output.split("\n")
+    all_symbols = [ln.split(' ',1)[0]
+                   for ln in lines
+                   if ln.strip()]
+    print "found %d symbols in binary" % len(all_symbols)
+    all_symbols.sort()
+  else:
+    all_symbols = []
+  all_symbols_set = set(all_symbols)
+
+  # Compute the list of input files.
+  input_files = []
+  for dirname in opts.input:
+    input_files.extend(findFilesWithExtension(dirname, "dtrace"))
+
+  # Load all of the input files.
+  print "loading from %d data files" % len(input_files)
+  missing_symbols = set()
+  timestamped_symbol_lists = [
+      list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
+                                    missing_symbols, opts))
+      for path in input_files]
+
+  # Reorder each symbol list.
+  symbol_lists = []
+  for timestamped_symbols_list in timestamped_symbol_lists:
+    timestamped_symbols_list.sort()
+    symbol_lists.append([symbol for _,symbol in timestamped_symbols_list])
+
+  # Execute the desired order file generation method.
+  method = methods.get(opts.method)
+  result = list(method(symbol_lists))
+
+  # Report to the user on what percentage of symbols are present in the order
+  # file.
+  num_ordered_symbols = len(result)
+  if all_symbols:
+    print >>sys.stderr, "note: order file contains %d/%d symbols (%.2f%%)" % (
+      num_ordered_symbols, len(all_symbols),
+      100.*num_ordered_symbols/len(all_symbols))
+
+  if opts.output_unordered_symbols_path:
+    ordered_symbols_set = set(result)
+    with open(opts.output_unordered_symbols_path, 'w') as f:
+      f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))
+
+  # Write the order file.
+  with open(opts.output_path, 'w') as f:
+    f.write("\n".join(result))
+    f.write("\n")
+
+  return 0
+
+commands = {'clean' : clean,
+            'merge' : merge,
+            'dtrace' : dtrace,
+            'gen-order-file' : genOrderFile}
 
 def main():
   f = commands[sys.argv[1]]
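
Note on the data flow in this patch: the dtrace subcommand logs one timestamp record per probe hit, followed by either the probefunc name or a ustack(1) frame, and gen-order-file keys off the "dtrace-TS: " and "dtrace-Symbol: " prefixes when reading the *.dtrace files back. A minimal sketch of what a log is expected to look like (timestamps and symbol names below are illustrative, not from a real run):

  dtrace-TS: 1234567890123
  dtrace-Symbol: _main
  dtrace-TS: 1234567890456
  dtrace-Symbol: libclang.dylib`_clang_parseFile

Each record becomes a (timestamp, symbol) pair; ustack entries of the form module`symbol keep only the symbol part. The pairs from each input file are sorted by timestamp, and one of the form_by_* strategies then merges the per-file symbol lists into the final order file.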
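As a usage sketch (the paths here are illustrative; the canonical invocation is the generate-order-file custom target added to CMakeLists.txt above): once the generate-dtrace-logs suite has written *.dtrace files under the build tree, the helper can also be driven by hand with

  python perf-helper.py gen-order-file --binary /path/to/bin/clang \
    --output clang.order /path/to/build/tools/clang/utils/perf-training

where each positional argument is searched recursively for *.dtrace files and --method selects one of call_order (default), call_order_fair, frequency, random, or alphabetical.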