Index: cfe/trunk/include/clang/Driver/CC1Options.td =================================================================== --- cfe/trunk/include/clang/Driver/CC1Options.td +++ cfe/trunk/include/clang/Driver/CC1Options.td @@ -507,6 +507,9 @@ def arcmt_migrate : Flag<["-"], "arcmt-migrate">, HelpText<"Apply modifications and produces temporary files that conform to ARC">; +def opt_record_file : Separate<["-"], "opt-record-file">, + HelpText<"File name to use for YAML optimization record output">; + def print_stats : Flag<["-"], "print-stats">, HelpText<"Print performance metrics and statistics">; def stats_file : Joined<["-"], "stats-file=">, Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -1192,6 +1192,15 @@ Group; def foperator_arrow_depth_EQ : Joined<["-"], "foperator-arrow-depth=">, Group; + +def fsave_optimization_record : Flag<["-"], "fsave-optimization-record">, + Group, HelpText<"Generate a YAML optimization record file">; +def fno_save_optimization_record : Flag<["-"], "fno-save-optimization-record">, + Group, Flags<[NoArgumentUnused]>; +def foptimization_record_file_EQ : Joined<["-"], "foptimization-record-file=">, + Group, + HelpText<"Specify the file name of any generated YAML optimization record">; + def ftest_coverage : Flag<["-"], "ftest-coverage">, Group; def fvectorize : Flag<["-"], "fvectorize">, Group, HelpText<"Enable the loop vectorization passes">; Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.h =================================================================== --- cfe/trunk/include/clang/Frontend/CodeGenOptions.h +++ cfe/trunk/include/clang/Frontend/CodeGenOptions.h @@ -181,6 +181,10 @@ /// object file. std::vector CudaGpuBinaryFileNames; + /// The name of the file to which the backend should save YAML optimization + /// records. 
+ std::string OptRecordFile; + /// Regular expression to select optimizations for which we should enable /// optimization remarks. Transformation passes whose name matches this /// expression (and support this feature), will emit a diagnostic Index: cfe/trunk/lib/CodeGen/CodeGenAction.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenAction.cpp +++ cfe/trunk/lib/CodeGen/CodeGenAction.cpp @@ -33,6 +33,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/YAMLTraits.h" #include <memory> using namespace clang; using namespace llvm; @@ -181,6 +183,24 @@ Ctx.setDiagnosticHandler(DiagnosticHandler, this); Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); + std::unique_ptr<llvm::tool_output_file> OptRecordFile; + if (!CodeGenOpts.OptRecordFile.empty()) { + std::error_code EC; + OptRecordFile = + llvm::make_unique<llvm::tool_output_file>(CodeGenOpts.OptRecordFile, + EC, sys::fs::F_None); + if (EC) { + Diags.Report(diag::err_cannot_open_file) << + CodeGenOpts.OptRecordFile << EC.message(); + return; + } + + Ctx.setDiagnosticsOutputFile(new yaml::Output(OptRecordFile->os())); + + if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) + Ctx.setDiagnosticHotnessRequested(true); + } + // Link LinkModule into this module if present, preserving its validity. 
for (auto &I : LinkModules) { unsigned LinkFlags = I.first; @@ -198,6 +218,9 @@ Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext); + + if (OptRecordFile) + OptRecordFile->keep(); } void HandleTagDeclDefinition(TagDecl *D) override { Index: cfe/trunk/lib/Driver/Tools.cpp =================================================================== --- cfe/trunk/lib/Driver/Tools.cpp +++ cfe/trunk/lib/Driver/Tools.cpp @@ -6080,6 +6080,39 @@ CmdArgs.push_back("-fno-math-builtin"); } + if (Args.hasFlag(options::OPT_fsave_optimization_record, + options::OPT_fno_save_optimization_record, false)) { + CmdArgs.push_back("-opt-record-file"); + + const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ); + if (A) { + CmdArgs.push_back(A->getValue()); + } else { + SmallString<128> F; + if (Output.isFilename() && (Args.hasArg(options::OPT_c) || + Args.hasArg(options::OPT_S))) { + F = Output.getFilename(); + } else { + // Use the compilation directory. + F = llvm::sys::path::stem(Input.getBaseInput()); + + // If we're compiling for an offload architecture (i.e. a CUDA device), + // we need to make the file name for the device compilation different + // from the host compilation. + if (!JA.isDeviceOffloading(Action::OFK_None) && + !JA.isDeviceOffloading(Action::OFK_Host)) { + llvm::sys::path::replace_extension(F, ""); + F += JA.getOffloadingFileNamePrefix(Triple.normalize()); + F += "-"; + F += JA.getOffloadingArch(); + } + } + + llvm::sys::path::replace_extension(F, "opt.yaml"); + CmdArgs.push_back(Args.MakeArgString(F)); + } + } + // Default to -fno-builtin-str{cat,cpy} on Darwin for ARM. // // FIXME: Now that PR4941 has been fixed this can be enabled. 
Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -826,6 +826,10 @@ Opts.LinkerOptions = Args.getAllArgValues(OPT_linker_option); bool NeedLocTracking = false; + Opts.OptRecordFile = Args.getLastArgValue(OPT_opt_record_file); + if (!Opts.OptRecordFile.empty()) + NeedLocTracking = true; + if (Arg *A = Args.getLastArg(OPT_Rpass_EQ)) { Opts.OptimizationRemarkPattern = GenerateOptimizationRemarkRegex(Diags, Args, A); Index: cfe/trunk/test/CodeGen/Inputs/opt-record.proftext =================================================================== --- cfe/trunk/test/CodeGen/Inputs/opt-record.proftext +++ cfe/trunk/test/CodeGen/Inputs/opt-record.proftext @@ -0,0 +1,26 @@ +foo +# Func Hash: +0 +# Num Counters: +1 +# Counter Values: +30 + +bar +# Func Hash: +0 +# Num Counters: +1 +# Counter Values: +30 + +Test +# Func Hash: +269 +# Num Counters: +3 +# Counter Values: +1 +30 +15 + Index: cfe/trunk/test/CodeGen/opt-record.c =================================================================== --- cfe/trunk/test/CodeGen/opt-record.c +++ cfe/trunk/test/CodeGen/opt-record.c @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj +// RUN: cat %t.yaml | FileCheck %s +// RUN: llvm-profdata merge %S/Inputs/opt-record.proftext -o %t.profdata +// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -fprofile-instrument-use-path=%t.profdata %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj +// RUN: cat %t.yaml | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PGO %s +// REQUIRES: x86-registered-target + +void bar(); +void foo() { bar(); } + +void Test(int *res, int *c, int *d, int *p, int n) { + int i; + +#pragma clang loop vectorize(assume_safety) + for (i = 0; i < 1600; i++) { + res[i] = 
(p[i] == 0) ? res[i] : res[i] + d[i]; + } +} + +// CHECK: --- !Missed +// CHECK: Pass: inline +// CHECK: Name: NoDefinition +// CHECK: DebugLoc: +// CHECK: Function: foo +// CHECK-PGO: Hotness: + +// CHECK: --- !Passed +// CHECK: Pass: loop-vectorize +// CHECK: Name: Vectorized +// CHECK: DebugLoc: +// CHECK: Function: Test +// CHECK-PGO: Hotness: + Index: cfe/trunk/test/Driver/opt-record.c =================================================================== --- cfe/trunk/test/Driver/opt-record.c +++ cfe/trunk/test/Driver/opt-record.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -S -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s +// RUN: %clang -### -c -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s +// RUN: %clang -### -c -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O +// RUN: %clang -### -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O +// RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV +// RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV +// RUN: %clang -### -S -o FOO -fsave-optimization-record -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ + +// CHECK: "-cc1" +// CHECK: "-opt-record-file" "FOO.opt.yaml" + +// CHECK-NO-O: "-cc1" +// CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml" +// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-device-cuda-nvptx64-nvidia-cuda-sm_20.opt.yaml" + +// CHECK-EQ: "-cc1" +// CHECK-EQ: "-opt-record-file" "BAR.txt" +