diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -151,6 +151,7 @@ ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions , 1, 0) ///< Set when -fmerge-functions is enabled. +CODEGENOPT(SplitColdCode , 1, 0) ///< Set when -fsplit-cold-code is enabled. CODEGENOPT(MemProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile , 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3782,6 +3782,10 @@ HelpText<"Dump the layouts of all vtables that will be emitted in a translation unit">; def fmerge_functions : Flag<["-"], "fmerge-functions">, HelpText<"Permit merging of identical functions when optimizing.">; +def fsplit_cold_code : Flag<["-"], "fsplit-cold-code">, + HelpText<"Permit splitting of cold code when optimizing (on by default; has no effect at -O0 or -Oz).">; +def fno_split_cold_code : Flag<["-"], "fno-split-cold-code">, + HelpText<"Disable splitting of cold code when optimizing.">; def femit_coverage_notes : Flag<["-"], "femit-coverage-notes">, HelpText<"Emit a gcov coverage notes file when compiling.">; def femit_coverage_data: Flag<["-"], "femit-coverage-data">, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -643,6 +643,7 @@ // enabled when loop unrolling is enabled. 
PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.SplitColdCode = CodeGenOpts.SplitColdCode; PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; @@ -1272,6 +1273,9 @@ // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + // -f[no-]split-cold-code + PB.setEnableHotColdSplitting(CodeGenOpts.SplitColdCode); + // If we reached here with a non-empty index file name, then the index // file was empty and we are not performing ThinLTO backend compilation // (used in testing in a distributed build environment). Drop any the type diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1454,6 +1454,13 @@ Opts.DefaultFunctionAttrs = Args.getAllArgValues(OPT_default_function_attr); + // -f[no-]split-cold-code + // This may only be enabled when optimizing, and when small code size + // increases are tolerable. + Opts.SplitColdCode = + (Opts.OptimizationLevel > 0) && (Opts.OptimizeSize != 2) && + Args.hasFlag(OPT_fsplit_cold_code, OPT_fno_split_cold_code, true); + Opts.PassPlugins = Args.getAllArgValues(OPT_fpass_plugin_EQ); Opts.SymbolPartition = diff --git a/clang/test/CodeGen/split-cold-code.c b/clang/test/CodeGen/split-cold-code.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/split-cold-code.c @@ -0,0 +1,81 @@ +// === Old PM === +// No splitting at -O0. +// RUN: %clang_cc1 -O0 -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-NO-SPLIT %s +// +// No splitting at -Oz. 
+// RUN: %clang_cc1 -Oz -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-NO-SPLIT %s +// +// Split by default. +// RUN: %clang_cc1 -O3 -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-SPLIT %s +// +// No splitting when it's explicitly disabled. +// RUN: %clang_cc1 -O3 -fno-split-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-NO-SPLIT %s +// +// No splitting when LLVM passes are disabled. +// RUN: %clang_cc1 -O3 -fsplit-cold-code -disable-llvm-passes -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-NO-SPLIT %s +// +// Split at -O1. +// RUN: %clang_cc1 -O1 -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-SPLIT %s +// +// Split at -Os. +// RUN: %clang_cc1 -Os -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-SPLIT %s +// +// Split at -O2. +// RUN: %clang_cc1 -O2 -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-SPLIT %s +// +// Split at -O3. +// RUN: %clang_cc1 -O3 -fsplit-cold-code -mllvm -debug-pass=Structure \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=OLDPM-SPLIT %s + +// OLDPM-NO-SPLIT-NOT: Hot Cold Split + +// OLDPM-SPLIT: Hot Cold Split + +// === New PM (ditto) === +// No splitting at -O0. +// RUN: %clang_cc1 -O0 -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-NO-SPLIT %s +// +// No splitting at -Oz. 
+// RUN: %clang_cc1 -Oz -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-NO-SPLIT %s +// +// Split by default. +// RUN: %clang_cc1 -O3 -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-SPLIT %s +// +// No splitting when it's explicitly disabled. +// RUN: %clang_cc1 -O3 -fno-split-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-NO-SPLIT %s +// +// No splitting when LLVM passes are disabled. +// RUN: %clang_cc1 -O3 -fsplit-cold-code -disable-llvm-passes -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-NO-SPLIT %s +// +// Split at -O1. +// RUN: %clang_cc1 -O1 -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-SPLIT %s +// +// Split at -Os. +// RUN: %clang_cc1 -Os -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-SPLIT %s +// +// Split at -O2. +// RUN: %clang_cc1 -O2 -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-SPLIT %s +// +// Split at -O3. 
+// RUN: %clang_cc1 -O3 -fsplit-cold-code -fexperimental-new-pass-manager -fdebug-pass-manager \ +// RUN: -emit-llvm -o /dev/null %s 2>&1 | FileCheck --check-prefix=NEWPM-SPLIT %s + +// NEWPM-NO-SPLIT-NOT: HotColdSplit + +// NEWPM-SPLIT: HotColdSplit diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm -O1 -o - -triple=i386-pc-win32 %s -fexceptions -fcxx-exceptions | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -O1 -o - -triple=i386-pc-win32 %s -fno-split-cold-code -fexceptions -fcxx-exceptions | FileCheck %s struct type_info; namespace std { using ::type_info; } diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp --- a/clang/test/CodeGenCXX/nrvo.cpp +++ b/clang/test/CodeGenCXX/nrvo.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++03 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-03 %s -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++11 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-11 %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-split-cold-code -fno-experimental-new-pass-manager -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-split-cold-code -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++03 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-03 %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-split-cold-code -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++11 -o - 
%s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-11 %s // Test code generation for the named return value optimization. class X { diff --git a/clang/test/CodeGenObjC/synchronized.m b/clang/test/CodeGenObjC/synchronized.m --- a/clang/test/CodeGenObjC/synchronized.m +++ b/clang/test/CodeGenObjC/synchronized.m @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm -triple i686-apple-darwin9 -fobjc-runtime=macosx-fragile-10.5 -o - %s -O2 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple i686-apple-darwin9 -fno-split-cold-code -fobjc-runtime=macosx-fragile-10.5 -o - %s -O2 | FileCheck %s @interface MyClass { diff --git a/clang/test/CodeGenObjCXX/exceptions-legacy.mm b/clang/test/CodeGenObjCXX/exceptions-legacy.mm --- a/clang/test/CodeGenObjCXX/exceptions-legacy.mm +++ b/clang/test/CodeGenObjCXX/exceptions-legacy.mm @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-apple-darwin10 -fobjc-runtime=macosx-fragile-10.5 -emit-llvm -fexceptions -fobjc-exceptions -O2 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-apple-darwin10 -fno-split-cold-code -fobjc-runtime=macosx-fragile-10.5 -emit-llvm -fexceptions -fobjc-exceptions -O2 -o - %s | FileCheck %s // Test we maintain at least a basic amount of interoperation between // ObjC and C++ exceptions in the legacy runtime. diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -684,6 +684,10 @@ const std::function, bool VerifyEachPass, bool DebugLogging)> &C); + /// Enable or disable the hot/cold splitting optimization. By default, it is + /// disabled. + void setEnableHotColdSplitting(bool Enabled); + /// Add PGOInstrumenation passes for O0 only. 
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging, bool RunProfileGen, bool IsCS, @@ -786,6 +790,9 @@ // AA callbacks SmallVector, 2> AAParsingCallbacks; + + // Tunable passes + bool SplitColdCode = false; }; /// This utility template takes care of adding require<> and invalidate<> diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -167,6 +167,7 @@ bool VerifyInput; bool VerifyOutput; bool MergeFunctions; + bool SplitColdCode; bool PrepareForLTO; bool PrepareForThinLTO; bool PerformThinLTO; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1231,7 +1231,7 @@ // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff // is that this has a higher code size cost than splitting early. - if (EnableHotColdSplit && !LTOPreLink) + if ((EnableHotColdSplit || SplitColdCode) && !LTOPreLink) MPM.addPass(HotColdSplittingPass()); // LoopSink pass sinks instructions hoisted by LICM, which serves as a @@ -1620,7 +1620,7 @@ // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). - if (EnableHotColdSplit) + if (EnableHotColdSplit || SplitColdCode) MPM.addPass(HotColdSplittingPass()); // Add late LTO optimization passes. 
@@ -2817,6 +2817,10 @@ return Error::success(); } +void PassBuilder::setEnableHotColdSplitting(bool Enabled) { + SplitColdCode = Enabled; +} + bool PassBuilder::isAAPassName(StringRef PassName) { #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -194,6 +194,7 @@ VerifyInput = false; VerifyOutput = false; MergeFunctions = false; + SplitColdCode = false; PrepareForLTO = false; EnablePGOInstrGen = false; EnablePGOCSInstrGen = false; @@ -865,7 +866,8 @@ // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + if ((EnableHotColdSplit || SplitColdCode) && + !(PrepareForLTO || PrepareForThinLTO)) MPM.add(createHotColdSplittingPass()); if (MergeFunctions) @@ -1089,7 +1091,7 @@ legacy::PassManagerBase &PM) { // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildLTODefaultPipeline). - if (EnableHotColdSplit) + if (EnableHotColdSplit || SplitColdCode) PM.add(createHotColdSplittingPass()); // Delete basic blocks, which optimization passes may have killed.