Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -703,14 +703,6 @@ if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); - // Split out cold code. Splitting is done before inlining because 1) the most - // common kinds of cold regions can (a) be found before inlining and (b) do - // not grow after inlining, and 2) inhibiting inlining of cold code improves - // code size & compile time. Split after Mem2Reg to make code model estimates - // more accurate, but before InstCombine to allow it to clean things up. - if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink) - MPM.addPass(HotColdSplittingPass()); - // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass()); @@ -766,6 +758,12 @@ createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations))); + // Split out cold code. Splitting is done late to avoid hiding context from + // other optimizations and inadvertently regressing performance. The tradeoff + // is that this has a higher code size cost than splitting early. + if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink) + MPM.addPass(HotColdSplittingPass()); + return MPM; } Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -529,11 +529,6 @@ if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); - // Split out cold code before inlining. See comment in the new PM - // (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && DefaultOrPreLinkPipeline) - MPM.add(createHotColdSplittingPass()); - // We add a module alias analysis pass here. 
In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. @@ -730,6 +725,11 @@ MPM.add(createConstantMergePass()); // Merge dup global constants } + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + MPM.add(createHotColdSplittingPass()); + if (MergeFunctions) MPM.add(createMergeFunctionsPass()); @@ -918,6 +918,11 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( legacy::PassManagerBase &PM) { + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit) + PM.add(createHotColdSplittingPass()); + // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); Index: llvm/test/Other/X86/lto-hot-cold-split.ll =================================================================== --- llvm/test/Other/X86/lto-hot-cold-split.ll +++ llvm/test/Other/X86/lto-hot-cold-split.ll @@ -1,10 +1,10 @@ ; RUN: opt -module-summary %s -o %t.bc -; RUN: llvm-lto -hot-cold-split=true -thinlto-action=run %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-THINLTO-POSTLINK-Os +; RUN: llvm-lto -hot-cold-split=true -thinlto-action=run %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-ANYLTO-POSTLINK-Os +; RUN: llvm-lto -hot-cold-split=true %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-ANYLTO-POSTLINK-Os ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; OLDPM-THINLTO-POSTLINK-Os-LABEL: Pass Arguments -; OLDPM-THINLTO-POSTLINK-Os-NOT: Hot Cold Splitting +; OLDPM-ANYLTO-POSTLINK-Os: Hot Cold Splitting Index: llvm/test/Other/new-pm-pgo.ll 
=================================================================== --- llvm/test/Other/new-pm-pgo.ll +++ llvm/test/Other/new-pm-pgo.ll @@ -13,7 +13,6 @@ ; GEN: Running pass: PGOInstrumentationGen ; USE: Running pass: PGOInstrumentationUse ; USE: Running pass: PGOIndirectCallPromotion -; SPLIT: Running pass: HotColdSplittingPass ; USE: Running pass: PGOMemOPSizeOpt ; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> @@ -27,6 +26,7 @@ ; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass ; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion ; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SPLIT: Running pass: HotColdSplittingPass define void @foo() { ret void Index: llvm/test/Other/opt-hot-cold-split.ll =================================================================== --- llvm/test/Other/opt-hot-cold-split.ll +++ llvm/test/Other/opt-hot-cold-split.ll @@ -5,18 +5,19 @@ ; REQUIRES: asserts -; Splitting should occur after Mem2Reg and should be followed by InstCombine. +; Splitting should occur late. -; DEFAULT-Os: Promote Memory to Register ; DEFAULT-Os: Hot Cold Splitting -; DEFAULT-Os: Combine redundant instructions +; DEFAULT-Os: Simplify the CFG +; The new pass manager intentionally does not provide a way to differentiate +; between a FullLTO prelink and a non-LTO pipeline. Therefore, expect splitting +; to occur late in the FullLTO prelink and in the postlink. ; LTO-PRELINK-Os-LABEL: Starting llvm::Module pass manager run. 
; LTO-PRELINK-Os: Running pass: {{.*}}PromotePass ; LTO-PRELINK-Os: Running pass: HotColdSplittingPass ; THINLTO-PRELINK-Os-LABEL: Running analysis: PassInstrumentationAnalysis -; THINLTO-PRELINK-Os: Running pass: {{.*}}PromotePass -; THINLTO-PRELINK-Os: Running pass: HotColdSplittingPass +; THINLTO-PRELINK-Os-NOT: Running pass: HotColdSplittingPass -; THINLTO-POSTLINK-Os-NOT: HotColdSplitting +; THINLTO-POSTLINK-Os: HotColdSplitting Index: llvm/test/Other/pass-pipelines.ll =================================================================== --- llvm/test/Other/pass-pipelines.ll +++ llvm/test/Other/pass-pipelines.ll @@ -41,7 +41,6 @@ ; PGOUSE: Function Integration/Inlining ; PGOUSE: PGOInstrumentationUsePass ; PGOUSE: PGOIndirectCallPromotion -; SPLIT: Hot Cold Splitting ; PGOUSE: CallGraph Construction ; CHECK-O2-NEXT: Globals Alias Analysis ; CHECK-O2-NEXT: Call Graph SCC Pass Manager @@ -100,6 +99,7 @@ ; the runtime unrolling though. ; CHECK-O2: Loop Pass Manager ; CHECK-O2-NEXT: Loop Invariant Code Motion +; SPLIT: Hot Cold Splitting ; CHECK-O2: FunctionPass Manager ; CHECK-O2: Loop Pass Manager ; CHECK-O2-NEXT: Loop Sink