Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -658,6 +658,14 @@ // globals. MPM.addPass(DeadArgumentEliminationPass()); + // Split out cold code. Splitting is done before inlining because 1) the most + // common kinds of cold regions can (a) be found before inlining and (b) do + // not grow after inlining, and 2) inhibiting inlining of cold code improves + // code size & compile time. Split after Mem2Reg to make code model estimates + // more accurate, but before InstCombine to allow it to clean things up. + if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink) + MPM.addPass(HotColdSplittingPass()); + // Create a small function pass pipeline to cleanup after all the global // optimizations. FunctionPassManager GlobalCleanupPM(DebugLogging); @@ -723,11 +731,6 @@ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); - // We only want to do hot cold splitting once for ThinLTO, during the - // post-link ThinLTO. - if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink) - MPM.addPass(HotColdSplittingPass()); - for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -420,6 +420,10 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link + // is handled separately, so just check this is not the ThinLTO post-link. 
+ bool DefaultOrPreLinkPipeline = !PerformThinLTO; + if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); // In ThinLTO mode, when flattened profile is used, all the available @@ -513,6 +517,11 @@ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + // Split out cold code before inlining. See comment in the new PM + // (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && DefaultOrPreLinkPipeline) + MPM.add(createHotColdSplittingPass()); + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE @@ -522,7 +531,7 @@ // profile annotation in backend more difficult. // PGO instrumentation is added during the compile phase for ThinLTO, do // not run it a second time - if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) + if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); // We add a module alias analysis pass here. In part due to bugs in the @@ -737,9 +746,6 @@ // flattening of blocks. MPM.add(createDivRemPairsPass()); - if (EnableHotColdSplit) - MPM.add(createHotColdSplittingPass()); - // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
MPM.add(createCFGSimplificationPass()); Index: llvm/test/Other/X86/lto-hot-cold-split.ll =================================================================== --- /dev/null +++ llvm/test/Other/X86/lto-hot-cold-split.ll @@ -0,0 +1,10 @@ +; RUN: opt -module-summary %s -o %t.bc +; RUN: llvm-lto -hot-cold-split=true -thinlto-action=run %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-THINLTO-POSTLINK-Os + +; REQUIRES: asserts + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; OLDPM-THINLTO-POSTLINK-Os-LABEL: Pass Arguments +; OLDPM-THINLTO-POSTLINK-Os-NOT: Hot Cold Splitting Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -26,10 +26,6 @@ ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2 -; Enabling the hot-cold-split pass should not affect the ThinLTO pre-link -; RUN: opt -disable-verify -debug-pass-manager \ -; RUN: -passes='thinlto-pre-link,name-anon-globals' -hot-cold-split -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 ; ; Postlink pipelines: ; RUN: opt -disable-verify -debug-pass-manager \ Index: llvm/test/Other/opt-hot-cold-split.ll =================================================================== --- llvm/test/Other/opt-hot-cold-split.ll +++ llvm/test/Other/opt-hot-cold-split.ll @@ -1,296 +1,24 @@ -; RUN: opt -mtriple=x86_64-- -Os -hotcoldsplit -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=DEFAULT-Os +; RUN: 
opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='lto-pre-link' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os +; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto-pre-link' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os +; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-POSTLINK-Os + ; REQUIRES: asserts -; CHECK-LABEL: Pass Arguments: -; CHECK-NEXT: Target Transform Information -; CHECK-NEXT: Type-Based Alias Analysis -; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Assumption Cache Tracker -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: SROA -; CHECK-NEXT: Early CSE -; CHECK-NEXT: Lower 'expect' Intrinsics -; CHECK-NEXT: Pass Arguments: -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: Target Transform Information -; Target Pass Configuration -; CHECK: Type-Based Alias Analysis -; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Assumption Cache Tracker -; CHECK-NEXT: Profile summary info -; CHECK-NEXT: ModulePass Manager -; CHECK-NEXT: Force set function attributes -; CHECK-NEXT: Infer set function attributes -; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation -; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() -; CHECK-NEXT: Called Value Propagation -; CHECK-NEXT: Global Variable Optimizer -; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Promote Memory to Register -; CHECK-NEXT: Dead Argument Elimination -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator 
Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: CallGraph Construction -; CHECK-NEXT: Globals Alias Analysis -; CHECK-NEXT: Call Graph SCC Pass Manager -; CHECK-NEXT: Remove unused exception handling info -; CHECK-NEXT: Function Integration/Inlining -; CHECK-NEXT: Deduce function attributes -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: SROA -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Memory SSA -; CHECK-NEXT: Early CSE w/ MemorySSA -; CHECK-NEXT: Speculatively execute instructions if target has divergent branches -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Lazy Value Information Analysis -; CHECK-NEXT: Jump Threading -; CHECK-NEXT: Value Propagation -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Tail Call Elimination -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Reassociate expressions -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) 
-; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Rotate Loops -; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Unswitch loops -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Induction Variable Simplification -; CHECK-NEXT: Recognize loop idioms -; CHECK-NEXT: Delete dead loops -; CHECK-NEXT: Unroll loops -; CHECK-NEXT: MergedLoadStoreMotion -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Global Value Numbering -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Sparse Conditional Constant Propagation -; CHECK-NEXT: Demanded bits analysis -; CHECK-NEXT: Bit-Tracking Dead Code Elimination -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: 
Combine redundant instructions -; CHECK-NEXT: Lazy Value Information Analysis -; CHECK-NEXT: Jump Threading -; CHECK-NEXT: Value Propagation -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis -; CHECK-NEXT: Memory Dependence Analysis -; CHECK-NEXT: Dead Store Elimination -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Aggressive Dead Code Elimination -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: A No-Op Barrier Pass -; CHECK-NEXT: Eliminate Available Externally Globals -; CHECK-NEXT: CallGraph Construction -; CHECK-NEXT: Deduce function attributes in RPO -; CHECK-NEXT: Global Variable Optimizer -; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() -; CHECK-NEXT: Dead Global Elimination -; CHECK-NEXT: CallGraph Construction -; CHECK-NEXT: Globals Alias Analysis -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Float to int -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: 
Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Rotate Loops -; CHECK-NEXT: Loop Access Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Loop Distribution -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Loop Access Analysis -; CHECK-NEXT: Demanded bits analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Loop Vectorization -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Loop Access Analysis -; CHECK-NEXT: Loop Load Elimination -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Demanded bits analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: SLP Vectorizer -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; 
CHECK-NEXT: Unroll loops -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Combine redundant instructions -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Loop Invariant Code Motion -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Warn about non-applied transformations -; CHECK-NEXT: Alignment from assumptions -; CHECK-NEXT: Strip Unused Function Prototypes -; CHECK-NEXT: Dead Global Elimination -; CHECK-NEXT: Merge Duplicate Global Constants -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Canonicalize natural loops -; CHECK-NEXT: LCSSA Verifier -; CHECK-NEXT: Loop-Closed SSA Form Pass -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Scalar Evolution Analysis -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Loop Pass Manager -; CHECK-NEXT: Loop Sink -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Remove redundant instructions -; CHECK-NEXT: Hoist/decompose integer division and remainder -; CHECK-NEXT: Simplify the CFG -; CHECK-NEXT: Hot Cold Splitting -; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Bitcode Writer -; CHECK-NEXT: Pass Arguments: -domtree -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; 
CHECK-NEXT: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis +; Splitting should occur after Mem2Reg and should be followed by InstCombine. + +; DEFAULT-Os: Promote Memory to Register +; DEFAULT-Os: Hot Cold Splitting +; DEFAULT-Os: Combine redundant instructions + +; LTO-PRELINK-Os-LABEL: Starting llvm::Module pass manager run. +; LTO-PRELINK-Os: Running pass: ModuleToFunctionPassAdaptor +; LTO-PRELINK-Os: Running pass: HotColdSplittingPass +; LTO-PRELINK-Os: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager<{{.*}}Function> > + +; THINLTO-PRELINK-Os-LABEL: Running analysis: PassInstrumentationAnalysis +; THINLTO-PRELINK-Os: Running pass: ModuleToFunctionPassAdaptor +; THINLTO-PRELINK-Os: Running pass: HotColdSplittingPass +; THINLTO-PRELINK-Os: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager<{{.*}}Function> > + +; THINLTO-POSTLINK-Os-NOT: HotColdSplitting