diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -620,6 +620,7 @@ PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + PMBuilder.CallGraphProfile = CodeGenOpts.CallGraphProfile; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; // Loop interleaving in the loop vectorizer has historically been set to be diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -102,6 +102,7 @@ void initializeCFIInstrInserterPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); +void initializeCGProfileLegacyPassPass(PassRegistry &); void initializeCallGraphDOTPrinterPass(PassRegistry&); void initializeCallGraphPrinterLegacyPassPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -282,6 +282,8 @@ ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str, raw_ostream *ThinLinkOS = nullptr); +ModulePass *createCGProfileLegacyPass(); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -156,6 +156,7 @@ bool DisableTailCalls; bool DisableUnrollLoops; + bool CallGraphProfile; bool SLPVectorize; bool LoopVectorize; bool LoopsInterleaved; diff --git a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h 
b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h --- a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h @@ -19,11 +19,6 @@ class CGProfilePass : public PassInfoMixin<CGProfilePass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - -private: - void addModuleFlags( - Module &M, - MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const; }; } // end namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -248,10 +248,6 @@ EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); -static cl::opt<bool> EnableCallGraphProfile( - "enable-npm-call-graph-profile", cl::init(true), cl::Hidden, - cl::desc("Enable call graph profile pass for the new PM (default = on)")); - /// Flag to enable inline deferral during PGO. static cl::opt<bool> EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), @@ -267,7 +263,6 @@ Coroutines = false; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; - CallGraphProfile = EnableCallGraphProfile; } extern cl::opt<bool> EnableHotColdSplit; diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -834,6 +834,10 @@ if (MergeFunctions) MPM.add(createMergeFunctionsPass()); + // Add Module flag "CG Profile" based on Branch Frequency Information. + if (CallGraphProfile) + MPM.add(createCGProfileLegacyPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. 
As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -15,17 +15,42 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Instrumentation.h" #include <array> using namespace llvm; -PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { +static bool +addModuleFlags(Module &M, + MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) { + if (Counts.empty()) + return false; + + LLVMContext &Context = M.getContext(); + MDBuilder MDB(Context); + std::vector<Metadata *> Nodes; + + for (auto E : Counts) { + Metadata *Vals[] = {ValueAsMetadata::get(E.first.first), + ValueAsMetadata::get(E.first.second), + MDB.createConstant(ConstantInt::get( + Type::getInt64Ty(Context), E.second))}; + Nodes.push_back(MDNode::get(Context, Vals)); + } + + M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); + return true; +} + +static bool +runCGProfilePass(Module &M, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<TargetTransformInfo &(Function &)> GetTTI) { MapVector<std::pair<Function *, Function *>, uint64_t> Counts; - FunctionAnalysisManager &FAM = - MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); InstrProfSymtab Symtab; auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F, Function *CalledF, uint64_t NewCount) { @@ -35,14 +60,14 @@ Count = SaturatingAdd(Count, NewCount); }; // Ignore error here. Indirect calls are ignored if this fails. 
- (void)(bool)Symtab.create(M); + (void)(bool) Symtab.create(M); for (auto &F : M) { if (F.isDeclaration()) continue; - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + auto &BFI = GetBFI(F); if (BFI.getEntryFreq() == 0) continue; - TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); + TargetTransformInfo &TTI = GetTTI(F); for (auto &BB : F) { Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB); if (!BBCount) @@ -69,28 +94,56 @@ } } - addModuleFlags(M, Counts); - - return PreservedAnalyses::all(); + return addModuleFlags(M, Counts); } -void CGProfilePass::addModuleFlags( - Module &M, - MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const { - if (Counts.empty()) - return; +namespace { +struct CGProfileLegacyPass final : public ModulePass { + static char ID; + CGProfileLegacyPass() : ModulePass(ID) { + initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); + } - LLVMContext &Context = M.getContext(); - MDBuilder MDB(Context); - std::vector<Metadata *> Nodes; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } - for (auto E : Counts) { - Metadata *Vals[] = {ValueAsMetadata::get(E.first.first), - ValueAsMetadata::get(E.first.second), - MDB.createConstant(ConstantInt::get( - Type::getInt64Ty(Context), E.second))}; - Nodes.push_back(MDNode::get(Context, Vals)); + bool runOnModule(Module &M) override { + auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { + return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + }; + + return runCGProfilePass(M, GetBFI, GetTTI); } +}; - M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); +} // namespace + +char CGProfileLegacyPass::ID = 0; + +INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, + false) + +ModulePass *llvm::createCGProfileLegacyPass() { + return new CGProfileLegacyPass(); +} + +PreservedAnalyses CGProfilePass::run(Module &M, 
ModuleAnalysisManager &MAM) { + FunctionAnalysisManager &FAM = + MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + runCGProfilePass(M, GetBFI, GetTTI); + + return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -112,6 +112,7 @@ initializePGOInstrumentationUseLegacyPassPass(Registry); initializePGOIndirectCallPromotionLegacyPassPass(Registry); initializePGOMemOPSizeOptLegacyPassPass(Registry); + initializeCGProfileLegacyPassPass(Registry); initializeInstrOrderFileLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerLegacyPassPass(Registry); diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -276,6 +276,13 @@ ; GCN-O1-NEXT: Warn about non-applied transformations ; GCN-O1-NEXT: Alignment from assumptions ; GCN-O1-NEXT: Strip Unused Function Prototypes +; GCN-O1-NEXT: Call Graph Profile +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Branch Probability Analysis +; GCN-O1-NEXT: Block Frequency Analysis ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Natural Loop Information @@ -623,6 +630,13 @@ ; GCN-O2-NEXT: Strip Unused Function Prototypes ; GCN-O2-NEXT: Dead Global Elimination ; GCN-O2-NEXT: Merge Duplicate Global Constants +; GCN-O2-NEXT: Call Graph Profile +; GCN-O2-NEXT: FunctionPass Manager +; 
GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Branch Probability Analysis +; GCN-O2-NEXT: Block Frequency Analysis ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Natural Loop Information @@ -975,6 +989,13 @@ ; GCN-O3-NEXT: Strip Unused Function Prototypes ; GCN-O3-NEXT: Dead Global Elimination ; GCN-O3-NEXT: Merge Duplicate Global Constants +; GCN-O3-NEXT: Call Graph Profile +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Branch Probability Analysis +; GCN-O3-NEXT: Block Frequency Analysis ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Natural Loop Information diff --git a/llvm/test/Instrumentation/cgprofile.ll b/llvm/test/Instrumentation/cgprofile.ll --- a/llvm/test/Instrumentation/cgprofile.ll +++ b/llvm/test/Instrumentation/cgprofile.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -passes cg-profile -S | FileCheck %s +; RUN: opt < %s -cg-profile -S | FileCheck %s declare void @b() diff --git a/llvm/test/Other/new-pm-cgprofile.ll b/llvm/test/Other/new-pm-cgprofile.ll --- a/llvm/test/Other/new-pm-cgprofile.ll +++ b/llvm/test/Other/new-pm-cgprofile.ll @@ -1,6 +1,6 @@ ; RUN: opt -debug-pass-manager -passes='default<O2>' %s 2>&1 |FileCheck %s --check-prefixes=DEFAULT -; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF -; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON +; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF +; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON ; ; 
DEFAULT: Running pass: CGProfilePass ; OFF-NOT: Running pass: CGProfilePass diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -280,6 +280,13 @@ ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination ; CHECK-NEXT: Merge Duplicate Global Constants +; CHECK-NEXT: Call Graph Profile +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -285,6 +285,13 @@ ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination ; CHECK-NEXT: Merge Duplicate Global Constants +; CHECK-NEXT: Call Graph Profile +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -266,6 +266,13 @@ ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination ; CHECK-NEXT: Merge Duplicate Global Constants +; CHECK-NEXT: Call Graph Profile +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; 
CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -66,7 +66,8 @@ bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify, bool Coroutines); + bool EnableDebugify, bool Coroutines, + bool CallGraphProfile); } // namespace llvm #endif diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -220,7 +220,8 @@ bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify, bool Coroutines) { + bool EnableDebugify, bool Coroutines, + bool CallGraphProfile) { bool VerifyEachPass = VK == VK_VerifyEachPass; Optional<PGOOptions> P; @@ -266,6 +267,7 @@ SI.registerCallbacks(PIC); PipelineTuningOptions PTO; + PTO.CallGraphProfile = CallGraphProfile; // LoopUnrolling defaults on to true and DisableLoopUnrolling is initialized // to false above so we shouldn't necessarily need to check whether or not the // option has been enabled. 
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -278,6 +278,10 @@ cl::desc("Specify time trace file destination"), cl::value_desc("filename")); +static cl::opt<bool> EnableCallGraphProfile( + "enable-call-graph-profile", cl::init(true), cl::Hidden, + cl::desc("Enable call graph profile pass (default = on)")); + static cl::opt<bool> RemarksWithHotness( "pass-remarks-with-hotness", cl::desc("With PGO, include profile count in optimization remarks"), @@ -414,6 +418,8 @@ Builder.SLPVectorize = OptLevel > 1 && SizeLevel < 2; + Builder.CallGraphProfile = EnableCallGraphProfile; + if (TM) TM->adjustPassManager(Builder); @@ -767,7 +773,8 @@ RemarksFile.get(), PassPipeline, Passes, OK, VK, PreserveAssemblyUseListOrder, PreserveBitcodeUseListOrder, EmitSummaryIndex, - EmitModuleHash, EnableDebugify, Coroutines) + EmitModuleHash, EnableDebugify, Coroutines, + EnableCallGraphProfile) ? 0 : 1; }