Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -32,6 +32,7 @@ std::string ProfileGenFile = ""; std::string ProfileUseFile = ""; bool RunProfileGen = false; + bool SamplePGO = false; }; /// \brief This class provides access to building LLVM's passes. Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -86,7 +86,8 @@ ModulePass *createPGOInstrumentationGenLegacyPass(); ModulePass * createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); -ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false); +ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, + bool SamplePGO = false); // Helper function to check if it is legal to promote indirect call \p Inst // to a direct call of function \p F. Stores the reason in \p Reason. @@ -102,9 +103,12 @@ // TotalCount is the profile count value that the instruction executes. // Count is the profile count value that F is the target function. // These two values are used to update the branch weight. +// If \p AttachProfToDirectCall is true, a prof metadata is attached to the +// new direct call to contain \p Count. // Returns the promoted direct call instruction. Instruction *promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, - uint64_t TotalCount); + uint64_t TotalCount, + bool AttachProfToDirectCall); /// Options for the frontend instrumentation based profiling pass. 
struct InstrProfOptions { Index: include/llvm/Transforms/PGOInstrumentation.h =================================================================== --- include/llvm/Transforms/PGOInstrumentation.h +++ include/llvm/Transforms/PGOInstrumentation.h @@ -38,10 +38,13 @@ /// The indirect function call promotion pass. class PGOIndirectCallPromotion : public PassInfoMixin { public: - PGOIndirectCallPromotion(bool IsInLTO = false) : InLTO(IsInLTO) {} + PGOIndirectCallPromotion(bool IsInLTO = false, bool SamplePGO = false) + : InLTO(IsInLTO), SamplePGO(SamplePGO) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + private: bool InLTO; + bool SamplePGO; }; } // End llvm namespace Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -486,13 +486,14 @@ // Add all the requested passes for PGO Instrumentation, if requested. if (PGOOpt) { - assert(PGOOpt->RunProfileGen || !PGOOpt->ProfileUseFile.empty()); + assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || + !PGOOpt->ProfileUseFile.empty()); addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); } // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion()); + MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. @@ -665,7 +666,8 @@ // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. - MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */)); + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + PGOOpt && PGOOpt->SamplePGO)); // Propagate constants at call sites into the functions they call. 
This // opens opportunities for globalopt (and inlining) by substituting function Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -431,7 +431,8 @@ // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. if (PerformThinLTO) - MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true)); + MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, + !PGOSampleUse.empty())); if (!DisableUnitAtATime) { // Infer attributes about declarations if possible. @@ -458,7 +459,8 @@ // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. - MPM.add(createPGOIndirectCallPromotionLegacyPass()); + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } if (EnableNonLTOGlobalsModRef) @@ -681,7 +683,8 @@ // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. - PM.add(createPGOIndirectCallPromotionLegacyPass(true)); + PM.add( + createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -645,7 +645,7 @@ // We set the probability to 80% taken to indicate that the static // call is likely taken. 
DI = dyn_cast( - promoteIndirectCall(I, CalledFunction, 80, 100) + promoteIndirectCall(I, CalledFunction, 80, 100, false) ->stripPointerCasts()); PromotedInsns.insert(I); } else { Index: lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -80,6 +80,12 @@ cl::desc("Run indirect-call promotion in LTO " "mode")); +// Set if the pass is called in SamplePGO mode. The difference for SamplePGO +// mode is it will add prof metadata to the created direct call. +static cl::opt + ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, + cl::desc("Run indirect-call promotion in SamplePGO mode")); + // If the option is set to true, only call instructions will be considered for // transformation -- invoke instructions will be ignored. static cl::opt @@ -105,8 +111,8 @@ public: static char ID; - PGOIndirectCallPromotionLegacyPass(bool InLTO = false) - : ModulePass(ID), InLTO(InLTO) { + PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false) + : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) { initializePGOIndirectCallPromotionLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -119,6 +125,10 @@ // If this pass is called in LTO. We need to special handling the PGOFuncName // for the static variables due to LTO's internalization. bool InLTO; + + // If this pass is called in SamplePGO. We need to add the prof metadata to + // the promoted direct call. 
+ bool SamplePGO; }; } // end anonymous namespace @@ -128,8 +138,9 @@ "direct calls.", false, false) -ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) { - return new PGOIndirectCallPromotionLegacyPass(InLTO); +ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, + bool SamplePGO) { + return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } namespace { @@ -144,6 +155,8 @@ // defines. InstrProfSymtab *Symtab; + bool SamplePGO; + // Test if we can legally promote this direct-call of Target. bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F, const char **Reason = nullptr); @@ -175,9 +188,9 @@ ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; public: - ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab) - : F(Func), M(Modu), Symtab(Symtab) { - } + ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, + bool SamplePGO) + : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {} bool processFunction(); }; @@ -509,10 +522,14 @@ // Ret = phi(Ret1, Ret2); // It adds type casts for the args do not match the parameters and the return // value. Branch weights metadata also updated. +// If \p AttachProfToDirectCall is true, a prof metadata is attached to the +// new direct call to contain \p Count. This is used by SamplePGO inliner to +// check callsite hotness. // Returns the promoted direct call instruction. Instruction *llvm::promoteIndirectCall(Instruction *Inst, Function *DirectCallee, uint64_t Count, - uint64_t TotalCount) { + uint64_t TotalCount, + bool AttachProfToDirectCall) { assert(DirectCallee != nullptr); BasicBlock *BB = Inst->getParent(); // Just to suppress the non-debug build warning. 
@@ -527,6 +544,14 @@ Instruction *NewInst = createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB); + if (AttachProfToDirectCall) { + SmallVector Weights; + Weights.push_back(Count); + MDBuilder MDB(NewInst->getContext()); + dyn_cast(NewInst->stripPointerCasts()) + ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + } + // Move Inst from MergeBB to IndirectCallBB. Inst->removeFromParent(); IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), @@ -569,7 +594,7 @@ for (auto &C : Candidates) { uint64_t Count = C.Count; - promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount); + promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; @@ -610,7 +635,7 @@ } // A wrapper function that does the actual work. -static bool promoteIndirectCalls(Module &M, bool InLTO) { +static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) { if (DisableICP) return false; InstrProfSymtab Symtab; @@ -621,7 +646,7 @@ continue; if (F.hasFnAttribute(Attribute::OptimizeNone)) continue; - ICallPromotionFunc ICallPromotion(F, &M, &Symtab); + ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO); bool FuncChanged = ICallPromotion.processFunction(); if (ICPDUMPAFTER && FuncChanged) { DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); @@ -638,11 +663,14 @@ bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { // Command-line option has the priority for InLTO. 
- return promoteIndirectCalls(M, InLTO | ICPLTOMode); + return promoteIndirectCalls(M, InLTO | ICPLTOMode, + SamplePGO | ICPSamplePGOMode); } -PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, ModuleAnalysisManager &AM) { - if (!promoteIndirectCalls(M, InLTO | ICPLTOMode)) +PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, + ModuleAnalysisManager &AM) { + if (!promoteIndirectCalls(M, InLTO | ICPLTOMode, + SamplePGO | ICPSamplePGOMode)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); Index: test/Transforms/PGOProfile/indirect_call_promotion.ll =================================================================== --- test/Transforms/PGOProfile/indirect_call_promotion.ll +++ test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -pgo-icall-prom -S -icp-samplepgo | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -pgo-icall-prom -S -icp-samplepgo | FileCheck %s --check-prefix=ICALL-PROM-SAMPLEPGO ; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM ; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK ; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK @@ -40,6 +42,7 @@ ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICALL-PROM: if.true.direct_targ: ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4() +; ICALL-PROM-SAMPLEPGO: call i32 @func4(), !prof [[CALL_METADATA:![0-9]+]] ; ICALL-PROM: br label %if.end.icp %call = call i32 %tmp(), !prof !1 ; ICALL-PROM: if.false.orig_indirect: @@ -54,3 +57,4 @@ ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 
570} ; ICALL-PROM: [[NEW_VP_METADATA]] = !{!"VP", i32 0, i64 570, i64 -4377547752858689819, i64 410} +; ICALL-PROM-SAMPLEPGO: [[CALL_METADATA]] = !{!"branch_weights", i32 1030}