diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4014,6 +4014,47 @@ return isa(V) && classof(cast(V)); } + static bool getProfMetaData(MDNode *MD, MDNode *&BranchWeightMD, + MDNode *&IndirectCallMD) { + BranchWeightMD = IndirectCallMD = nullptr; + + auto isMDwithName = [](MDNode *MD, const char *Str) { + auto *MDName = dyn_cast(MD->getOperand(0)); + if (!MDName) + return false; + return (MDName->getString().equals(Str)); + }; + + if (isMDwithName(MD, "branch_weights")) { + BranchWeightMD = MD; + return true; + } + if (isMDwithName(MD, "VP")) { + IndirectCallMD = MD; + return true; + } + if (MD->getNumOperands() != 3) + return false; + for (int I = 1; I < 3; I++) { + auto *MDT = cast(MD->getOperand(I)); + if (!MDT) + continue; + if (isMDwithName(MDT, "branch_weights")) + BranchWeightMD = MDT; + else if (isMDwithName(MDT, "VP")) + IndirectCallMD = MDT; + } + return (BranchWeightMD || IndirectCallMD); + } + + // A utility method that return "branch_weights" metadata. The return + // value will be nullptr if the metadata is not found. + static MDNode *getBranchWeightProfData(MDNode *MD) { + MDNode *BranchWeightMD, *IndirectCallMD; + getProfMetaData(MD, BranchWeightMD, IndirectCallMD); + return BranchWeightMD; + } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include #include @@ -79,13 +80,15 @@ /// The indirect function call promotion pass. class PGOIndirectCallPromotion : public PassInfoMixin { public: - PGOIndirectCallPromotion(bool IsInLTO = false, bool SamplePGO = false) - : InLTO(IsInLTO), SamplePGO(SamplePGO) {} + PGOIndirectCallPromotion( + ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::ThinLTOPreLink, + bool SamplePGO = false) + : Phase(Phase), SamplePGO(SamplePGO) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: - bool InLTO; + ThinOrFullLTOPhase Phase; bool SamplePGO; }; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -953,7 +953,7 @@ // the sample profile in the ThinLTO backend, we ideally shouldn't have to // provide the sample profile file. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) - MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); + MPM.addPass(PGOIndirectCallPromotion(Phase, HasSampleProfile)); // Do basic inference of function attributes from known properties of system // libraries and other oracles. @@ -990,8 +990,7 @@ // This is important for the ThinLTO backend phase because otherwise // imported available_externally functions look unreferenced and are // removed. - MPM.addPass( - PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); + MPM.addPass(PGOIndirectCallPromotion(Phase, true /* SamplePGO */)); } // Try to perform OpenMP specific optimizations on the module. This is a @@ -1047,7 +1046,7 @@ /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS); - MPM.addPass(PGOIndirectCallPromotion(false, false)); + MPM.addPass(PGOIndirectCallPromotion(Phase, false /* SamplePGO */)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) @@ -1616,8 +1615,9 @@ // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. - MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); + MPM.addPass(PGOIndirectCallPromotion(ThinOrFullLTOPhase::FullLTOPostLink, + PGOOpt && PGOOpt->Action == + PGOOptions::SampleUse)); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -102,6 +102,12 @@ ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); +static cl::opt ICPClearTargetValueProfiles( + "icp-clear-target-value-profiles", cl::init(false), cl::Hidden, + cl::desc("If set, override the " + "PGOIndirectCallPromotion::ClearValueProfileMetadata. For testing " + "and debugging purpose only")); + namespace { // The class for main data structure to promote indirect calls to conditional @@ -116,6 +122,7 @@ InstrProfSymtab *Symtab; bool SamplePGO; + bool ClearValueProfileMetadata = false; OptimizationRemarkEmitter &ORE; @@ -144,8 +151,10 @@ public: ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, - bool SamplePGO, OptimizationRemarkEmitter &ORE) - : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} + bool SamplePGO, bool ClearValueProfileMetadata, + OptimizationRemarkEmitter &ORE) + : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), + ClearValueProfileMetadata(ClearValueProfileMetadata), ORE(ORE) {} ICallPromotionFunc(const ICallPromotionFunc &) = delete; ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete; @@ -293,6 +302,21 @@ return NumPromoted; } +static void clearValueProfile(CallBase *CB) { + assert(CB != nullptr && "Caller guaranteed"); + MDNode *MDProf = CB->getMetadata(LLVMContext::MD_prof); + if (!MDProf || isa(CB)) { + CB->setMetadata(LLVMContext::MD_prof, nullptr); + return; + } + + if (!isa(CB)) + return; + + CB->setMetadata(LLVMContext::MD_prof, + InvokeInst::getBranchWeightProfData(MDProf)); +} + // Traverse all the indirect-call callsite and get the value profile // annotation to perform indirect-call promotion. bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { @@ -309,6 +333,11 @@ auto PromotionCandidates = getPromotionCandidatesForCallSite( *CB, ICallProfDataRef, TotalCount, NumCandidates); uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); + + if (ClearValueProfileMetadata) { + clearValueProfile(CB); + continue; + } if (NumPromoted == 0) continue; @@ -326,8 +355,8 @@ } // A wrapper function that does the actual work. -static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, - bool InLTO, bool SamplePGO, +static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, + bool SamplePGO, bool ClearValueProfileMetadata, ModuleAnalysisManager *AM = nullptr) { if (DisableICP) return false; @@ -353,7 +382,8 @@ ORE = OwnedORE.get(); } - ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE); + ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, + ClearValueProfileMetadata, *ORE); bool FuncChanged = ICallPromotion.processFunction(PSI); if (ICPDUMPAFTER && FuncChanged) { LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); @@ -372,8 +402,18 @@ ModuleAnalysisManager &AM) { ProfileSummaryInfo *PSI = &AM.getResult(M); + const bool InLTO = (Phase == ThinOrFullLTOPhase::ThinLTOPostLink || + Phase == ThinOrFullLTOPhase::FullLTOPostLink); + + bool ClearValueProfileMetadata = + (Phase != ThinOrFullLTOPhase::ThinLTOPreLink && + Phase != ThinOrFullLTOPhase::FullLTOPreLink); + if (ICPClearTargetValueProfiles.getNumOccurrences()) + ClearValueProfileMetadata = ICPClearTargetValueProfiles; + if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, - SamplePGO | ICPSamplePGOMode, &AM)) + SamplePGO | ICPSamplePGOMode, + ClearValueProfileMetadata, &AM)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll --- a/llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll +++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -passes=pgo-icall-prom -S -icp-total-percent-threshold=50 | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -passes=pgo-icall-prom -S -icp-total-percent-threshold=50 -icp-clear-target-value-profiles | FileCheck %s --check-prefix=ICALL-PROM-NO-PROF ; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK ; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=20 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS2-REMARK @@ -49,6 +50,7 @@ %call = call i32 %tmp(), !prof !1 ; ICALL-PROM: if.false.orig_indirect: ; ICALL-PROM: %call = call i32 %tmp(), !prof [[NEW_VP_METADATA:![0-9]+]] +; ICALL-PROM-NO-PROF: %call = call i32 %tmp() ret i32 %call ; ICALL-PROM: if.end.icp: ; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ] diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll --- a/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll +++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Mustcall needs stricter parameter type checks otherwise it will fail in verifier. -; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s +; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefixes=CHECK,CHECK_ICP_PROF +; RUN: opt < %s -passes=pgo-icall-prom -icp-clear-target-value-profiles -S | FileCheck %s --check-prefixes=CHECK,CHECK_NO_ICP_PROF ; Here we check there is no ICP due to parameter mismatch. define ptr @func(ptr %msg, ptr %ptr, ptr %ctx, i64 %data.coerce, ptr %table, i64 %hasbits) { @@ -10,7 +11,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr null, align 8 ; CHECK-NEXT: ret ptr null ; CHECK: 1: -; CHECK-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP0]](ptr null, ptr null, ptr null, i64 0, ptr null, i64 0), !prof [[PROF0:![0-9]+]] +; CHECK_ICP_PROF-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP0]](ptr null, ptr null, ptr null, i64 0, ptr null, i64 0), !prof [[PROF0:![0-9]+]] +; CHECK_NO_ICP_PROF-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP0]](ptr null, ptr null, ptr null, i64 0, ptr null, i64 0) ; CHECK-NEXT: ret ptr [[CALL11_I]] ; entry: @@ -35,7 +37,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = musttail call ptr @_ZN6proto28internal12ExtensionSet10ParseFieldEmPKcPKNS_7MessageEPNS0_16InternalMetadataEPNS0_12ParseContextE(ptr null, i64 0, ptr null, ptr null, ptr null, ptr null) ; CHECK-NEXT: ret ptr [[TMP3]] ; CHECK: 4: -; CHECK-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP1]](ptr null, i64 0, ptr null, ptr null, ptr null, ptr null), !prof [[PROF2:![0-9]+]] +; CHECK_ICP_PROF-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP1]](ptr null, i64 0, ptr null, ptr null, ptr null, ptr null), !prof [[PROF2:![0-9]+]] +; CHECK_NO_ICP_PROF-NEXT: [[CALL11_I:%.*]] = musttail call ptr [[TMP1]](ptr null, i64 0, ptr null, ptr null, ptr null, ptr null) ; CHECK-NEXT: ret ptr [[CALL11_I]] ; entry: