Index: llvm/include/llvm/IR/Metadata.h =================================================================== --- llvm/include/llvm/IR/Metadata.h +++ llvm/include/llvm/IR/Metadata.h @@ -24,6 +24,7 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/CBindingWrapping.h" @@ -36,6 +37,7 @@ #include #include #include +#include #include namespace llvm { @@ -1274,6 +1276,28 @@ template static void dispatchResetHash(NodeTy *, std::false_type) {} + // Given a value profile Node, gets the TotalCount and the list of + // pairs. Returns false if indirect-call + // sites value profile are not found. + static bool getIndirectCallValueProfileMetadata( + const MDNode *Node, uint64_t &Sum, + std::unordered_map &ValCntMap); + + // Similar to the function above but takes an instruction and !prof + // rather than known value profile. + // This is mostly a wrapper around the function above, and extracts + // value profile from invoke instructions. + static bool getIndirectCallValueProfileMetadata( + const Instruction *Inst, const MDNode *Node, uint64_t &Sum, + std::unordered_map &ValCntMap); + + /// Merge two indirect callsites value profiles by + /// 1) adding total count and each target value's counters + /// 2) sort pairs based on counter value descendingly. + static MDNode *getMergedIndirectCallProfMetadata(MDNode *A, MDNode *B, + const Instruction *AInstr, + const Instruction *BInstr); + /// Merge branch weights from two direct callsites. 
static MDNode *mergeDirectCallProfMetadata(MDNode *A, MDNode *B, const Instruction *AInstr, @@ -1324,6 +1348,11 @@ static MDNode *getMostGenericRange(MDNode *A, MDNode *B); static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B); static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B); + + // Helper function to decide if it's profitable to merge indirect call profile + // metadata. + static bool isProfitableToMergeICallProfMetadata(MDNode *A, MDNode *B, + const Module &M); /// Merge !prof metadata from two instructions. /// Currently only implemented with direct callsites with branch weights. static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B, Index: llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include #include @@ -79,13 +80,15 @@ /// The indirect function call promotion pass. 
class PGOIndirectCallPromotion : public PassInfoMixin { public: - PGOIndirectCallPromotion(bool IsInLTO = false, bool SamplePGO = false) - : InLTO(IsInLTO), SamplePGO(SamplePGO) {} + PGOIndirectCallPromotion( + ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::ThinLTOPreLink, + bool SamplePGO = false) + : Phase(Phase), SamplePGO(SamplePGO) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: - bool InLTO; + ThinOrFullLTOPhase Phase; bool SamplePGO; }; Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1072,6 +1073,142 @@ return B; } +bool MDNode::isProfitableToMergeICallProfMetadata(MDNode *A, MDNode *B, + const Module &M) { + const ConstantInt *MF = mdconst::dyn_extract_or_null( + M.getModuleFlag("IndirectCallPromComplete")); + + // Before PGOIndirectCallPromotion pass completes, merge value profiles + // only if at most indirect callsites have value profile metadata. + if (!MF || MF->isZero()) + return !(A && B); + + // If PGOIndirectCallPromotion pass completes, merging value profiles + // to preserve pairs for CGProfile pass. + // It's fine to sum the total count from two !prof and change distribution + // at this point since no other pass other than PGOIndirectCallPromotion + // uses total count. 
+ return true; +} + +bool MDNode::getIndirectCallValueProfileMetadata( + const Instruction *Inst, const MDNode *Node, uint64_t &Sum, + std::unordered_map &ValCntMap) { + const unsigned NOps = Node->getNumOperands(); + if (NOps < 3) + return false; + + MDString *Tag = dyn_cast_or_null<MDString>(Node->getOperand(0)); + if (Tag && Tag->getString().equals("VP")) + return getIndirectCallValueProfileMetadata(Node, Sum, ValCntMap); + + auto isMDwithName = [](const MDNode *MD, const char *Str) { + auto *MDName = dyn_cast(MD->getOperand(0)); + if (!MDName) + return false; + return (MDName->getString().equals(Str)); + }; + if (NOps == 3 && isa(Inst)) { + for (unsigned I = 1; I < 3; I++) { + const MDNode *OperandNode = dyn_cast(Node->getOperand(I)); + if (OperandNode && isMDwithName(OperandNode, "VP")) + return getIndirectCallValueProfileMetadata(OperandNode, Sum, ValCntMap); + } + } + return false; +} + +bool MDNode::getIndirectCallValueProfileMetadata( + const MDNode *Node, uint64_t &Sum, + std::unordered_map &ValCntMap) { + const unsigned NOps = Node->getNumOperands(); + if (NOps < 3 || NOps % 2 == 0) + return false; // Need tag, kind, total, then whole (value, count) pairs. + + // Operand 0 is the string "VP" + MDString *Tag = dyn_cast_or_null<MDString>(Node->getOperand(0)); + if (!Tag || !Tag->getString().equals("VP")) + return false; + + // Operand 1 is the value profile kind; only kind 0 is accepted. 
+ ConstantInt *KindInt = mdconst::dyn_extract(Node->getOperand(1)); + if (!KindInt || KindInt->getZExtValue() != 0) + return false; + + ConstantInt *TotalCnt = + mdconst::dyn_extract(Node->getOperand(2)); + if (!TotalCnt) + return false; + + Sum = TotalCnt->getZExtValue(); + + for (unsigned I = 3; I < NOps; I += 2) { + ConstantInt *TargetValue = + mdconst::dyn_extract(Node->getOperand(I)); + ConstantInt *Count = + mdconst::dyn_extract(Node->getOperand(I + 1)); + + if (!TargetValue || !Count) + return false; + + const uint64_t ValInt = TargetValue->getZExtValue(); + + ValCntMap[ValInt] = SaturatingAdd(ValCntMap[ValInt], Count->getZExtValue()); + } + + return true; +} + +MDNode *MDNode::getMergedIndirectCallProfMetadata(MDNode *A, MDNode *B, + const Instruction *AInstr, + const Instruction *BInstr) { + assert(A && B && "Caller guaranteed"); + uint64_t ASum = 0; + uint64_t BSum = 0; + std::unordered_map AValCntMap, BValCntMap; + + // If one metadata is malformed, conservatively return the other for now. + // FIXME: Note this shouldn't happen and IR verifier should verify the format + // of value profiles instead. + if (!getIndirectCallValueProfileMetadata(BInstr, B, BSum, BValCntMap)) + return A; + if (!getIndirectCallValueProfileMetadata(AInstr, A, ASum, AValCntMap)) + return B; + + for (const auto &[Val, Cnt] : AValCntMap) { + BValCntMap[Val] = SaturatingAdd(Cnt, BValCntMap[Val]); + } + + using ValCntPair = std::pair; + + SmallVector MergedVals; + for (const auto &[Val, Cnt] : BValCntMap) { + MergedVals.push_back(std::make_pair(Val, Cnt)); + } + + // Sort by descending counter. FIXME: ties are left in unspecified order. 
+ llvm::sort(MergedVals, + [](const ValCntPair &LHS, const ValCntPair &RHS) -> bool { + return LHS.second > RHS.second; + }); + + auto &Ctx = AInstr->getContext(); + MDBuilder MDHelper(Ctx); + SmallVector Vals; + Vals.push_back(MDHelper.createString("VP")); + Vals.push_back(MDHelper.createConstant( + ConstantInt::get(Type::getInt32Ty(Ctx), 0 /* ValueKind */))); + Vals.push_back(MDHelper.createConstant( + ConstantInt::get(Type::getInt64Ty(Ctx), SaturatingAdd(ASum, BSum)))); + for (const auto &[Val, Cnt] : MergedVals) { + Vals.push_back( + MDHelper.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Val))); + Vals.push_back( + MDHelper.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Cnt))); + } + return MDNode::get(Ctx, Vals); +} + // Call instructions with branch weights are only used in SamplePGO as // documented in /// https://llvm.org/docs/BranchWeightMetadata.html#callinst). @@ -1123,14 +1260,32 @@ assert(BInstr->getMetadata(LLVMContext::MD_prof) == B && "Caller should guarantee"); - const CallInst *ACall = dyn_cast(AInstr); - const CallInst *BCall = dyn_cast(BInstr); + // Note a CallBase could be CallInst (e.g., direct or virtual member calls), + // InvokeInst (e.g., virtual destructors) or CallBrInst (inline assembly goto + // that are not relevant here). + auto getFunctionCallInstr = [](const Instruction *Instr) -> const CallBase * { + if (isa(Instr) || isa(Instr)) + return dyn_cast(Instr); + return nullptr; + }; + + const CallBase *ACall = getFunctionCallInstr(AInstr); + const CallBase *BCall = getFunctionCallInstr(BInstr); // Both ACall and BCall are direct callsites. 
if (ACall && BCall && ACall->getCalledFunction() && BCall->getCalledFunction()) return mergeDirectCallProfMetadata(A, B, AInstr, BInstr); + if (ACall && BCall && ACall->isIndirectCall() && BCall->isIndirectCall()) { + const Module *M = ACall->getModule(); + assert(M == BCall->getModule() && + "ACall and BCall should come from the same module"); + if (!isProfitableToMergeICallProfMetadata(A, B, *M)) + return A ? A : B; + return getMergedIndirectCallProfMetadata(A, B, AInstr, BInstr); + } + // The rest of the cases are not implemented but could be added // when there are use cases. return nullptr; Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -995,7 +995,7 @@ // the sample profile in the ThinLTO backend, we ideally shouldn't have to // provide the sample profile file. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) - MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); + MPM.addPass(PGOIndirectCallPromotion(Phase, HasSampleProfile)); // Do basic inference of function attributes from known properties of system // libraries and other oracles. @@ -1032,8 +1032,7 @@ // This is important for the ThinLTO backend phase because otherwise // imported available_externally functions look unreferenced and are // removed. - MPM.addPass( - PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); + MPM.addPass(PGOIndirectCallPromotion(Phase, true /* SamplePGO */)); } // Try to perform OpenMP specific optimizations on the module. 
This is a @@ -1088,7 +1087,7 @@ /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS); - MPM.addPass(PGOIndirectCallPromotion(false, false)); + MPM.addPass(PGOIndirectCallPromotion(Phase, false /* SamplePGO */)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) @@ -1638,8 +1637,9 @@ // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. - MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); + MPM.addPass(PGOIndirectCallPromotion(ThinOrFullLTOPhase::FullLTOPostLink, + PGOOpt && PGOOpt->Action == + PGOOptions::SampleUse)); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function Index: llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" @@ -102,6 +103,11 @@ ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); +static cl::opt MarkIndirectCallPromotionComplete( + "mark-icp-complete", cl::init(false), cl::Hidden, + cl::desc("If true, override the IndirectCallPromotionCompleteAfterPass. 
" + "For testing and debugging purpose only")); + namespace { // The class for main data structure to promote indirect calls to conditional @@ -116,6 +122,7 @@ InstrProfSymtab *Symtab; bool SamplePGO; + bool MarkIndirectCallsPromoted = false; OptimizationRemarkEmitter &ORE; @@ -326,8 +333,8 @@ } // A wrapper function that does the actual work. -static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, - bool InLTO, bool SamplePGO, +static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, + bool SamplePGO, ModuleAnalysisManager *AM = nullptr) { if (DisableICP) return false; @@ -372,6 +379,27 @@ ModuleAnalysisManager &AM) { ProfileSummaryInfo *PSI = &AM.getResult(M); + const bool InLTO = (Phase == ThinOrFullLTOPhase::ThinLTOPostLink || + Phase == ThinOrFullLTOPhase::FullLTOPostLink); + + // If this ICP pass runs in a non-LTO pipeline or postlink pipeline of LTO + // build, it promotes all eligible indirect calls. After that it's performance + // neutral to merge indirect call value profiles. + bool IndirectCallPromotionCompleteAfterPass = + (Phase != ThinOrFullLTOPhase::ThinLTOPreLink && + Phase != ThinOrFullLTOPhase::FullLTOPreLink); + if (MarkIndirectCallPromotionComplete.getNumOccurrences()) + IndirectCallPromotionCompleteAfterPass = MarkIndirectCallPromotionComplete; + + // Set module flag before pass completes. It's fine to set this flag at the + // beginning but then the name 'IndirectCallPromComplete' is confusing. 
+ auto SetModuleFlagOnExit = llvm::make_scope_exit([&]() { + if (IndirectCallPromotionCompleteAfterPass) + M.setModuleFlag(Module::Error, "IndirectCallPromComplete", + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(M.getContext()), 1))); + }); + if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, SamplePGO | ICPSamplePGOMode, &AM)) return PreservedAnalyses::all(); Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1488,17 +1488,26 @@ /// instructions \p I1 and \p I2 can and should be hoisted. static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI) { - // If we're going to hoist a call, make sure that the two instructions - // we're commoning/hoisting are both marked with musttail, or neither of - // them is marked as such. Otherwise, we might end up in a situation where - // we hoist from a block where the terminator is a `ret` to a block where - // the terminator is a `br`, and `musttail` calls expect to be followed by - // a return. + auto *C1 = dyn_cast(I1); auto *C2 = dyn_cast(I2); - if (C1 && C2) + if (C1 && C2) { + // If we're going to hoist a call, make sure that the two instructions + // we're commoning/hoisting are both marked with musttail, or neither of + // them is marked as such. Otherwise, we might end up in a situation where + // we hoist from a block where the terminator is a `ret` to a block where + // the terminator is a `br`, and `musttail` calls expect to be followed by + // a return. if (C1->isMustTailCall() != C2->isMustTailCall()) return false; + // Do not hoist two indirect calls which requires merging value profile + // metadata when it's not profitable to do so in a pass pipeline. 
+ if (C1->isIndirectCall() && C2->isIndirectCall() && + !MDNode::isProfitableToMergeICallProfMetadata( + C1->getMetadata(LLVMContext::MD_prof), + C2->getMetadata(LLVMContext::MD_prof), *C1->getModule())) + return false; + } if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return false; @@ -1864,6 +1873,14 @@ if (HaveIndirectCalls) { if (!AllCallsAreIndirect) return false; + + const Module &M = *I0->getModule(); + MDNode *I0Prof = I0->getMetadata(LLVMContext::MD_prof); + for (unsigned I = 1; I < Insts.size(); I++) { + if (!MDNode::isProfitableToMergeICallProfMetadata( + I0Prof, Insts[I]->getMetadata(LLVMContext::MD_prof), M)) + return false; + } } else { // All callees must be identical. Value *Callee = nullptr; Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -174,3 +174,7 @@ exit: ret void } + +; CHECK-O23SZ: !llvm.module.flags = !{[[ICP:![0-9]+]]} + +; CHECK-O23SZ: [[ICP]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: llvm/test/Other/new-pm-pgo.ll =================================================================== --- llvm/test/Other/new-pm-pgo.ll +++ llvm/test/Other/new-pm-pgo.ll @@ -1,15 +1,15 @@ ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-gen-pipeline -profile-file='temp' %s 2>&1 |FileCheck %s --check-prefixes=GEN ; RUN: llvm-profdata merge %S/Inputs/new-pm-pgo.proftext -o %t.profdata -; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE -; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE -; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE_POST_LINK 
-; RUN: opt -debug-pass-manager -passes='default' -hot-cold-split -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE --check-prefixes=SPLIT -; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ -; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O -; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ -; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_PRE_LINK -; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ -; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_POST_LINK +; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' -S %s 2>&1 |FileCheck %s --check-prefixes=USE,MOD_PS_ICP +; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' -S %s 2>&1 |FileCheck %s --check-prefixes=USE,MOD_PS +; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' -S %s 2>&1 |FileCheck %s --check-prefixes=USE_POST_LINK,MOD_ICP +; RUN: opt -debug-pass-manager -passes='default' -hot-cold-split -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' -S %s 2>&1 |FileCheck %s --check-prefixes=USE,SPLIT,MOD_PS_ICP +; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' -S %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O,MOD_PS_ICP +; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' -S %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_PRE_LINK,MOD_PS +; RUN: opt -debug-pass-manager 
-passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' -S %s 2>&1 \ +; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_POST_LINK,MOD_PS_ICP ; RUN: opt -debug-pass-manager -passes='default' -debug-info-for-profiling %s 2>&1 |FileCheck %s --check-prefixes=SAMPLE_GEN ; ; GEN: Running pass: PGOInstrumentationGen @@ -35,3 +35,14 @@ define void @foo() { ret void } + + +; MOD_PS_ICP: !llvm.module.flags = !{[[PSMOD:![0-9]+]], [[ICPMOD:![0-9]+]]} +; MOD_PS_ICP: [[PSMOD]] = !{i32 1, !"ProfileSummary", !1} +; MOD_PS_ICP: [[ICPMOD]] = !{i32 1, !"IndirectCallPromComplete", i1 true} + +; MOD_PS: !llvm.module.flags = !{[[PSMOD:![0-9]+]]} +; MOD_PS: [[PSMOD]] = !{i32 1, !"ProfileSummary", !1} + +; MOD_ICP: !llvm.module.flags = !{[[ICPMOD:![0-9]+]]} +; MOD_ICP: [[ICPMOD]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: llvm/test/Other/new-pm-thinlto-postlink-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -240,3 +240,7 @@ exit: ret void } + +; CHECK-O: !llvm.module.flags = !{[[ICP:![0-9]+]]} + +; CHECK-O: [[ICP]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -230,6 +230,8 @@ !llvm.module.flags = !{!0} +; CHECK-O: !llvm.module.flags = !{[[PS:![0-9]+]], [[ICP:![0-9]+]]} + !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} !2 = !{!"ProfileFormat", !"InstrProf"} @@ -257,3 +259,4 @@ !24 = !{i32 999900, i64 0, i32 0} !25 = !{i32 999990, i64 0, i32 0} !26 = !{i32 999999, i64 0, i32 0} +; CHECK-O: [[ICP]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: 
llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -234,3 +234,8 @@ exit: ret void } + +; CHECK-O: !llvm.module.flags = !{[[PS:![0-9]+]], [[ICP:![0-9]+]]} + +; CHECK-O: [[PS]] = !{i32 1, !"ProfileSummary", !1} +; CHECK-O: [[ICP]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: llvm/test/Other/new-pm-thinlto-prelink-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -200,3 +200,5 @@ exit: ret void } + +; CHECK-O-NOT: !llvm.module.flags Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -226,3 +226,7 @@ exit: ret void } + +; CHECK-O: !llvm.module.flags = !{[[PS:![0-9]+]]} + +; CHECK-O: [[PS]] = !{i32 1, !"ProfileSummary", !1} Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -189,3 +189,7 @@ exit: ret void } + +; CHECK-O: !llvm.module.flags = !{[[PS:![0-9]+]]} + +; CHECK-O: [[PS]] = !{i32 1, !"ProfileSummary", !1} Index: llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll =================================================================== --- llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll +++ llvm/test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes=pgo-icall-prom -S -icp-total-percent-threshold=50 | FileCheck %s --check-prefix=ICALL-PROM 
+; RUN: opt < %s -passes=pgo-icall-prom -S -icp-total-percent-threshold=50 | FileCheck %s --check-prefixes=ICALL-PROM,ICALL-PROM-NO-MODU +; RUN: opt < %s -passes=pgo-icall-prom -S -icp-total-percent-threshold=50 -mark-icp-complete | FileCheck %s --check-prefixes=ICALL-PROM,ICALL-PROM-MODU ; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK ; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=20 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS2-REMARK @@ -55,6 +56,11 @@ ; ICALL-PROM: ret i32 [[PHI_RET]] } +; ICALL-PROM-NO-MODU-NOT: !llvm.module.flags = !{[[MODULE_FLAG:![0-9]+]]} +; ICALL-PROM-MODU: !llvm.module.flags = !{[[MODULE_FLAG:![0-9]+]]} + +; ICALL-PROM-MODU: [[MODULE_FLAG]] = !{i32 1, !"IndirectCallPromComplete", i1 true} + !1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 570} Index: llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll =================================================================== --- llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll +++ llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail_typecheck.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Mustcall needs stricter parameter type checks otherwise it will fail in verifier. -; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s +; RUN: opt < %s -passes=pgo-icall-prom -mark-icp-complete -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODU ; Here we check there is no ICP due to parameter mismatch. 
define ptr @func(ptr %msg, ptr %ptr, ptr %ctx, i64 %data.coerce, ptr %table, i64 %hasbits) { @@ -53,3 +53,7 @@ } !0 = !{!"VP", i32 0, i64 2024, i64 -4843250054591211088, i64 -1, i64 1456131869974120143, i64 947, i64 -4941069334091589447, i64 18} + +; CHECK-MODU: !llvm.module.flags = !{[[MODULE_FLAG:![0-9]+]]} + +; CHECK-MODU: [[MODULE_FLAG]] = !{i32 1, !"IndirectCallPromComplete", i1 true} Index: llvm/test/Transforms/SimplifyCFG/merge-indirect-call-metadata.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/merge-indirect-call-metadata.ll @@ -0,0 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=SINK + +; IR @indirect_call_value_profile_merged_by_hoist is generated based on following C++ with manually-annotated !prof +; Tests that value profiles are merged with instruction hoisted if module flag 'IndirectCallPromComplete' is true. 
+; class Base { +; public: +; virtual int gettype() = 0; +; virtual int func1(int a, int b) = 0; +;}; +; +; int func2(int x, int y); +; +; Base* createptr(int c); +; +; int func(int x, int a, int b, int c) { +; Base* d = createptr(c); +; if (d->gettype() % 5 == 0) { +; auto ret = d->func1(a, b); +; return ret + func2(b, a); +; } +; return d->func1(a, b); +; } +define i32 @indirect_call_value_profile_merged_by_hoist(i32 %x, i32 %a, i32 %b, i32 %c) { +; HOIST-LABEL: define i32 @indirect_call_value_profile_merged_by_hoist +; HOIST-SAME: (i32 [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT: [[CALL:%.*]] = tail call ptr @createptr(i32 [[C]]) +; HOIST-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8 +; HOIST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; HOIST-NEXT: [[CALL1:%.*]] = tail call i32 [[TMP0]](ptr [[CALL]]) +; HOIST-NEXT: [[REM:%.*]] = srem i32 [[CALL1]], 5 +; HOIST-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; HOIST-NEXT: [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8 +; HOIST-NEXT: [[VFN3:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE2]], i64 1 +; HOIST-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN3]], align 8 +; HOIST-NEXT: [[CALL4:%.*]] = tail call i32 [[TMP1]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF1:![0-9]+]] +; HOIST-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[CLEANUP:%.*]] +; HOIST: if.then: +; HOIST-NEXT: [[CALL5:%.*]] = tail call i32 @func2(i32 [[B]], i32 [[A]]) +; HOIST-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL5]], [[CALL4]] +; HOIST-NEXT: br label [[CLEANUP]] +; HOIST: cleanup: +; HOIST-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[CALL4]], [[ENTRY:%.*]] ] +; HOIST-NEXT: ret i32 [[RETVAL_0]] +; + +entry: + %call = tail call ptr @createptr(i32 %c) + %vtable = load ptr, ptr %call, align 8 + %0 = load ptr, ptr %vtable, align 8 + %call1 = tail call i32 %0(ptr %call) + %rem = srem i32 %call1, 5 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %if.then, label 
%if.end + +if.then: ; preds = %entry + %vtable2 = load ptr, ptr %call, align 8 + %vfn3 = getelementptr inbounds ptr, ptr %vtable2, i64 1 + %1 = load ptr, ptr %vfn3, align 8 + %call4 = tail call i32 %1(ptr %call, i32 %a, i32 %b), !prof !1 + %call5 = tail call i32 @func2(i32 %b, i32 %a) + %add = add nsw i32 %call5, %call4 + br label %cleanup + +if.end: ; preds = %entry + %vtable6 = load ptr, ptr %call, align 8 + %vfn7 = getelementptr inbounds ptr, ptr %vtable6, i64 1 + %2 = load ptr, ptr %vfn7, align 8 + %call8 = tail call i32 %2(ptr %call, i32 %a, i32 %b), !prof !2 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %add, %if.then ], [ %call8, %if.end ] + ret i32 %retval.0 +} + +; IR @call_not_sinked is generated based on the following C++ code, with manually annotated !prof +; Tests that value profiles are merged with instruction sink if module flag 'IndirectCallPromComplete' is true. +; class Base { +; public: +; virtual int func() = 0; +; virtual int func1(int a, int b) = 0; +; virtual int func2(int a, int b) = 0; +; }; +; +; Base* createptr(int c); +; +; int func(int x, int a, int b, int c) { +; Base* d = createptr(c); +; if (x % 1000 == 0) +; return d->func1(a, b); +; return d->func2(a, b); +; } +define i32 @indirect_call_value_profile_merged_by_sink(i32 %x, i32 %a, i32 %b, i32 %c) { +; SINK-LABEL: define i32 @indirect_call_value_profile_merged_by_sink +; SINK-SAME: (i32 [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; SINK-NEXT: entry: +; SINK-NEXT: [[CALL:%.*]] = tail call ptr @createptr(i32 [[C]]) +; SINK-NEXT: [[REM:%.*]] = srem i32 [[X]], 1000 +; SINK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; SINK-NEXT: [[DOT:%.*]] = select i1 [[CMP]], i64 1, i64 2 +; SINK-NEXT: [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8 +; SINK-NEXT: [[VFN3:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE2]], i64 [[DOT]] +; SINK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VFN3]], align 8 +; SINK-NEXT: [[CALL4:%.*]] = tail call i32 
[[TMP0]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF3:![0-9]+]] +; SINK-NEXT: ret i32 [[CALL4]] +; +entry: + %call = tail call ptr @createptr(i32 %c) + %rem = srem i32 %x, 1000 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %vtable = load ptr, ptr %call, align 8 + %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + %0 = load ptr, ptr %vfn + %call1 = tail call i32 %0(ptr %call, i32 %a, i32 %b), !prof !3 + br label %cleanup + +if.end: ; preds = %entry + %vtable2 = load ptr, ptr %call, align 8 + %vfn3 = getelementptr inbounds ptr, ptr %vtable2, i64 2 + %1 = load ptr, ptr %vfn3 + %call4 = tail call i32 %1(ptr %call, i32 %a, i32 %b), !prof !4 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call1, %if.then ], [ %call4, %if.end ] + ret i32 %retval.0 +} + + +declare ptr @createptr(i32) +declare i32 @func2(i32, i32) + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"IndirectCallPromComplete", i1 true} +!1 = !{!"VP", i32 0, i64 1600, i64 12345, i64 1030, i64 678, i64 410} +!2 = !{!"VP", i32 0, i64 1601, i64 54321, i64 1030, i64 678, i64 410} +!3 =!{!"VP", i32 0, i64 1600, i64 12345, i64 1030, i64 678, i64 410} +!4 =!{!"VP", i32 0, i64 1601, i64 54321, i64 1030, i64 876, i64 410} +;. +; HOIST: [[META0:![0-9]+]] = !{i32 1, !"IndirectCallPromComplete", i1 true} +; HOIST: [[PROF1]] = !{!"VP", i32 0, i64 3201, i64 12345, i64 1030, i64 54321, i64 1030, i64 678, i64 820} +;. +; SINK: [[META0:![0-9]+]] = !{i32 1, !"IndirectCallPromComplete", i1 true} +; SINK: [[PROF3]] = !{!"VP", i32 0, i64 3201, i64 54321, i64 1030, i64 12345, i64 1030, i64 876, i64 410, i64 678, i64 410} +;. 
Index: llvm/test/Transforms/SimplifyCFG/preserve-indirect-call-metadata.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/preserve-indirect-call-metadata.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg<hoist-common-insts>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST +; RUN: opt < %s -passes='simplifycfg<sink-common-insts>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=SINK + +; IR @indirect_call_value_profile_preserved_by_hoist is generated based on the following C++ code with manually-annotated !prof. +; Tests that indirect callsites are preserved as opposed to sunk if module flag 'IndirectCallPromComplete' is not present. +; Without preserving call instructions, `d->func1` is hoisted while it may not make sense +; to do so. For example, the candidate calls are different based on derived type. 
+; class Base { +; public: +; virtual int gettype() = 0; +; virtual int func1(int a, int b) = 0; +; }; +; +; int func2(int x, int y); +; +; Base* createptr(int c); +; +; int func(int x, int a, int b, int c) { +; Base* d = createptr(c); +; if (d->gettype() % 5 == 0) { +; auto ret = d->func1(a, b); +; return ret + func2(b, a); +; } +; return d->func1(a, b); +; } +define i32 @indirect_call_value_profile_preserved_by_hoist(i32 %x, i32 %a, i32 %b, i32 %c) { +; +; SINK-LABEL: define i32 @indirect_call_value_profile_preserved_by_hoist +; SINK-SAME: (i32 [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; SINK-NEXT: entry: +; SINK-NEXT: [[CALL:%.*]] = tail call ptr @createptr(i32 [[C]]) +; SINK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8 +; SINK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; SINK-NEXT: [[CALL1:%.*]] = tail call i32 [[TMP0]](ptr [[CALL]]) +; SINK-NEXT: [[REM:%.*]] = srem i32 [[CALL1]], 5 +; SINK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; SINK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; SINK: if.then: +; SINK-NEXT: [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8 +; SINK-NEXT: [[VFN3:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE2]], i64 1 +; SINK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN3]], align 8 +; SINK-NEXT: [[CALL4:%.*]] = tail call i32 [[TMP1]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] +; SINK-NEXT: [[CALL5:%.*]] = tail call i32 @func2(i32 [[B]], i32 [[A]]) +; SINK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL5]], [[CALL4]] +; SINK-NEXT: br label [[CLEANUP:%.*]] +; SINK: if.end: +; SINK-NEXT: [[VTABLE6:%.*]] = load ptr, ptr [[CALL]], align 8 +; SINK-NEXT: [[VFN7:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE6]], i64 1 +; SINK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VFN7]], align 8 +; SINK-NEXT: [[CALL8:%.*]] = tail call i32 [[TMP2]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF1:![0-9]+]] +; SINK-NEXT: br label [[CLEANUP]] +; SINK: cleanup: +; SINK-NEXT: [[RETVAL_0:%.*]] = phi 
i32 [ [[ADD]], [[IF_THEN]] ], [ [[CALL8]], [[IF_END]] ] +; SINK-NEXT: ret i32 [[RETVAL_0]] +; + +entry: + %call = tail call ptr @createptr(i32 %c) + %vtable = load ptr, ptr %call, align 8 + %0 = load ptr, ptr %vtable, align 8 ; vtable entry 0 - presumably Base::gettype from the C++ listing + %call1 = tail call i32 %0(ptr %call) + %rem = srem i32 %call1, 5 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %vtable2 = load ptr, ptr %call, align 8 + %vfn3 = getelementptr inbounds ptr, ptr %vtable2, i64 1 ; vtable entry 1 - presumably Base::func1 + %1 = load ptr, ptr %vfn3, align 8 + %call4 = tail call i32 %1(ptr %call, i32 %a, i32 %b), !prof !1 + %call5 = tail call i32 @func2(i32 %b, i32 %a) + %add = add nsw i32 %call5, %call4 + br label %cleanup + +if.end: ; preds = %entry + %vtable6 = load ptr, ptr %call, align 8 + %vfn7 = getelementptr inbounds ptr, ptr %vtable6, i64 1 ; same vtable entry as in if.then, but the call below carries a different !prof node + %2 = load ptr, ptr %vfn7, align 8 + %call8 = tail call i32 %2(ptr %call, i32 %a, i32 %b), !prof !2 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %add, %if.then ], [ %call8, %if.end ] + ret i32 %retval.0 +} + +; IR @indirect_call_value_profile_preserved_by_sink is generated based on the following C++ code, with manually annotated !prof metadata. +; Note 'd->func1' and 'd->func2' are two virtual functions with different offsets so could have different value profiles. +; Without preserving call instructions with !prof, call instructions are simplified to one with selected offsets. 
+; class Base { +; public: +; virtual int func() = 0; +; virtual int func1(int a, int b) = 0; +; virtual int func2(int a, int b) = 0; +; }; +; +; Base* createptr(int c); +; +; int func(int x, int a, int b, int c) { +; Base* d = createptr(c); +; if (x % 1000 == 0) +; return d->func1(a, b); +; return d->func2(a, b); +; } +define i32 @indirect_call_value_profile_preserved_by_sink(i32 %x, i32 %a, i32 %b, i32 %c) { +; HOIST-LABEL: define i32 @indirect_call_value_profile_preserved_by_sink +; HOIST-SAME: (i32 [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT: [[CALL:%.*]] = tail call ptr @createptr(i32 [[C]]) +; HOIST-NEXT: [[REM:%.*]] = srem i32 [[X]], 1000 +; HOIST-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; HOIST-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8 +; HOIST-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; HOIST: if.then: +; HOIST-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 +; HOIST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VFN]], align 8 +; HOIST-NEXT: [[CALL1:%.*]] = tail call i32 [[TMP0]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF2:![0-9]+]] +; HOIST-NEXT: br label [[CLEANUP:%.*]] +; HOIST: if.end: +; HOIST-NEXT: [[VFN3:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 2 +; HOIST-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN3]], align 8 +; HOIST-NEXT: [[CALL4:%.*]] = tail call i32 [[TMP1]](ptr [[CALL]], i32 [[A]], i32 [[B]]), !prof [[PROF3:![0-9]+]] +; HOIST-NEXT: br label [[CLEANUP]] +; HOIST: cleanup: +; HOIST-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL4]], [[IF_END]] ] +; HOIST-NEXT: ret i32 [[RETVAL_0]] + +entry: + %call = tail call ptr @createptr(i32 %c) + %rem = srem i32 %x, 1000 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %vtable = load ptr, ptr %call, align 8 + %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 ; vtable entry 1 - presumably Base::func1 from the C++ listing + %0 = load ptr, ptr %vfn + %call1 = tail call i32 
%0(ptr %call, i32 %a, i32 %b), !prof !3 + br label %cleanup + +if.end: ; preds = %entry + %vtable2 = load ptr, ptr %call, align 8 + %vfn3 = getelementptr inbounds ptr, ptr %vtable2, i64 2 ; vtable entry 2 - presumably Base::func2 from the C++ listing + %1 = load ptr, ptr %vfn3 + %call4 = tail call i32 %1(ptr %call, i32 %a, i32 %b), !prof !4 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call1, %if.then ], [ %call4, %if.end ] + ret i32 %retval.0 +} + + +declare ptr @createptr(i32) +declare i32 @func2(i32, i32) + +!1 = !{!"VP", i32 0, i64 1600, i64 12345, i64 1030, i64 678, i64 410} +!2 = !{!"VP", i32 0, i64 1601, i64 54321, i64 1030, i64 678, i64 410} +!3 =!{!"VP", i32 0, i64 1602, i64 12345, i64 1030, i64 678, i64 410} +!4 =!{!"VP", i32 0, i64 1603, i64 54321, i64 1030, i64 876, i64 410} +;. +; HOIST: [[PROF2]] = !{!"VP", i32 0, i64 1602, i64 12345, i64 1030, i64 678, i64 410} +; HOIST: [[PROF3]] = !{!"VP", i32 0, i64 1603, i64 54321, i64 1030, i64 876, i64 410} +;. +; SINK: [[PROF0]] = !{!"VP", i32 0, i64 1600, i64 12345, i64 1030, i64 678, i64 410} +; SINK: [[PROF1]] = !{!"VP", i32 0, i64 1601, i64 54321, i64 1030, i64 678, i64 410} +;.