Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -3965,6 +3965,62 @@ return isa(V) && classof(cast(V)); } + // Find the "branch_weights" metadata and indirect-call metadata. + // Return true if either metadata is found. + // Return false if none of the metadata is found. + // This method sets the reference argument to nullptr if the metadata + // is not found. + // TODO: make this function more generic to return a hash_map of + // the metadata and its key. + static bool getProfMetaData(MDNode *MD, MDNode *&BranchWeightMD, + MDNode *&IndirectCallMD) { + BranchWeightMD = IndirectCallMD = nullptr; + + auto isMDwithName = [](MDNode *MD, const char *Str) { + auto *MDName = dyn_cast(MD->getOperand(0)); + if (!MDName) + return false; + return (MDName->getString().equals(Str)); + }; + + if (isMDwithName(MD, "branch_weights")) { + BranchWeightMD = MD; + return true; + } + if (isMDwithName(MD, "VP")) { + IndirectCallMD = MD; + return true; + } + if (MD->getNumOperands() != 3) + return false; + for (int I = 1; I < 3; I++) { + auto *MDT = cast(MD->getOperand(I)); + if (!MDT) + continue; + if (isMDwithName(MDT, "branch_weights")) + BranchWeightMD = MDT; + else if (isMDwithName(MDT, "VP")) + IndirectCallMD = MDT; + } + return (BranchWeightMD || IndirectCallMD); + } + + // A utility method that returns "branch_weights" metadata. The return + // value will be nullptr if the metadata is not found. + static MDNode *getBranchWeightProfData(MDNode *MD) { + MDNode *BranchWeightMD, *IndirectCallMD; + getProfMetaData(MD, BranchWeightMD, IndirectCallMD); + return BranchWeightMD; + } + + // A utility method that returns indirect-call metadata. The return + // value will be nullptr if the metadata is not found. 
+ static MDNode *getIndirectCallProfData(MDNode *MD) { + MDNode *BranchWeightMD, *IndirectCallMD; + getProfMetaData(MD, BranchWeightMD, IndirectCallMD); + return IndirectCallMD; + } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. Index: llvm/lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -403,7 +403,9 @@ SmallVector UnreachableIdxs; SmallVector ReachableIdxs; - extractBranchWeights(*TI, Weights); + if (!extractBranchWeights(*TI, Weights)) + return false; + for (unsigned I = 0, E = Weights.size(); I != E; ++I) { WeightSum += Weights[I]; const LoopBlock SrcLoopBB = getLoopBlock(BB); Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -1502,7 +1502,16 @@ getOpcode() == Instruction::Switch) && "Looking for branch weights on something besides branch"); - return ::extractProfTotalWeight(getMetadata(LLVMContext::MD_prof), TotalVal); + TotalVal = 0; + auto *ProfileData = getMetadata(LLVMContext::MD_prof); + if (!ProfileData) + return false; + if (getOpcode() == Instruction::Invoke) { + ProfileData = InvokeInst::getBranchWeightProfData(ProfileData); + if (!ProfileData) + return false; + } + return ::extractProfTotalWeight(ProfileData, TotalVal); } void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) { Index: llvm/lib/IR/ProfDataUtils.cpp =================================================================== --- llvm/lib/IR/ProfDataUtils.cpp +++ llvm/lib/IR/ProfDataUtils.cpp @@ -111,17 +111,28 @@ bool extractBranchWeights(const Instruction &I, SmallVectorImpl &Weights) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + if (!ProfileData) + return false; + if (I.getOpcode() == 
Instruction::Invoke) + ProfileData = InvokeInst::getBranchWeightProfData(ProfileData); return extractBranchWeights(ProfileData, Weights); } bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal, uint64_t &FalseVal) { assert((I.getOpcode() == Instruction::Br || - I.getOpcode() == Instruction::Select) && + I.getOpcode() == Instruction::Select || + I.getOpcode() == Instruction::Invoke) && "Looking for branch weights on something besides branch or select"); SmallVector Weights; auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + if (!ProfileData) + return false; + + if (I.getOpcode() == Instruction::Invoke) + ProfileData = InvokeInst::getBranchWeightProfData(ProfileData); + if (!extractBranchWeights(ProfileData, Weights)) return false; Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -4475,49 +4475,61 @@ } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { - Check(MD->getNumOperands() >= 2, - "!prof annotations should have no less than 2 operands", MD); - - // Check first operand. - Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD); - Check(isa(MD->getOperand(0)), - "expected string with name of the !prof annotation", MD); - MDString *MDS = cast(MD->getOperand(0)); - StringRef ProfName = MDS->getString(); - - // Check consistency of !prof branch_weights metadata. 
- if (ProfName.equals("branch_weights")) { - if (isa(&I)) { - Check(MD->getNumOperands() == 2 || MD->getNumOperands() == 3, - "Wrong number of InvokeInst branch_weights operands", MD); - } else { - unsigned ExpectedNumOperands = 0; - if (BranchInst *BI = dyn_cast(&I)) - ExpectedNumOperands = BI->getNumSuccessors(); - else if (SwitchInst *SI = dyn_cast(&I)) - ExpectedNumOperands = SI->getNumSuccessors(); - else if (isa(&I)) - ExpectedNumOperands = 1; - else if (IndirectBrInst *IBI = dyn_cast(&I)) - ExpectedNumOperands = IBI->getNumDestinations(); - else if (isa(&I)) - ExpectedNumOperands = 2; - else if (CallBrInst *CI = dyn_cast(&I)) - ExpectedNumOperands = CI->getNumSuccessors(); - else - CheckFailed("!prof branch_weights are not allowed for this instruction", - MD); - - Check(MD->getNumOperands() == 1 + ExpectedNumOperands, - "Wrong number of operands", MD); - } - for (unsigned i = 1; i < MD->getNumOperands(); ++i) { - auto &MDO = MD->getOperand(i); - Check(MDO, "second operand should not be null", MD); - Check(mdconst::dyn_extract(MDO), - "!prof brunch_weights operand is not a const int"); + auto checkInst = [this](Instruction &I, MDNode *MD) { + Check(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); + + // Check first operand. + Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD); + Check(isa(MD->getOperand(0)), + "expected string with name of the !prof annotation", MD); + MDString *MDS = cast(MD->getOperand(0)); + StringRef ProfName = MDS->getString(); + + // Check consistency of !prof branch_weights metadata. 
+ if (ProfName.equals("branch_weights")) { + if (isa(&I)) { + Check(MD->getNumOperands() == 2 || MD->getNumOperands() == 3, + "Wrong number of InvokeInst branch_weights operands", MD); + } else { + unsigned ExpectedNumOperands = 0; + if (BranchInst *BI = dyn_cast(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); + else if (SwitchInst *SI = dyn_cast(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); + else if (isa(&I)) + ExpectedNumOperands = 1; + else if (IndirectBrInst *IBI = dyn_cast(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); + else if (isa(&I)) + ExpectedNumOperands = 2; + else if (CallBrInst *CI = dyn_cast(&I)) + ExpectedNumOperands = CI->getNumSuccessors(); + else + CheckFailed( + "!prof branch_weights are not allowed for this instruction", MD); + + Check(MD->getNumOperands() == 1 + ExpectedNumOperands, + "Wrong number of operands", MD); + } + for (unsigned i = 1; i < MD->getNumOperands(); ++i) { + auto &MDO = MD->getOperand(i); + Check(MDO, "second operand should not be null", MD); + Check(mdconst::dyn_extract(MDO), + "!prof brunch_weights operand is not a const int"); + } } + }; + + // Check MD for non-invoke instructions or InvokeInst only with branch_weight. + if (!isa(&I) || isa(MD->getOperand(0))) { + checkInst(I, MD); + return; } + + // For an InvokeInst that has both branch_weight and VP profiles. 
+ checkInst(I, cast(MD->getOperand(1))); + checkInst(I, cast(MD->getOperand(2))); } void Verifier::visitCallStackMetadata(MDNode *MD) { Index: llvm/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/lib/ProfileData/InstrProf.cpp +++ llvm/lib/ProfileData/InstrProf.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -1056,7 +1057,26 @@ if (--MDCount == 0) break; } - Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); + if (!isa(&Inst)) { + Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); + return; + } + + // For an invoke instruction, if we have VP counts, we should already have a + // "branch_weights" MD_prof. + MDNode *BranchWeight = Inst.getMetadata(LLVMContext::MD_prof); + assert(BranchWeight != nullptr && + dyn_cast(BranchWeight->getOperand(0)) + ->getString() + .equals("branch_weights") && + "Expect non-null branch_weight metadata"); + SmallVector InvokeVals; + InvokeVals.push_back(nullptr); + InvokeVals.push_back(BranchWeight); + InvokeVals.push_back(MDNode::get(Ctx, Vals)); + MDNode *NewM = MDNode::getDistinct(Ctx, InvokeVals); + NewM->replaceOperandWith(0, NewM); + Inst.setMetadata(LLVMContext::MD_prof, NewM); } bool getValueProfDataFromInst(const Instruction &Inst, @@ -1069,6 +1089,12 @@ if (!MD) return false; + if (isa(&Inst)) { + MD = cast(&Inst)->getIndirectCallProfData(MD); + if (!MD) + return false; + } + unsigned NOps = MD->getNumOperands(); if (NOps < 5) Index: llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -313,8 +313,13 @@ continue; Changed = true; - // 
Adjust the MD.prof metadata. First delete the old one. - CB->setMetadata(LLVMContext::MD_prof, nullptr); + // Adjust the MD.prof metadata. First delete the old one. + if (isa(CB)) { + MDNode *MD = CB->getMetadata(LLVMContext::MD_prof); + MD = InvokeInst::getBranchWeightProfData(MD); + CB->setMetadata(LLVMContext::MD_prof, MD); + } else + CB->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (TotalCount == 0 || NumPromoted == NumVals) continue; Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -2158,17 +2158,25 @@ NewCall->setDebugLoc(II->getDebugLoc()); NewCall->copyMetadata(*II); - // If the invoke had profile metadata, try converting them for CallInst. - uint64_t TotalWeight; - if (NewCall->extractProfTotalWeight(TotalWeight)) { - // Set the total weight if it fits into i32, otherwise reset. - MDBuilder MDB(NewCall->getContext()); - auto NewWeights = uint32_t(TotalWeight) != TotalWeight - ? nullptr - : MDB.createBranchWeights({uint32_t(TotalWeight)}); - NewCall->setMetadata(LLVMContext::MD_prof, NewWeights); + // Move MD_prof data to the new instruction. We can only have one. + // First, try to keep the indirect-call profile metadata. + // Otherwise, try converting the branch weights to total call profile data. + auto *ProfileData = II->getMetadata(LLVMContext::MD_prof); + if (!ProfileData) + return NewCall; + ProfileData = InvokeInst::getIndirectCallProfData(ProfileData); + if (!ProfileData) { + uint64_t TotalWeight; + if (II->extractProfTotalWeight(TotalWeight)) { + // Set the total weight if it fits into i32, otherwise reset. + MDBuilder MDB(NewCall->getContext()); + ProfileData = uint32_t(TotalWeight) != TotalWeight + ? nullptr + : MDB.createBranchWeights({uint32_t(TotalWeight)}); + } } - + // Set the MD_prof metadata. 
+ NewCall->setMetadata(LLVMContext::MD_prof, ProfileData); return NewCall; } Index: llvm/test/Transforms/PGOProfile/Inputs/invoke_icall.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/invoke_icall.proftext @@ -0,0 +1,30 @@ +# IR level Instrumentation Flag +:ir +_Z3bari +# Func Hash: +784007058953177093 +# Num Counters: +2 +# Counter Values: +10000 +7500 + +_Z3fooi +# Func Hash: +264495437628610820 +# Num Counters: +4 +# Counter Values: +10000 +0 +7500 +0 +# Num Value Kinds: +1 +# ValueKind = IPVK_IndirectCallTarget: +0 +# NumValueSites: +1 +1 +_Z3bari:10000 + Index: llvm/test/Transforms/PGOProfile/invoke_icall.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/invoke_icall.ll @@ -0,0 +1,97 @@ +; RUN: llvm-profdata merge %S/Inputs/invoke_icall.proftext -o %t.profdata +; RUN: opt < %s -passes=pgo-instr-use -pgo-instrument-entry=false -pgo-test-profile-file=%t.profdata -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" } +%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8 } +%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"* } +%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 } + +@_ZTIi = external dso_local constant i8* +@goo = dso_local global i32 (i32)* null, align 8 +@_ZSt4cerr = external dso_local global %"class.std::basic_ostream", align 8 +@.str = private unnamed_addr constant [19 x i8] c"got an exception.\0A\00", align 1 + +define dso_local i32 @_Z3bari(i32 %a) { +entry: + %0 = and i32 %a, 3 + %tobool.not = icmp eq i32 %0, 0 + br i1 
%tobool.not, label %if.end, label %if.then + +if.then: + %exception = call i8* @__cxa_allocate_exception(i64 4) + %1 = bitcast i8* %exception to i32* + store i32 20, i32* %1, align 16 + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + unreachable + +if.end: + %mul = mul nsw i32 %a, %a + ret i32 %mul +} + +declare dso_local i8* @__cxa_allocate_exception(i64) + +declare dso_local void @__cxa_throw(i8*, i8*, i8*) + +define dso_local i32 @_Z3fooi(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %0 = load i32 (i32)*, i32 (i32)** @goo, align 8 + %call = invoke i32 %0(i32 %n) + to label %cleanup unwind label %lpad +; CHECK-LABEL: @_Z3fooi +; CHECK-SAME: !prof ![[BW_ENTRY:[0-9]+]] +; CHECK: %call = invoke +; CHECK: to label +; CHECK-SAME: !prof ![[MD_INVOKE:[0-9]+]] +; CHECK-DAG: ![[BW_ENTRY]] = !{!"function_entry_count", i64 10000} +; CHECK-DAG: ![[BW_INVOKE:[0-9]+]] = !{!"branch_weights", i32 2500, i32 7500} +; CHECK-DAG: ![[MD_INVOKE]] = distinct !{![[MD_INVOKE]], ![[BW_INVOKE]], ![[VP_INVOKE:[0-9]+]]} +; CHECK-DAG: ![[VP_INVOKE]] = !{!"VP", i32 0, i64 10000, i64 3727899762981752933, i64 10000} + +lpad: + %1 = landingpad { i8*, i32 } + cleanup + catch i8* bitcast (i8** @_ZTIi to i8*) + %2 = extractvalue { i8*, i32 } %1, 1 + %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %2, %3 + br i1 %matches, label %catch, label %ehcleanup + +catch: + %4 = extractvalue { i8*, i32 } %1, 0 + %5 = call i8* @__cxa_begin_catch(i8* %4) + %call1.i12 = invoke nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8) @_ZSt4cerr, i8* nonnull getelementptr inbounds ([19 x i8], [19 x i8]* @.str, i64 0, i64 0), i64 18) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + call void @__cxa_end_catch() + br label %cleanup + 
+lpad1: + %6 = landingpad { i8*, i32 } + cleanup + call void @__cxa_end_catch() + br label %ehcleanup + +cleanup: + %retval.0 = phi i32 [ -1, %invoke.cont2 ], [ %call, %entry ] + ret i32 %retval.0 + +ehcleanup: + %lpad.val6.merged = phi { i8*, i32 } [ %6, %lpad1 ], [ %1, %lpad ] + resume { i8*, i32 } %lpad.val6.merged +} + +declare dso_local i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) + +declare dso_local i8* @__cxa_begin_catch(i8*) + +declare dso_local void @__cxa_end_catch() + +declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8), i8*, i64) +