Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -9704,6 +9704,37 @@ format that can be written out by a compiler runtime and consumed via the ``llvm-profdata`` tool. +'``llvm.instrprof_increment_step``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof_increment_step(i8* , i64 , + i32 , + i32 , i64 ) + +Overview: +""""""""" + +The '``llvm.instrprof_increment_step``' intrinsic is an extension to +the '``llvm.instrprof_increment``' intrinsic with an additional fifth +argument to specify the step of the increment. + +Arguments: +"""""""""" +The first four arguments are the same as '``llvm.instrprof_increment``' +instrinsic. + +The last argument specifies the value of the increment of the counter variable. + +Semantics: +"""""""""" +See description of '``llvm.instrprof_increment``' instrinsic. + + '``llvm.instrprof_value_profile``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -361,6 +361,17 @@ } }; + class InstrProfIncrementInstStep : public InstrProfIncrementInst { + public: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_increment_step; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + Value *getStep() const { return const_cast(getArgOperand(4)); } + }; + /// This represents the llvm.instrprof_value_profile intrinsic. class InstrProfValueProfileInst : public IntrinsicInst { public: Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -346,6 +346,12 @@ llvm_i32_ty, llvm_i32_ty], []>; +// A counter increment with step for instrumentation based profiling. +def int_instrprof_increment_step : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + []>; + // A call to profile runtime for value profiling of target expressions // through instrumentation based profiling. def int_instrprof_value_profile : Intrinsic<[], Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -107,6 +107,13 @@ return getInstrProfCoverageSectionName(isMachO()); } +static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { + InstrProfIncrementInst *Inc = dyn_cast(Instr); + if (Inc) + return Inc; + return dyn_cast(Instr); +} + bool InstrProfiling::run(Module &M) { bool MadeChange = false; @@ -138,7 +145,8 @@ for (BasicBlock &BB : F) for (auto I = BB.begin(), E = BB.end(); I != E;) { auto Instr = I++; - if (auto *Inc = dyn_cast(Instr)) { + InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); + if (Inc) { lowerIncrement(Inc); MadeChange = true; } else if (auto *Ind = dyn_cast(Instr)) { @@ -214,6 +222,14 @@ Ind->eraseFromParent(); } +static Value *getIncrementStep(InstrProfIncrementInst *Inc, + IRBuilder<> &Builder) { + auto *IncWithStep = dyn_cast(Inc); + if (IncWithStep) + return IncWithStep->getStep(); + return Builder.getInt64(1); +} + void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { GlobalVariable *Counters = getOrCreateRegionCounters(Inc); @@ -221,7 +237,7 @@ uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); Value *Count = Builder.CreateLoad(Addr, "pgocount"); - Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder)); Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr)); Inc->eraseFromParent(); } Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -86,6 +86,7 @@ #define DEBUG_TYPE "pgo-instrumentation" STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); +STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); STATISTIC(NumOfPGOEdge, "Number of edges."); STATISTIC(NumOfPGOBB, "Number of basic-blocks."); STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); @@ -133,7 +134,65 @@ static cl::opt NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden); +// Command line option to enable/disable select instruction instrumentation. +static cl::opt PGOInstrSelect("pgo-instr-select", cl::init(true), + cl::Hidden); namespace { + +/// The select instruction visitor plays three roles specified +/// by the mode. In \c VM_counting mode, it simply counts the number of +/// select instructions. In \c VM_instrument mode, it inserts code to count +/// the number times TrueValue of select is taken. In \c VM_annotate mode, +/// it reads the profile data and annotate the select instruction with metadata. +enum VisitMode { VM_counting, VM_instrument, VM_annotate }; +class PGOUseFunc; + +/// Instruction Visitor class to visit select instructions. +struct SelectInstVisitor : public InstVisitor { + Function &F; + unsigned NSIs = 0; // Number of select instructions instrumented. + VisitMode Mode = VM_counting; // Visiting mode. + unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. + unsigned TotalNumCtrs = 0; // Total number of counters + GlobalVariable *FuncNameVar = nullptr; + uint64_t FuncHash = 0; + PGOUseFunc *UseFunc = nullptr; + + SelectInstVisitor(Function &Func) : F(Func) {} + + void countSelects(Function &Func) { + Mode = VM_counting; + visit(Func); + } + // Visit the IR stream and instrument all select instructions. \p + // Ind is a pointer to the counter index variable; \p TotalNC + // is the total number of counters; \p FNV is the pointer to the + // PGO function name var; \p FHash is the function hash. + void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, + GlobalVariable *FNV, uint64_t FHash) { + Mode = VM_instrument; + CurCtrIdx = Ind; + TotalNumCtrs = TotalNC; + FuncHash = FHash; + FuncNameVar = FNV; + visit(Func); + } + + // Visit the IR stream and annotate all select instructions. + void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { + Mode = VM_annotate; + UseFunc = UF; + CurCtrIdx = Ind; + visit(Func); + } + + void instrumentOneSelectInst(SelectInst &SI); + void annotateOneSelectInst(SelectInst &SI); + // Visit \p SI instruction and perform tasks according to visit mode. + void visitSelectInst(SelectInst &SI); + unsigned getNumOfSelectInsts() const { return NSIs; } +}; + class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; @@ -180,6 +239,7 @@ AU.addRequired(); } }; + } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; @@ -254,6 +314,7 @@ std::unordered_multimap &ComdatMembers; public: + SelectInstVisitor SIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. @@ -280,8 +341,13 @@ std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0), + : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { + + // This should be done before CFG hash computation. + SIVisitor.countSelects(Func); + NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + FuncName = getPGOFuncName(F); computeCFGHash(); if (ComdatMembers.size()) @@ -308,7 +374,7 @@ if (!E->InMST && !E->Removed) NumCounters++; } - return NumCounters; + return NumCounters + SIVisitor.getNumOfSelectInsts(); } }; @@ -328,7 +394,8 @@ } } JC.update(Indexes); - FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 | + FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | + (uint64_t)findIndirectCallSites(F).size() << 48 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); } @@ -473,6 +540,10 @@ Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), Builder.getInt32(I++)}); } + + // Now instrument select instructions: + FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, + FuncInfo.FunctionHash); assert(I == NumCounters); if (DisableValueProfiling) @@ -594,17 +665,17 @@ // Return the profile record for this function; InstrProfRecord &getProfileRecord() { return ProfileRecord; } + // Return the auxiliary BB information. + UseBBInfo &getBBInfo(const BasicBlock *BB) const { + return FuncInfo.getBBInfo(BB); + } + private: Function &F; Module *M; // This member stores the shared information with class PGOGenFunc. FuncPGOInstrumentation FuncInfo; - // Return the auxiliary BB information. - UseBBInfo &getBBInfo(const BasicBlock *BB) const { - return FuncInfo.getBBInfo(BB); - } - // The maximum count value in the profile. This is only used in PGO use // compilation. uint64_t ProgramMaxCount; @@ -677,6 +748,8 @@ NewEdge1.InMST = true; getBBInfo(InstrBB).setBBInfoCount(CountValue); } + // Now annotate select instructions + FuncInfo.SIVisitor.annotateSelects(F, this, &I); assert(I == CountFromProfile.size()); } @@ -820,7 +893,7 @@ DEBUG(FuncInfo.dumpInfo("after reading profile.")); } -static void setProfMetadata(Module *M, TerminatorInst *TI, +static void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, uint64_t MaxCount) { MDBuilder MDB(M->getContext()); assert(MaxCount > 0 && "Bad max count"); @@ -869,6 +942,57 @@ } } +void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { + Module *M = F.getParent(); + IRBuilder<> Builder(&SI); + Type *Int64Ty = Builder.getInt64Ty(); + Type *I8PtrTy = Builder.getInt8PtrTy(); + auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), + {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FuncHash), + Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); + ++(*CurCtrIdx); +} + +void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { + std::vector &CountFromProfile = UseFunc->getProfileRecord().Counts; + assert(*CurCtrIdx < CountFromProfile.size() && + "Out of bound access of counters"); + uint64_t SCounts[2]; + SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count + ++(*CurCtrIdx); + uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue; + // False Count + SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); + uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); + setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); +} + +void SelectInstVisitor::visitSelectInst(SelectInst &SI) { + if (!PGOInstrSelect) + return; + // FIXME: do not handle this yet. + if (SI.getCondition()->getType()->isVectorTy()) + return; + + NSIs++; + switch (Mode) { + case VM_counting: + return; + case VM_instrument: + instrumentOneSelectInst(SI); + break; + case VM_annotate: + annotateOneSelectInst(SI); + break; + default: + assert(false && "Unknown visiting mode"); + break; + } +} + // Traverse all the indirect callsites and annotate the instructions. void PGOUseFunc::annotateIndirectCallSites() { if (DisableValueProfiling) Index: test/Transforms/PGOProfile/Inputs/select1.proftext =================================================================== --- test/Transforms/PGOProfile/Inputs/select1.proftext +++ test/Transforms/PGOProfile/Inputs/select1.proftext @@ -0,0 +1,8 @@ +:ir +test_br_2 +72057623705475732 +3 +4 +1 +1 + Index: test/Transforms/PGOProfile/select1.ll =================================================================== --- test/Transforms/PGOProfile/select1.ll +++ test/Transforms/PGOProfile/select1.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT +; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @test_br_2(i32 %i) { +entry: + %cmp = icmp sgt i32 %i, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %add = add nsw i32 %i, 2 +;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64 +;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]]) +;NOSELECT-NOT: call void @llvm.instrprof.increment.step + %s = select i1 %cmp, i32 %add, i32 0 +;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]] +;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3} + + br label %if.end + +if.else: + %sub = sub nsw i32 %i, 2 + br label %if.end + +if.end: + %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ] + ret i32 %retv +}