Index: compiler-rt/include/profile/InstrProfData.inc =================================================================== --- compiler-rt/include/profile/InstrProfData.inc +++ compiler-rt/include/profile/InstrProfData.inc @@ -187,13 +187,15 @@ VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target") /* For memory intrinsic functions size profiling. */ VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size") +/* For loop versioning opportunities. */ +VALUE_PROF_KIND(IPVK_LoopTripCnt, 2, "loop trip count") /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. */ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first") -VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last") +VALUE_PROF_KIND(IPVK_Last, IPVK_LoopTripCnt, "last") #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ Index: compiler-rt/test/profile/instrprof-looptc.c =================================================================== --- /dev/null +++ compiler-rt/test/profile/instrprof-looptc.c @@ -0,0 +1,28 @@ +// RUN: %clang_pgogen -fexperimental-new-pass-manager -O2 -mllvm -pgo-loop-trip-count -o %t %s +// RUN: env LLVM_PROFILE_FILE=%t.1.profraw %run %t 5 || echo ignore rc +// RUN: env LLVM_PROFILE_FILE=%t.2.profraw %run %t 4 || echo ignore rc +// RUN: env LLVM_PROFILE_FILE=%t.3.profraw %run %t 4 || echo ignore rc +// RUN: llvm-profdata merge -text -o %t.proftext %t.1.profraw %t.2.profraw %t.3.profraw +// RUN: FileCheck %s < %t.proftext + +int atoi(const char *); + +int main(int argc, char *argv[]) { + unsigned N = atoi(argv[1]); + int c = 0; + int ar[100] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + for (unsigned i = 0; i < N; i += 1) + c += ar[i]; + + return c; +} + +// CHECK: # Num Value Kinds: +// CHECK-NEXT: 1 +// CHECK-NEXT: # ValueKind = IPVK_LoopTripCnt: +// CHECK-NEXT: 2 +// CHECK-NEXT: # NumValueSites: +// CHECK-NEXT: 1 +// CHECK-NEXT: 2 +// CHECK-NEXT: 4:2 +// CHECK-NEXT: 5:1 Index: 
llvm/include/llvm/Analysis/LoopInfo.h =================================================================== --- llvm/include/llvm/Analysis/LoopInfo.h +++ llvm/include/llvm/Analysis/LoopInfo.h @@ -52,6 +52,8 @@ #include #include +struct InstrProfValueData; + namespace llvm { class DominatorTree; @@ -811,6 +813,27 @@ /// unrolling pass is run more than once (which it generally is). void setLoopAlreadyUnrolled(); + /// Add the trip count value profile metadata to the llvm.loop metadata of + /// this loop. + void setLoopTripCount(MDNode *TripCountValueProfile) const; + + /// Return the trip count value profile metadata if available. The metadata is + /// stored inside the llvm.loop (MD_loop) metadata. + MDNode *getLoopTripCount() const; + + /// Fetch the top \p MaxNumOfValues trip counts based on value profiling info. + /// Populate the \p ValueData array with {value,count} pairs, where `value` is + /// a profiled/recorded value of this loop's trip count, and `count` is the + /// corresponding number of times this value appears in the training run. Also + /// set \p NumVals to the number of values fetched (should be less than or + /// equal to \p MaxNumOfValues), and set \p TotalCount to the total number of + /// trip counts recorded for this loop (i.e. the number of times the loop + /// pre-header executed). + /// Returns false if no profile data exist, true otherwise. + bool getLoopTripCount(uint32_t MaxNumOfValues, + SmallVectorImpl &ValueData, + uint32_t &NumVals, uint64_t &TotalCount); + void dump() const; void dumpVerbose() const; @@ -1260,17 +1283,23 @@ /// @param Context The LLVMContext in which to create the new LoopID. /// @param OrigLoopID The original LoopID; can be nullptr if the original /// loop has no LoopID. -/// @param RemovePrefixes Remove all loop attributes that have these prefixes. -/// Use to remove metadata of the transformation that has -/// been applied. /// @param AddAttrs Add these loop attributes to the new LoopID. 
/// +/// @param RemovePred A predicate to test each metadata operand of the +/// LoopID, and if true then remove that metadata. /// @return A new LoopID that can be applied using Loop::setLoopID(). llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, - llvm::ArrayRef RemovePrefixes, - llvm::ArrayRef AddAttrs); - + ArrayRef AddAttrs, + function_ref RemovePred); + +/// A special case of the above function, where the predicate returns true if +/// the metadata operand is an MDNode whose first operand is an MDString that +/// starts with one of the prefixes in \p RemovePrefixes. +llvm::MDNode *makePostTransformationMetadata(llvm::LLVMContext &Context, + MDNode *OrigLoopID, + ArrayRef RemovePrefixes, + ArrayRef AddAttrs); } // End llvm namespace #endif Index: llvm/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProf.h +++ llvm/include/llvm/ProfileData/InstrProf.h @@ -247,6 +247,20 @@ ArrayRef VDs, uint64_t Sum, InstrProfValueKind ValueKind, uint32_t MaxMDCount); +/// Get the value profile data for value site \p SiteIdx from \p InstrProfR, +/// extracting up to \p MaxMDCount number of records per value site. +MDNode *createValueProfileMD(Module &M, const InstrProfRecord &InstrProfR, + InstrProfValueKind ValueKind, uint32_t SiteIdx, + uint32_t MaxMDCount); + +/// Same as the above interface but using an ArrayRef, as well as \p Sum. +MDNode *createValueProfileMD(LLVMContext &Ctx, ArrayRef VDs, + uint64_t Sum, InstrProfValueKind ValueKind, + uint32_t MaxMDCount); + +/// Return true if this is a Value Profiling metadata node. +bool isValueProfMD(MDNode *MD, InstrProfValueKind ValueKind); + /// Extract the value profile data from \p Inst which is annotated with /// value profile meta data. Return false if there is no value data annotated, /// otherwise return true. 
@@ -256,6 +270,12 @@ InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC); +/// Extract the value profile data from the given metadata. Return true if +/// successful. +bool getValueProfDataFromMD(const MDNode *MD, uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC); + inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } /// Return the PGOFuncName meta data associated with a function. @@ -783,6 +803,7 @@ struct ValueProfData { std::vector IndirectCallSites; std::vector MemOPSizes; + std::vector LoopTCSites; }; std::unique_ptr ValueData; @@ -805,6 +826,8 @@ return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_LoopTripCnt: + return ValueData->LoopTCSites; default: llvm_unreachable("Unknown value kind!"); } @@ -819,6 +842,8 @@ return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_LoopTripCnt: + return ValueData->LoopTCSites; default: llvm_unreachable("Unknown value kind!"); } Index: llvm/include/llvm/ProfileData/InstrProfData.inc =================================================================== --- llvm/include/llvm/ProfileData/InstrProfData.inc +++ llvm/include/llvm/ProfileData/InstrProfData.inc @@ -187,13 +187,16 @@ VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target") /* For memory intrinsic functions size profiling. */ VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size") +/* For loop versioning opportunities. */ +VALUE_PROF_KIND(IPVK_LoopTripCnt, 2, "loop trip count") + /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. 
*/ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first") -VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last") +VALUE_PROF_KIND(IPVK_Last, IPVK_LoopTripCnt, "last") #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ Index: llvm/lib/Analysis/LoopInfo.cpp =================================================================== --- llvm/lib/Analysis/LoopInfo.cpp +++ llvm/lib/Analysis/LoopInfo.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1009,10 +1010,66 @@ return Node->getNumOperands() == 0 && Node->isDistinct(); } +void Loop::setLoopTripCount(MDNode *TripCountValueProfile) const { + assert(isValueProfMD(TripCountValueProfile, IPVK_LoopTripCnt) && + "Expecting a Value Profile metadata"); + LLVMContext &Context = getHeader()->getContext(); + + // Remove any existing trip count value profile nodes. 
+ auto RemovePred = [](Metadata *Op) { + MDNode *MD = dyn_cast(Op); + return MD && isValueProfMD(MD, IPVK_LoopTripCnt); + }; + + MDNode *NewLoopID = makePostTransformationMetadata( + Context, getLoopID(), {TripCountValueProfile}, RemovePred); + setLoopID(NewLoopID); +} + +MDNode *Loop::getLoopTripCount() const { + MDNode *LoopID = getLoopID(); + if (!LoopID) + return nullptr; + + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *MD = dyn_cast(LoopID->getOperand(i)); + if (MD && isValueProfMD(MD, IPVK_LoopTripCnt)) + return MD; + } + return nullptr; +} + +bool Loop::getLoopTripCount(uint32_t MaxNumValueData, + SmallVectorImpl &ValueData, + uint32_t &NumVals, uint64_t &TotalCount) { + MDNode *LoopTC = getLoopTripCount(); + if (!LoopTC) + return false; + + ValueData.resize(MaxNumValueData); // Elements are written via data(). + return getValueProfDataFromMD(LoopTC, MaxNumValueData, ValueData.data(), + NumVals, TotalCount); +} + MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, MDNode *OrigLoopID, ArrayRef RemovePrefixes, ArrayRef AddAttrs) { + auto RemovePred = [&RemovePrefixes](Metadata *Op) { + if (MDNode *MD = dyn_cast(Op)) + if (const MDString *S = dyn_cast(MD->getOperand(0))) + return llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool { + return S->getString().startswith(Prefix); + }); + return false; + }; + return makePostTransformationMetadata(Context, OrigLoopID, AddAttrs, + RemovePred); +} + +MDNode *llvm::makePostTransformationMetadata( + LLVMContext &Context, MDNode *OrigLoopID, ArrayRef AddAttrs, + function_ref RemovePred) { // First remove any existing loop metadata related to this transformation. SmallVector MDs; @@ -1024,17 +1081,8 @@ // outdated. 
if (OrigLoopID) { for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) { - bool IsVectorMetadata = false; Metadata *Op = OrigLoopID->getOperand(i); - if (MDNode *MD = dyn_cast(Op)) { - const MDString *S = dyn_cast(MD->getOperand(0)); - if (S) - IsVectorMetadata = - llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool { - return S->getString().startswith(Prefix); - }); - } - if (!IsVectorMetadata) + if (!RemovePred(Op)) MDs.push_back(Op); } } Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -591,6 +591,7 @@ // dead code. Instrumentation can end up keeping dead code around and // dramatically increase code size. MPM.addPass(GlobalDCEPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(LoopSimplifyPass())); } if (!RunProfileGen) { Index: llvm/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/lib/ProfileData/InstrProf.cpp +++ llvm/lib/ProfileData/InstrProf.cpp @@ -701,6 +701,8 @@ if (ValueKind == IPVK_IndirectCallTarget) return SymTab->getFunctionHashFromAddress(Value); + assert((ValueKind == IPVK_MemOPSize || ValueKind == IPVK_LoopTripCnt) && + "unexpected Value Profile kind"); return Value; } @@ -932,23 +934,40 @@ const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIdx, uint32_t MaxMDCount) { + + MDNode *MD = + createValueProfileMD(M, InstrProfR, ValueKind, SiteIdx, MaxMDCount); + if (MD) + Inst.setMetadata(LLVMContext::MD_prof, MD); +} + +MDNode *createValueProfileMD(Module &M, const InstrProfRecord &InstrProfR, + InstrProfValueKind ValueKind, uint32_t SiteIdx, + uint32_t MaxMDCount) { uint32_t NV = InstrProfR.getNumValueDataForSite(ValueKind, SiteIdx); if (!NV) - return; + return nullptr; uint64_t Sum = 0; std::unique_ptr VD = InstrProfR.getValueForSite(ValueKind, SiteIdx, &Sum); ArrayRef VDs(VD.get(), NV); - 
annotateValueSite(M, Inst, VDs, Sum, ValueKind, MaxMDCount); + return createValueProfileMD(M.getContext(), VDs, Sum, ValueKind, MaxMDCount); } void annotateValueSite(Module &M, Instruction &Inst, ArrayRef VDs, uint64_t Sum, InstrProfValueKind ValueKind, uint32_t MaxMDCount) { - LLVMContext &Ctx = M.getContext(); + MDNode *MD = + createValueProfileMD(M.getContext(), VDs, Sum, ValueKind, MaxMDCount); + Inst.setMetadata(LLVMContext::MD_prof, MD); +} + +MDNode *createValueProfileMD(LLVMContext &Ctx, ArrayRef VDs, + uint64_t Sum, InstrProfValueKind ValueKind, + uint32_t MaxMDCount) { MDBuilder MDHelper(Ctx); SmallVector Vals; // Tag @@ -970,18 +989,10 @@ if (--MDCount == 0) break; } - Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); + return MDNode::get(Ctx, Vals); } -bool getValueProfDataFromInst(const Instruction &Inst, - InstrProfValueKind ValueKind, - uint32_t MaxNumValueData, - InstrProfValueData ValueData[], - uint32_t &ActualNumValueData, uint64_t &TotalC) { - MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); - if (!MD) - return false; - +bool isValueProfMD(MDNode *MD, InstrProfValueKind ValueKind) { unsigned NOps = MD->getNumOperands(); if (NOps < 5) @@ -1002,6 +1013,25 @@ if (KindInt->getZExtValue() != ValueKind) return false; + return true; +} + +bool getValueProfDataFromInst(const Instruction &Inst, + InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC) { + MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); + if (!MD || !isValueProfMD(MD, ValueKind)) + return false; + + return getValueProfDataFromMD(MD, MaxNumValueData, ValueData, + ActualNumValueData, TotalC); +} + +bool getValueProfDataFromMD(const MDNode *MD, uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC) { // Get total count ConstantInt *TotalCInt = mdconst::dyn_extract(MD->getOperand(2)); if (!TotalCInt) @@ -1010,7 +1040,7 @@ 
ActualNumValueData = 0; - for (unsigned I = 3; I < NOps; I += 2) { + for (unsigned I = 3, NOps = MD->getNumOperands(); I < NOps; I += 2) { if (ActualNumValueData >= MaxNumValueData) break; ConstantInt *Value = mdconst::dyn_extract(MD->getOperand(I)); Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -66,6 +66,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -109,6 +110,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MisExpect.h" #include #include @@ -183,6 +185,13 @@ cl::desc("Max number of preicise value annotations for a single memop" "intrinsic")); +// Command line option to set the maximum number of value annotations +// to write to the metadata for a single Loop-Trip-Count value profile site. +static cl::opt MaxNumLoopTCAnnotations( + "loop-trip-count-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of precise value annotations for " + "the trip count of a loop")); + // Command line option to control appending FunctionHash to the name of a COMDAT // function. This is to avoid the hash mismatch caused by the preinliner. static cl::opt DoComdatRenaming( @@ -248,6 +257,12 @@ "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation")); +// Control loop trip count profiling instrumentation. 
+static cl::opt + LoopTC("pgo-loop-trip-count", cl::init(false), cl::Hidden, + cl::desc("Enable loop trip count profiling instrumentation when " + "this option is ON")); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt PGOViewCounts; @@ -376,6 +391,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } }; @@ -404,6 +420,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } }; @@ -436,6 +453,7 @@ "PGO instrumentation.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) @@ -449,6 +467,7 @@ "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) @@ -565,12 +584,17 @@ } FuncPGOInstrumentation( - Function &Func, + Function &Func, LoopInfo &LI, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) - : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), + BlockFrequencyInfo *BFI = nullptr, ScalarEvolution *SE = nullptr, + bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, LI, SE), ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { + // Run Loop trip count profiling first because it might insert selects. 
+ if (LoopTC) + ValueSites[IPVK_LoopTripCnt] = VPC.get(IPVK_LoopTripCnt); + // This should be done before CFG hash computation. SIVisitor.countSelects(Func); ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); @@ -803,14 +827,15 @@ // Critical edges will be split. static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, + LoopInfo *LI, ScalarEvolution *SE, std::unordered_multimap &ComdatMembers, bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI, IsCS); + FuncPGOInstrumentation FuncInfo(F, *LI, ComdatMembers, true, + BPI, BFI, SE, IsCS); std::vector InstrumentBBs; FuncInfo.getInstrumentBBs(InstrumentBBs); unsigned NumCounters = @@ -953,12 +978,12 @@ class PGOUseFunc { public: - PGOUseFunc(Function &Func, Module *Modu, + PGOUseFunc(Function &Func, Module *Modu, LoopInfo &LI, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, - ProfileSummaryInfo *PSI, bool IsCS) + ProfileSummaryInfo *PSI, ScalarEvolution *SE, bool IsCS) : F(Func), M(Modu), BFI(BFIin), PSI(PSI), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FuncInfo(Func, LI, ComdatMembers, false, BPI, BFIin, SE, IsCS), FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. @@ -1384,6 +1409,18 @@ if (SI.getCondition()->getType()->isVectorTy()) return; + // Other instrumentation kinds might insert selects, skip them. + // FIXME: There is a tiny chance this might skip user selects. 
+ if (SI.use_empty() || + llvm::all_of(SI.users(), [](User *U) { + return U->use_empty() && + isa(U) && + wouldInstructionBeTriviallyDead(cast(U)); + })) { + LLVM_DEBUG(dbgs() << "skipping unused " << SI << "\n"); + return; + } + switch (Mode) { case VM_counting: NSIs++; @@ -1429,14 +1466,21 @@ return; } + uint32_t MaxMDCount = Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations : + Kind == IPVK_LoopTripCnt ? MaxNumLoopTCAnnotations + : MaxNumAnnotations; + InstrProfValueKind K = static_cast(Kind); for (VPCandidateInfo &I : ValueSites) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, - static_cast(Kind), ValueSiteIndex, - Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations - : MaxNumAnnotations); + if (I.MDHolder.is()) + annotateValueSite(*M, *I.MDHolder.get(), ProfileRecord, K, + ValueSiteIndex, MaxMDCount); + else if (MDNode *MD = createValueProfileMD(*M, ProfileRecord, K, + ValueSiteIndex, MaxMDCount)) + I.MDHolder.get()->setLoopTripCount(MD); + ValueSiteIndex++; } } @@ -1461,7 +1505,9 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, - function_ref LookupBFI, bool IsCS) { + function_ref LookupBFI, + function_ref LookupLI, + function_ref LookupSE, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. 
if (!IsCS) @@ -1474,7 +1520,9 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); + auto *LI = LookupLI(F); + auto *SE = LookupSE(F); + instrumentOneFunc(F, &M, BPI, BFI, LI, SE, ComdatMembers, IsCS); } return true; } @@ -1496,7 +1544,13 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); + auto LookupLI = [this](Function &F) { + return &this->getAnalysis(F).getLoopInfo(); + }; + auto LookupSE = [](Function &F) { return nullptr; }; + + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, LookupLI, LookupSE, + IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1509,8 +1563,14 @@ auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; - - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + auto LookupSE = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, LookupLI, LookupSE, + IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1520,6 +1580,8 @@ Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, function_ref LookupBFI, + function_ref LookupLI, + function_ref LookupSE, ProfileSummaryInfo *PSI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); @@ -1567,10 +1629,13 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); + auto *LI = LookupLI(F); + auto *SE = LookupSE(F); + assert(LI && "need a LoopInfo"); // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); + PGOUseFunc Func(F, &M, *LI, ComdatMembers, BPI, BFI, PSI, SE, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1659,10 +1724,19 @@ return &FAM.getResult(F); }; + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + + auto LookupSE = [&FAM](Function &F) { + return &FAM.getResult(F); + }; + auto *PSI = &AM.getResult(M); if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI, PSI, IsCS)) + LookupBPI, LookupBFI, LookupLI, LookupSE, PSI, + IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1678,10 +1752,14 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; + auto LookupLI = [this](Function &F) { + return &this->getAnalysis(F).getLoopInfo(); + }; + auto LookupSE = [](Function &F) { return nullptr; }; auto *PSI = &getAnalysis().getPSI(); - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, - IsCS); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + LookupLI, LookupSE, PSI, IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { Index: llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h =================================================================== --- llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h +++ llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -23,6 +23,10 @@ namespace llvm { +class LoopInfo; +class ScalarEvolution; +class Loop; + /// Utility analysis that determines what values are worth profiling. /// The actual logic is inside the ValueProfileCollectorImpl, whose job is to /// populate the Candidates vector. @@ -55,10 +59,10 @@ struct CandidateInfo { Value *V; // The value to profile. Instruction *InsertPt; // Insert the VP lib call before this instr. 
- Instruction *AnnotatedInst; // Where metadata is attached. + PointerUnion MDHolder; // Where metadata is attached. }; - ValueProfileCollector(Function &Fn); + ValueProfileCollector(Function &Fn, LoopInfo &LI, ScalarEvolution *SE); ValueProfileCollector(ValueProfileCollector &&) = delete; ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; Index: llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp +++ llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -16,15 +16,36 @@ #include "llvm/InitializePasses.h" #include +#include using namespace llvm; namespace { +// A wrapper class around a Plugin type that allows constructing the plugin from +// one of the arguments given to the wrapper. +template class SelectiveCtor { + PluginT obj; + +public: + // clang-format off + template + explicit SelectiveCtor(Function &F, LoopInfo &LI, ScalarEvolution *SE, + typename std::enable_if::value>::type* = 0) + : obj(F) {} + template + explicit SelectiveCtor(Function &F, LoopInfo &LI, ScalarEvolution *SE, + typename std::enable_if::value>::type * = 0) + : obj(LI, SE) {} + // clang-format on + PluginT *operator->() { return &obj; } +}; + /// A plugin-based class that takes an arbitrary number of Plugin types. /// Each plugin type must satisfy the following API: -/// 1) the constructor must take a `Function &f`. Typically, the plugin would -/// scan the function looking for candidates. +/// 1) the constructor must take a `Function &` or a +/// `LoopInfo &, ScalarEvolution *`. Typically, the plugin would scan the +/// function or traverse the loops looking for candidates. /// 2) contain a member function with the following signature and name: /// void run(std::vector &Candidates); /// such that the plugin would append its result into the vector parameter. 
@@ -38,21 +59,22 @@ template <> class PluginChain<> { public: - PluginChain(Function &F) {} + PluginChain(Function &F, LoopInfo &LI, ScalarEvolution *SE) {} void get(InstrProfValueKind K, std::vector &Candidates) {} }; template class PluginChain : public PluginChain { - PluginT Plugin; + SelectiveCtor Plugin; using Base = PluginChain; public: - PluginChain(Function &F) : PluginChain(F), Plugin(F) {} + PluginChain(Function &F, LoopInfo &LI, ScalarEvolution *SE) + : Base(F, LI, SE), Plugin(F, LI, SE) {} void get(InstrProfValueKind K, std::vector &Candidates) { if (K == PluginT::Kind) - Plugin.run(Candidates); + Plugin->run(Candidates); Base::get(K, Candidates); } }; @@ -65,8 +87,9 @@ using PluginChainFinal::PluginChainFinal; }; -ValueProfileCollector::ValueProfileCollector(Function &F) - : PImpl(new ValueProfileCollectorImpl(F)) {} +ValueProfileCollector::ValueProfileCollector(Function &F, LoopInfo &LI, + ScalarEvolution *SE) + : PImpl(new ValueProfileCollectorImpl(F, LI, SE)) {} ValueProfileCollector::~ValueProfileCollector() = default; Index: llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc =================================================================== --- llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -15,7 +15,12 @@ #include "ValueProfileCollector.h" #include "llvm/Analysis/IndirectCallVisitor.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "vpo" using namespace llvm; using CandidateInfo = ValueProfileCollector::CandidateInfo; @@ -67,9 +72,66 @@ } }; +///------------------------------ LoopInfoPlugin ------------------------------- +class LoopInfoPlugin { + LoopInfo &LI; + ScalarEvolution *SE; + std::unique_ptr Expander; +public: + static constexpr InstrProfValueKind Kind = IPVK_LoopTripCnt; + + LoopInfoPlugin(LoopInfo &LInfo, 
ScalarEvolution *SE) + : LI(LInfo), SE(SE), Expander(nullptr) { + if (SE && !LI.empty()) { + Module *M = (*LI.begin())->getHeader()->getModule(); + Expander = std::make_unique(*SE, M->getDataLayout(), + "trip-count-prof"); + } + } + void run(std::vector &Candidates) { + LLVM_DEBUG(dbgs() << "running LoopInfoPlugin on LoopInfo " << &LI << "\n"); + SmallVector LoopStack(LI.begin(), LI.end()); + + while (!LoopStack.empty()) { + Loop *L = LoopStack.pop_back_val(); + LoopStack.insert(LoopStack.end(), L->begin(), L->end()); + processLoop(L, Candidates); + } + } + + void processLoop(Loop *L, std::vector &Candidates) { + if (!L->isLoopSimplifyForm() || !SE) + return; + + // Handle loops with single exit point because their trip count should be + // accurately measurable. + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) + return; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock); + if (isa(ExitCount) || isa(ExitCount)) + return; + + // Insert the trip count calculation and recording in the preheader. + BasicBlock *PreHeader = L->getLoopPreheader(); + Value *TripCount = Expander->expandCodeFor(ExitCount, ExitCount->getType(), + PreHeader->getTerminator()); + Instruction *InsertPt = PreHeader->getTerminator(); + Loop *MDHolder = L; + LLVM_DEBUG(dbgs() << "Adding candidate: \n" + << " Profiled Value = " << *TripCount << "\n" + << " Insertion Point = " << *InsertPt << "\n" + << " Metadata Holder = " << MDHolder << "\n"); + Candidates.emplace_back(CandidateInfo{TripCount, InsertPt, MDHolder}); + } +}; + ///----------------------- Registration of the plugins ------------------------- /// For now, registering a plugin with the ValueProfileCollector is done by /// adding the plugin type to the VP_PLUGIN_LIST macro. 
#define VP_PLUGIN_LIST \ MemIntrinsicPlugin, \ - IndirectCallPromotionPlugin + IndirectCallPromotionPlugin, \ + LoopInfoPlugin + Index: llvm/lib/Transforms/Utils/LLVMBuild.txt =================================================================== --- llvm/lib/Transforms/Utils/LLVMBuild.txt +++ llvm/lib/Transforms/Utils/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = TransformUtils parent = Transforms -required_libraries = Analysis Core Support +required_libraries = Analysis Core ProfileData Support Index: llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -326,6 +327,8 @@ InheritOptionsExceptPrefix](MDNode *Op) { if (!InheritSomeAttrs) return false; + if (isValueProfMD(Op, IPVK_LoopTripCnt)) + return false; // Skip malformatted attribute metadata nodes. 
if (Op->getNumOperands() == 0) Index: llvm/test/Instrumentation/InstrProfiling/PR23499.ll =================================================================== --- llvm/test/Instrumentation/InstrProfiling/PR23499.ll +++ llvm/test/Instrumentation/InstrProfiling/PR23499.ll @@ -15,13 +15,13 @@ ; CHECK-NOT: __profn__Z3barIvEvv ; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat, align 8 -; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat, align 8 +; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [3 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [3 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat, align 8 ; CHECK: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names" ; COFF-NOT: __profn__Z3barIvEvv ; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat, align 8 -; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}prfd{{.*}}", comdat, align 8 +; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [3 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [3 x i16] 
zeroinitializer }, section "{{.*}}prfd{{.*}}", comdat, align 8 declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1 Index: llvm/test/Instrumentation/InstrProfiling/icall.ll =================================================================== --- llvm/test/Instrumentation/InstrProfiling/icall.ll +++ llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -37,9 +37,9 @@ ; DYN-NOT: @__profvp_foo ; DYN-NOT: @__llvm_prf_vnodes -; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) -; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0) -; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0) +; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [3 x i16] }* @__profd_foo to i8*), i32 0) +; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [3 x i16] }* @__profd_foo to i8*), i32 zeroext 0) +; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [3 x i16] }* @__profd_foo to i8*), i32 signext 0) ; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32) ; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext) Index: llvm/test/Transforms/PGOProfile/Inputs/looptc.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/looptc.proftext @@ -0,0 +1,21 @@ +# IR level Instrumentation Flag +:ir +main +# Func Hash: +29212902728 +# Num Counters: +2 +# Counter Values: +13 +3 +# Num Value Kinds: +1 +# ValueKind = IPVK_LoopTripCnt: +2 +# NumValueSites: +1 +# Number of Values seen for site #1 +2 
+# The value:count pairs for site #1 +4:2 +5:1 Index: llvm/test/Transforms/PGOProfile/comdat_internal.ll =================================================================== --- llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -13,9 +13,9 @@ ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat ; CHECK-NOT: __profn__stdin__foo ; CHECK: @__profc__stdin__foo.[[FOO_HASH:[0-9]+]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 -; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null +; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [3 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) -; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat, align 8 +; CHECK-SAME: , i8* null, i32 1, [3 x i16] zeroinitializer }, section "__llvm_prf_data", comdat, align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.used Index: llvm/test/Transforms/PGOProfile/looptc.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/looptc.ll @@ -0,0 +1,58 @@ +;------------------------------------------------------------------------------ +; 1) Test that loop trip count profile instrumentation is generated in the +; correct location. 
Note this is only implemented for the new PM.; +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -pgo-loop-trip-count -S | FileCheck %s --check-prefix=CG-CHECK +; +; To generate the looptc.proftext file, do: +; 1) clang -fprofile-generate -mllvm -pgo-loop-trip-count %s -o looptc.exe +; 2) LLVM_PROFILE_FILE=t.profraw looptc.exe 4 +; LLVM_PROFILE_FILE=t.profraw looptc.exe 4 +; LLVM_PROFILE_FILE=t.profraw looptc.exe 5 +; 3) llvm-profdata merge -text -output=looptc.proftext t.profraw +; +; 2) Test that llvm-profdata can consume the textual profile data containing +; loop trip count profile info. +; RUN: llvm-profdata merge %S/Inputs/looptc.proftext -o %t.profdata +; +; 3) Test that loop trip count profile is correctly read and annotated on the IR. +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-loop-trip-count -S | FileCheck %s --check-prefix=VP-ANNOTATION +; +;------------------------------------------------------------------------------- + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +declare i32 @atoi(i8*) + +@__const.array = private unnamed_addr constant <{ [10 x i32], [90 x i32] }> <{ [10 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10], [90 x i32] zeroinitializer }>, align 4 + +define i32 @main(i32 signext %argc, i8** %argv) { +entry: + %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1 + %0 = load i8*, i8** %arrayidx, align 8 + %call = call signext i32 @atoi(i8* %0) +; CG-CHECK: [[ZEXT:%[0-9]+]] = zext i32 %call to i64 +; CG-CHECK: call void @__llvm_profile_instrument_target(i64 [[ZEXT]], i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [3 x i16] }* @__profd_main to i8*), i32 zeroext 0) + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %c.0 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %add2, %for.body ] + %cmp = icmp ult i32 %i.0, %call + br i1 %cmp, label %for.body, label %for.cond.cleanup + 
+for.cond.cleanup: ; preds = %for.cond + ret i32 %c.0 + +for.body: ; preds = %for.cond + %idxprom = zext i32 %i.0 to i64 + %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* bitcast (<{ [10 x i32], [90 x i32] }>* @__const.array to [100 x i32]*), i64 0, i64 %idxprom + %1 = load i32, i32* %arrayidx1, align 4 + %add = add nsw i32 %c.0, %1 + %add2 = add i32 %i.0, 1 + br label %for.cond +; VP-ANNOTATION: br label %for.cond, !llvm.loop ![[LOOP_MD:[0-9]+]] +} + +; VP-ANNOTATION: ![[LOOP_MD]] = distinct !{![[LOOP_MD]], ![[LOOP_TRIP_COUNT:[0-9]*]]} +; VP-ANNOTATION: ![[LOOP_TRIP_COUNT]] = !{!"VP", i32 2, i64 3, i64 4, i64 2, i64 5, i64 1} Index: llvm/test/Transforms/PGOProfile/memcpy.ll =================================================================== --- llvm/test/Transforms/PGOProfile/memcpy.ll +++ llvm/test/Transforms/PGOProfile/memcpy.ll @@ -23,7 +23,7 @@ for.body3: %conv = sext i32 %add to i64 -; CHECK: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0, i64 0, i64 8, i64 8192) +; CHECK: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [3 x i16] }* @__profd_foo to i8*), i32 0, i64 0, i64 8, i64 8192) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false) %inc = add nsw i32 %j.0, 1 br label %for.cond1 Index: llvm/test/Transforms/PGOProfile/select1.ll =================================================================== --- llvm/test/Transforms/PGOProfile/select1.ll +++ llvm/test/Transforms/PGOProfile/select1.ll @@ -8,6 +8,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare void @use(i32) + define i32 @test_br_2(i32 %i) { entry: %cmp = icmp sgt i32 %i, 0 @@ -19,9 +21,9 @@ ;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]]) ;NOSELECT-NOT: call void 
@llvm.instrprof.increment.step %s = select i1 %cmp, i32 %add, i32 0 -;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]] +;USE: %s = select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]] ;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3} - + call void @use(i32 %s) br label %if.end if.else: Index: llvm/test/tools/llvm-profdata/raw-64-bits-be.test =================================================================== --- llvm/test/tools/llvm-profdata/raw-64-bits-be.test +++ llvm/test/tools/llvm-profdata/raw-64-bits-be.test @@ -1,3 +1,4 @@ +The Header: __llvm_profile_header RUN: printf '\377lprofr\201' > %t RUN: printf '\0\0\0\0\0\0\0\5' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t @@ -9,12 +10,14 @@ RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +N __llvm_profile_data objects, where N is Header.DataSize RUN: printf '\134\370\302\114\333\030\275\254' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\344\023\165\112\031\035\265\067' >> %t RUN: printf '\0\0\0\0\0\0\0\02' >> %t @@ -22,7 +25,9 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\02\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +Counters RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t Index: llvm/test/tools/llvm-profdata/raw-64-bits-le.test =================================================================== --- llvm/test/tools/llvm-profdata/raw-64-bits-le.test +++ llvm/test/tools/llvm-profdata/raw-64-bits-le.test @@ -1,3 +1,4 @@ +The Header: __llvm_profile_header RUN: printf '\201rforpl\377' > %t RUN: printf '\5\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t @@ -9,12 +10,14 @@ RUN: printf '\0\0\4\0\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +N 
__llvm_profile_data objects, where N is Header.DataSize RUN: printf '\254\275\030\333\114\302\370\134' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\067\265\035\031\112\165\023\344' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t @@ -22,7 +25,9 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +Counters RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t Index: llvm/test/tools/llvm-profdata/raw-two-profiles.test =================================================================== --- llvm/test/tools/llvm-profdata/raw-two-profiles.test +++ llvm/test/tools/llvm-profdata/raw-two-profiles.test @@ -15,6 +15,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw + RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw @@ -36,6 +38,7 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw