diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1306,7 +1306,10 @@ } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); - if (PGOReader->isIRLevelProfile()) { + // Currently memprof profiles are only added at the IR level. Mark the profile + // type as IR in that case as well and the subsequent matching needs to detect + // which is available (might be one or both). + if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe new file mode 100755 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@&1 | FileCheck %s -check-prefix=INSTRUMENT +// INSTRUMENT: Running pass: MemProfilerPass on main +// INSTRUMENT: Running pass: ModuleMemProfilerPass on [module] + +// TODO: Use text profile inputs once that is available for memprof. +// +// The following commands were used to compile the source to instrumented +// executables and collect raw binary format profiles: +// +// # Collect memory profile: +// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ +// -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ +// -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ +// memprof.cpp -o memprof.exe -fmemory-profile +// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw +// +// RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata + +// Profile use: +// Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile. +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE +// USE: Running pass: PGOInstrumentationUse on [module] + +char *foo() { + return new char[10]; +} +int main() { + char *a = foo(); + delete[] a; + return 0; +} diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -56,6 +56,10 @@ bool isAllocationFn(const Value *V, function_ref GetTLI); +/// Tests if a value is a call or invoke to a library function that +/// allocates memory via new. +bool isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI); + /// Tests if a value is a call or invoke to a library function that /// allocates memory similar to malloc or calloc. bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI); diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -118,6 +118,9 @@ /// Return true if the profile only instruments function entries. virtual bool functionEntryOnly() const = 0; + /// Return true if profile includes a memory profile. + virtual bool hasMemoryProfile() const = 0; + /// Returns a BitsetEnum describing the attributes of the profile. To check /// individual attributes prefer using the helpers above. virtual InstrProfKind getProfileKind() const = 0; @@ -233,6 +236,11 @@ return static_cast(ProfileKind & InstrProfKind::FunctionEntryOnly); } + bool hasMemoryProfile() const override { + // TODO: Add support for text format memory profiles. + return false; + } + InstrProfKind getProfileKind() const override { return ProfileKind; } /// Read the header. @@ -322,6 +330,12 @@ return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0; } + bool hasMemoryProfile() const override { + // Memory profiles have a separate raw format, so this should never be set. + assert(!(Version & VARIANT_MASK_MEMPROF)); + return false; + } + /// Returns a BitsetEnum describing the attributes of the raw instr profile. InstrProfKind getProfileKind() const override; @@ -466,6 +480,7 @@ virtual bool instrEntryBBEnabled() const = 0; virtual bool hasSingleByteCoverage() const = 0; virtual bool functionEntryOnly() const = 0; + virtual bool hasMemoryProfile() const = 0; virtual InstrProfKind getProfileKind() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -532,6 +547,10 @@ return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0; } + bool hasMemoryProfile() const override { + return (FormatVersion & VARIANT_MASK_MEMPROF) != 0; + } + InstrProfKind getProfileKind() const override; Error populateSymtab(InstrProfSymtab &Symtab) override { @@ -605,6 +624,8 @@ bool functionEntryOnly() const override { return Index->functionEntryOnly(); } + bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); } + /// Returns a BitsetEnum describing the attributes of the indexed instr /// profile. InstrProfKind getProfileKind() const override { diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -303,6 +303,12 @@ checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc); } +/// Tests if a value is a call or invoke to a library function that +/// allocates memory via new. +bool llvm::isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, OpNewLike, TLI).hasValue(); +} + /// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -65,6 +65,8 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -114,14 +116,17 @@ #include #include #include +#include #include #include +#include #include #include #include #include using namespace llvm; +using namespace llvm::memprof; using ProfileCount = Function::ProfileCount; using VPCandidateInfo = ValueProfileCollector::CandidateInfo; @@ -136,6 +141,7 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); +STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); STATISTIC(NumOfCSPGOSelectInsts, @@ -296,6 +302,10 @@ "pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold")); +static cl::opt MatchMemProf( + "pgo-match-memprof", cl::init(true), cl::Hidden, + cl::desc("Perform matching and annotation of memprof profiles.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -504,6 +514,7 @@ void renameComdatFunction(); public: + const TargetLibraryInfo &TLI; std::vector> ValueSites; SelectInstVisitor SIVisitor; std::string FuncName; @@ -542,7 +553,7 @@ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, bool InstrumentFuncEntry = true) : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), - ValueSites(IPVK_Last + 1), SIVisitor(Func), + TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, InstrumentFuncEntry, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); @@ -1021,6 +1032,9 @@ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, InstrProfRecord::CountPseudoKind &PseudoKind); + // Read memprof data for the instrumented function from profile. + bool readMemprof(IndexedInstrProfReader *PGOReader); + // Populate the counts for all BBs. void populateCounters(); @@ -1221,6 +1235,251 @@ F.setMetadata(LLVMContext::MD_annotation, MD); } +static void addCallsiteMetadata(Instruction &I, + std::vector &InlinedCallStack, + LLVMContext &Ctx) { + I.setMetadata(LLVMContext::MD_callsite, + buildCallstackMetadata(InlinedCallStack, Ctx)); +} + +static hash_code computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, + uint32_t Column) { + return hash_combine(Function, LineOffset, Column); +} + +static hash_code computeStackId(const memprof::Frame &Frame) { + return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); +} + +static void addCallStack(CallStackTrie &AllocTrie, + const AllocationInfo *AllocInfo) { + SmallVector StackIds; + for (auto StackFrame : AllocInfo->CallStack) + StackIds.push_back(computeStackId(StackFrame)); + auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(), + AllocInfo->Info.getMinSize(), + AllocInfo->Info.getMinLifetime()); + AllocTrie.addCallStack(AllocType, StackIds); +} + +// Helper to compare the InlinedCallStack computed from an instruction's debug +// info to a list of Frames from profile data (either the allocation data or a +// callsite). For callsites, the StartIndex to use in the Frame array may be +// non-zero. +static bool +stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, + ArrayRef InlinedCallStack, + unsigned StartIndex = 0) { + auto StackFrame = ProfileCallStack.begin() + StartIndex; + auto InlCallStackIter = InlinedCallStack.begin(); + for (; StackFrame != ProfileCallStack.end() && + InlCallStackIter != InlinedCallStack.end(); + ++StackFrame, ++InlCallStackIter) { + uint64_t StackId = computeStackId(*StackFrame); + if (StackId != *InlCallStackIter) + return false; + } + // Return true if we found and matched all stack ids from the call + // instruction. + return InlCallStackIter == InlinedCallStack.end(); +} + +bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) { + if (!MatchMemProf) + return true; + + auto &Ctx = M->getContext(); + + auto FuncGUID = Function::getGUID(FuncInfo.FuncName); + Expected MemProfResult = + PGOReader->getMemProfRecord(FuncGUID); + if (Error E = MemProfResult.takeError()) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + auto Err = IPE.get(); + bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " + << FuncInfo.FuncName << ": "); + if (Err == instrprof_error::unknown_function) { + NumOfMemProfMissing++; + SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); + } else if (Err == instrprof_error::hash_mismatch) { + SkipWarning = + NoPGOWarnMismatch || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + } + + if (SkipWarning) + return; + + std::string Msg = + (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") + + std::to_string(FuncInfo.FunctionHash)) + .str(); + + Ctx.diagnose( + DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + }); + return false; + } + + // Build maps of the location hash to all profile data with that leaf location + // (allocation info and the callsites). + std::map> LocHashToAllocInfo; + // For the callsites we need to record the index of the associated frame in + // the frame array (see comments below where the map entries are added). + std::map *, unsigned>>> + LocHashToCallSites; + const auto MemProfRec = std::move(MemProfResult.get()); + for (auto &AI : MemProfRec.AllocSites) { + // Associate the allocation info with the leaf frame. The later matching + // code will match any inlined call sequences in the IR with a longer prefix + // of call stack frames. + uint64_t StackId = computeStackId(AI.CallStack[0]); + LocHashToAllocInfo[StackId].insert(&AI); + } + for (auto &CS : MemProfRec.CallSites) { + // Need to record all frames from leaf up to and including this function, + // as any of these may or may not have been inlined at this point. + unsigned Idx = 0; + for (auto &StackFrame : CS) { + uint64_t StackId = computeStackId(StackFrame); + LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); + // Once we find this function, we can stop recording. + if (StackFrame.Function == FuncGUID) + break; + } + assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); + } + + auto GetOffset = [](const DILocation *DIL) { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; + }; + + // Now walk the instructions, looking up the associated profile data using + // dbug locations. + for (auto &BB : F) { + for (auto &I : BB) { + if (I.isDebugOrPseudoInst()) + continue; + // We are only interested in calls (allocation or interior call stack + // context calls). + auto *CI = dyn_cast(&I); + if (!CI) + continue; + auto *CalledFunction = CI->getCalledFunction(); + if (CalledFunction && CalledFunction->isIntrinsic()) + continue; + // List of call stack ids computed from the location hashes on debug + // locations (leaf to inlined at root). + std::vector InlinedCallStack; + // Was the leaf location found in one of the profile maps? + bool LeafFound = false; + // If leaf was found in a map, iterators pointing to its location in both + // of the maps. It might exist in neither, one, or both (the latter case + // can happen because we don't currently have discriminators to + // distinguish the case when a single line/col maps to both an allocation + // and another callsite). + std::map>::iterator + AllocInfoIter; + std::map *, + unsigned>>>::iterator CallSitesIter; + for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; + DIL = DIL->getInlinedAt()) { + // Use C++ linkage name if possible. Need to compile with + // -fdebug-info-for-profiling to get linkage name. + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + auto CalleeGUID = Function::getGUID(Name); + auto StackId = + computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); + // LeafFound will only be false on the first iteration, since we either + // set it true or break out of the loop below. + if (!LeafFound) { + AllocInfoIter = LocHashToAllocInfo.find(StackId); + CallSitesIter = LocHashToCallSites.find(StackId); + // Check if the leaf is in one of the maps. If not, no need to look + // further at this call. + if (AllocInfoIter == LocHashToAllocInfo.end() && + CallSitesIter == LocHashToCallSites.end()) + break; + LeafFound = true; + } + InlinedCallStack.push_back(StackId); + } + // If leaf not in either of the maps, skip inst. + if (!LeafFound) + continue; + + // First add !memprof metadata from allocation info, if we found the + // instruction's leaf location in that map, and if the rest of the + // instruction's locations match the prefix Frame locations on an + // allocation context with the same leaf. + if (AllocInfoIter != LocHashToAllocInfo.end()) { + // Only consider allocations via new, to reduce unnecessary metadata, + // since those are the only allocations that will be targeted initially. + if (!isNewLikeFn(CI, &FuncInfo.TLI)) + continue; + // We may match this instruction's location list to multiple MIB + // contexts. Add them to a Trie specialized for trimming the contexts to + // the minimal needed to disambiguate contexts with unique behavior. + CallStackTrie AllocTrie; + for (auto *AllocInfo : AllocInfoIter->second) { + // Check the full inlined call stack against this one. + // If we found and thus matched all frames on the call, include + // this MIB. + if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, + InlinedCallStack)) + addCallStack(AllocTrie, AllocInfo); + } + // We might not have matched any to the full inlined call stack. + // But if we did, create and attach metadata, or a function attribute if + // all contexts have identical profiled behavior. + if (!AllocTrie.empty()) { + // MemprofMDAttached will be false if a function attribute was + // attached. + bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); + assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); + if (MemprofMDAttached) { + // Add callsite metadata for the instruction's location list so that + // it simpler later on to identify which part of the MIB contexts + // are from this particular instruction (including during inlining, + // when the callsite metdata will be updated appropriately). + // FIXME: can this be changed to strip out the matching stack + // context ids from the MIB contexts and not add any callsite + // metadata here to save space? + addCallsiteMetadata(I, InlinedCallStack, Ctx); + } + } + continue; + } + + // Otherwise, add callsite metadata. If we reach here then we found the + // instruction's leaf location in the callsites map and not the allocation + // map. + assert(CallSitesIter != LocHashToCallSites.end()); + for (auto CallStackIdx : CallSitesIter->second) { + // If we found and thus matched all frames on the call, create and + // attach call stack metadata. + if (stackFrameIncludesInlinedCallStack( + *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + // Only need to find one with a matching call stack and add a single + // callsite metadata. + break; + } + } + } + } + + return true; +} + // Read the profile from ProfileFileName and assign the value to the // instrumented BB and the edges. This function also updates ProgramMaxCount. // Return true if the profile are successfully read, and false on errors. @@ -1774,7 +2033,7 @@ return false; // TODO: might need to change the warning once the clang option is finalized. - if (!PGOReader->isIRLevelProfile()) { + if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( ProfileFileName.data(), "Not an IR level instrumentation profile")); return false; @@ -1821,11 +2080,25 @@ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); + // Read and match memprof first since we do this via debug info and can + // match even if there is an IR mismatch detected for regular PGO below. + if (PGOReader->hasMemoryProfile()) + Func.readMemprof(PGOReader.get()); + + if (!PGOReader->isIRLevelProfile()) + continue; + // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, // it means the profile for the function is unrepresentative and this // function is actually hot / warm. We will reset the function hot / cold // attribute and drop all the profile counters. InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; + + // When AllMinusOnes is true, it means the profile for the function + // is unrepresentative and this function is actually hot. Set the + // entry count of the function to be multiple times of hot threshold + // and drop all its internal counters. + bool AllMinusOnes = false; bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) continue; diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe new file mode 100755 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ +;; #include +;; #include +;; char *foo() { +;; return new char[10]; +;; } +;; char *foo2() { +;; return foo(); +;; } +;; char *bar() { +;; return foo2(); +;; } +;; char *baz() { +;; return foo2(); +;; } +;; char *recurse(unsigned n) { +;; if (!n) +;; return foo(); +;; return recurse(n-1); +;; } +;; int main(int argc, char **argv) { +;; // Test allocations with different combinations of stack contexts and +;; // coldness (based on lifetime, since they are all accessed a single time +;; // per byte via the memset). +;; char *a = new char[10]; +;; char *b = new char[10]; +;; char *c = foo(); +;; char *d = foo(); +;; char *e = bar(); +;; char *f = baz(); +;; memset(a, 0, 10); +;; memset(b, 0, 10); +;; memset(c, 0, 10); +;; memset(d, 0, 10); +;; memset(e, 0, 10); +;; memset(f, 0, 10); +;; // a and c have short lifetimes +;; delete[] a; +;; delete[] c; +;; // b, d, e, and f have long lifetimes and will be detected as cold by default. +;; sleep(200); +;; delete[] b; +;; delete[] d; +;; delete[] e; +;; delete[] f; +;; // Loop ensures the two calls to recurse have stack contexts that only differ +;; // in one level of recursion. We should get two stack contexts reflecting the +;; // different levels of recursion and different allocation behavior (since the +;; // first has a very long lifetime and the second has a short lifetime). +;; for (unsigned i = 0; i < 2; i++) { +;; char *g = recurse(i + 3); +;; memset(g, 0, 10); +;; if (!i) +;; sleep(200); +;; delete[] g; +;; } +;; return 0; +;; } +;; +;; The following commands were used to compile the source to instrumented +;; executables and collect raw binary format profiles: +;; +;; # Collect memory profile: +;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ +;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ +;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ +;; memprof.cc -o memprof.exe -fmemory-profile +;; $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw +;; +;; # Collect IR PGO profile: +;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ +;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ +;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ +;; memprof.cc -o pgo.exe -fprofile-generate=. +;; $ ./pgo.exe +;; $ mv default_*.profraw memprof_pgo.profraw +;; +;; # Generate below LLVM IR for use in matching: +;; $ clang++ -gmlt -fdebug-info-for-profiling -fno-omit-frame-pointer \ +;; -fno-optimize-sibling-calls memprof.cc -S -emit-llvm + +;; Generate indexed profiles of all combinations: +; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata +; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.pgomemprofdata +; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw -o %t.pgoprofdata + +;; In all below cases we should not get any messages about missing profile data +;; for any functions. Either we are not performing any matching for a particular +;; profile type or we are performing the matching and it should be successful. +; ALL-NOT: memprof record not found for function hash +; ALL-NOT: no profile data available for function + +;; Feed back memprof-only profile +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY +; There should not be any PGO metadata +; MEMPROFONLY-NOT: !prof + +;; Feed back pgo-only profile +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY +; There should not be any memprof related metadata +; PGOONLY-NOT: !memprof +; PGOONLY-NOT: !callsite + +;; Feed back pgo+memprof-only profile +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL + +; ModuleID = 'memprof.cc' +source_filename = "memprof.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress noinline optnone uwtable +; ALL-LABEL: define dso_local noundef ptr @_Z3foov() +; There should be some PGO metadata +; PGO: !prof +define dso_local noundef ptr @_Z3foov() #0 !dbg !10 { +entry: + ; MEMPROF: call {{.*}} @_Znam{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]] + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !13 + ret ptr %call, !dbg !14 +} + +; Function Attrs: nobuiltin allocsize(0) +declare noundef nonnull ptr @_Znam(i64 noundef) #1 + +; Function Attrs: mustprogress noinline optnone uwtable +; ALL-LABEL: define dso_local noundef ptr @_Z4foo2v() +define dso_local noundef ptr @_Z4foo2v() #0 !dbg !15 { +entry: + ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C2:[0-9]+]] + %call = call noundef ptr @_Z3foov(), !dbg !16 + ret ptr %call, !dbg !17 +} + +; Function Attrs: mustprogress noinline optnone uwtable +define dso_local noundef ptr @_Z3barv() #0 !dbg !18 { +entry: + ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C3:[0-9]+]] + %call = call noundef ptr @_Z4foo2v(), !dbg !19 + ret ptr %call, !dbg !20 +} + +; Function Attrs: mustprogress noinline optnone uwtable +define dso_local noundef ptr @_Z3bazv() #0 !dbg !21 { +entry: + ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C4:[0-9]+]] + %call = call noundef ptr @_Z4foo2v(), !dbg !22 + ret ptr %call, !dbg !23 +} + +; Function Attrs: mustprogress noinline optnone uwtable +define dso_local noundef ptr @_Z7recursej(i32 noundef %n) #0 !dbg !24 { +entry: + %retval = alloca ptr, align 8 + %n.addr = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + %0 = load i32, ptr %n.addr, align 4, !dbg !25 + %tobool = icmp ne i32 %0, 0, !dbg !25 + br i1 %tobool, label %if.end, label %if.then, !dbg !26 + +if.then: ; preds = %entry + ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C5:[0-9]+]] + %call = call noundef ptr @_Z3foov(), !dbg !27 + store ptr %call, ptr %retval, align 8, !dbg !28 + br label %return, !dbg !28 + +if.end: ; preds = %entry + %1 = load i32, ptr %n.addr, align 4, !dbg !29 + %sub = sub i32 %1, 1, !dbg !30 + ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C6:[0-9]+]] + %call1 = call noundef ptr @_Z7recursej(i32 noundef %sub), !dbg !31 + store ptr %call1, ptr %retval, align 8, !dbg !32 + br label %return, !dbg !32 + +return: ; preds = %if.end, %if.then + %2 = load ptr, ptr %retval, align 8, !dbg !33 + ret ptr %2, !dbg !33 +} + +; Function Attrs: mustprogress noinline norecurse optnone uwtable +define dso_local noundef i32 @main(i32 noundef %argc, ptr noundef %argv) #2 !dbg !34 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca ptr, align 8 + %a = alloca ptr, align 8 + %b = alloca ptr, align 8 + %c = alloca ptr, align 8 + %d = alloca ptr, align 8 + %e = alloca ptr, align 8 + %f = alloca ptr, align 8 + %i = alloca i32, align 4 + %g = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store i32 %argc, ptr %argc.addr, align 4 + store ptr %argv, ptr %argv.addr, align 8 + ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A1:[0-9]+]] + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !35 + store ptr %call, ptr %a, align 8, !dbg !36 + ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A2:[0-9]+]] + %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !37 + store ptr %call1, ptr %b, align 8, !dbg !38 + ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C7:[0-9]+]] + %call2 = call noundef ptr @_Z3foov(), !dbg !39 + store ptr %call2, ptr %c, align 8, !dbg !40 + ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C8:[0-9]+]] + %call3 = call noundef ptr @_Z3foov(), !dbg !41 + store ptr %call3, ptr %d, align 8, !dbg !42 + ; MEMPROF: call {{.*}} @_Z3barv{{.*}} !callsite ![[C9:[0-9]+]] + %call4 = call noundef ptr @_Z3barv(), !dbg !43 + store ptr %call4, ptr %e, align 8, !dbg !44 + ; MEMPROF: call {{.*}} @_Z3bazv{{.*}} !callsite ![[C10:[0-9]+]] + %call5 = call noundef ptr @_Z3bazv(), !dbg !45 + store ptr %call5, ptr %f, align 8, !dbg !46 + %0 = load ptr, ptr %a, align 8, !dbg !47 + call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 10, i1 false), !dbg !48 + %1 = load ptr, ptr %b, align 8, !dbg !49 + call void @llvm.memset.p0.i64(ptr align 1 %1, i8 0, i64 10, i1 false), !dbg !50 + %2 = load ptr, ptr %c, align 8, !dbg !51 + call void @llvm.memset.p0.i64(ptr align 1 %2, i8 0, i64 10, i1 false), !dbg !52 + %3 = load ptr, ptr %d, align 8, !dbg !53 + call void @llvm.memset.p0.i64(ptr align 1 %3, i8 0, i64 10, i1 false), !dbg !54 + %4 = load ptr, ptr %e, align 8, !dbg !55 + call void @llvm.memset.p0.i64(ptr align 1 %4, i8 0, i64 10, i1 false), !dbg !56 + %5 = load ptr, ptr %f, align 8, !dbg !57 + call void @llvm.memset.p0.i64(ptr align 1 %5, i8 0, i64 10, i1 false), !dbg !58 + %6 = load ptr, ptr %a, align 8, !dbg !59 + %isnull = icmp eq ptr %6, null, !dbg !60 + br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !60 + +delete.notnull: ; preds = %entry + call void @_ZdaPv(ptr noundef %6) #7, !dbg !61 + br label %delete.end, !dbg !61 + +delete.end: ; preds = %delete.notnull, %entry + %7 = load ptr, ptr %c, align 8, !dbg !63 + %isnull6 = icmp eq ptr %7, null, !dbg !64 + br i1 %isnull6, label %delete.end8, label %delete.notnull7, !dbg !64 + +delete.notnull7: ; preds = %delete.end + call void @_ZdaPv(ptr noundef %7) #7, !dbg !65 + br label %delete.end8, !dbg !65 + +delete.end8: ; preds = %delete.notnull7, %delete.end + %call9 = call i32 @sleep(i32 noundef 200), !dbg !66 + %8 = load ptr, ptr %b, align 8, !dbg !67 + %isnull10 = icmp eq ptr %8, null, !dbg !68 + br i1 %isnull10, label %delete.end12, label %delete.notnull11, !dbg !68 + +delete.notnull11: ; preds = %delete.end8 + call void @_ZdaPv(ptr noundef %8) #7, !dbg !69 + br label %delete.end12, !dbg !69 + +delete.end12: ; preds = %delete.notnull11, %delete.end8 + %9 = load ptr, ptr %d, align 8, !dbg !70 + %isnull13 = icmp eq ptr %9, null, !dbg !71 + br i1 %isnull13, label %delete.end15, label %delete.notnull14, !dbg !71 + +delete.notnull14: ; preds = %delete.end12 + call void @_ZdaPv(ptr noundef %9) #7, !dbg !72 + br label %delete.end15, !dbg !72 + +delete.end15: ; preds = %delete.notnull14, %delete.end12 + %10 = load ptr, ptr %e, align 8, !dbg !73 + %isnull16 = icmp eq ptr %10, null, !dbg !74 + br i1 %isnull16, label %delete.end18, label %delete.notnull17, !dbg !74 + +delete.notnull17: ; preds = %delete.end15 + call void @_ZdaPv(ptr noundef %10) #7, !dbg !75 + br label %delete.end18, !dbg !75 + +delete.end18: ; preds = %delete.notnull17, %delete.end15 + %11 = load ptr, ptr %f, align 8, !dbg !76 + %isnull19 = icmp eq ptr %11, null, !dbg !77 + br i1 %isnull19, label %delete.end21, label %delete.notnull20, !dbg !77 + +delete.notnull20: ; preds = %delete.end18 + call void @_ZdaPv(ptr noundef %11) #7, !dbg !78 + br label %delete.end21, !dbg !78 + +delete.end21: ; preds = %delete.notnull20, %delete.end18 + store i32 0, ptr %i, align 4, !dbg !79 + br label %for.cond, !dbg !80 + +for.cond: ; preds = %for.inc, %delete.end21 + %12 = load i32, ptr %i, align 4, !dbg !81 + %cmp = icmp ult i32 %12, 2, !dbg !82 + br i1 %cmp, label %for.body, label %for.end, !dbg !83 + +for.body: ; preds = %for.cond + %13 = load i32, ptr %i, align 4, !dbg !84 + %add = add i32 %13, 3, !dbg !85 + ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C11:[0-9]+]] + %call22 = call noundef ptr @_Z7recursej(i32 noundef %add), !dbg !86 + store ptr %call22, ptr %g, align 8, !dbg !87 + %14 = load ptr, ptr %g, align 8, !dbg !88 + call void @llvm.memset.p0.i64(ptr align 1 %14, i8 0, i64 10, i1 false), !dbg !89 + %15 = load i32, ptr %i, align 4, !dbg !90 + %tobool = icmp ne i32 %15, 0, !dbg !90 + br i1 %tobool, label %if.end, label %if.then, !dbg !91 + +if.then: ; preds = %for.body + %call23 = call i32 @sleep(i32 noundef 200), !dbg !92 + br label %if.end, !dbg !92 + +if.end: ; preds = %if.then, %for.body + %16 = load ptr, ptr %g, align 8, !dbg !93 + %isnull24 = icmp eq ptr %16, null, !dbg !94 + br i1 %isnull24, label %delete.end26, label %delete.notnull25, !dbg !94 + +delete.notnull25: ; preds = %if.end + call void @_ZdaPv(ptr noundef %16) #7, !dbg !95 + br label %delete.end26, !dbg !95 + +delete.end26: ; preds = %delete.notnull25, %if.end + br label %for.inc, !dbg !96 + +for.inc: ; preds = %delete.end26 + %17 = load i32, ptr %i, align 4, !dbg !97 + %inc = add i32 %17, 1, !dbg !97 + store i32 %inc, ptr %i, align 4, !dbg !97 + br label %for.cond, !dbg !99, !llvm.loop !100 + +for.end: ; preds = %for.cond + ret i32 0, !dbg !103 +} + +; MEMPROF: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" } +; MEMPROF: #[[A2]] = { builtin allocsize(0) "memprof"="cold" } +; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]], ![[MIB3:[0-9]+]], ![[MIB4:[0-9]+]], ![[MIB5:[0-9]+]]} +; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"notcold"} +; MEMPROF: ![[STACK1]] = !{i64 -2458008693472584243, i64 3952224878458323, i64 -6408471049535768163, i64 -6408471049535768163, i64 -6408471049535768163, i64 -6408471049535768163} +; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold"} +; MEMPROF: ![[STACK2]] = !{i64 -2458008693472584243, i64 3952224878458323, i64 -6408471049535768163, i64 -6408471049535768163, i64 -6408471049535768163, i64 -2523213715586649525} +; MEMPROF: ![[MIB3]] = !{![[STACK3:[0-9]+]], !"cold"} +; MEMPROF: ![[STACK3]] = !{i64 -2458008693472584243, i64 4060711043150162853} +; MEMPROF: ![[MIB4]] = !{![[STACK4:[0-9]+]], !"notcold"} +; MEMPROF: ![[STACK4]] = !{i64 -2458008693472584243, i64 6197270713521362189} +; MEMPROF: ![[MIB5]] = !{![[STACK5:[0-9]+]], !"cold"} +; MEMPROF: ![[STACK5]] = !{i64 -2458008693472584243, i64 -8079659623765193173} +; MEMPROF: ![[C1]] = !{i64 -2458008693472584243} +; MEMPROF: ![[C2]] = !{i64 -8079659623765193173} +; MEMPROF: ![[C3]] = !{i64 -972865200055133905} +; MEMPROF: ![[C4]] = !{i64 -4805294506621015872} +; MEMPROF: ![[C5]] = !{i64 3952224878458323} +; MEMPROF: ![[C6]] = !{i64 -6408471049535768163} +; MEMPROF: ![[C7]] = !{i64 6197270713521362189} +; MEMPROF: ![[C8]] = !{i64 4060711043150162853} +; MEMPROF: ![[C9]] = !{i64 1503792662459039327} +; MEMPROF: ![[C10]] = !{i64 -1910610273966575552} +; MEMPROF: ![[C11]] = !{i64 -2523213715586649525} + +; Function Attrs: argmemonly nofree nounwind willreturn writeonly +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: nobuiltin nounwind +declare void @_ZdaPv(ptr noundef) #4 + +declare i32 @sleep(i32 noundef) #5 + +attributes #0 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { mustprogress noinline norecurse optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { argmemonly nofree nounwind willreturn writeonly } +attributes #4 = { nobuiltin nounwind "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #5 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { builtin allocsize(0) } +attributes #7 = { builtin nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!9 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)"} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 5, column: 10, scope: !10) +!14 = !DILocation(line: 5, column: 3, scope: !10) +!15 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2v", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!16 = !DILocation(line: 8, column: 10, scope: !15) +!17 = !DILocation(line: 8, column: 3, scope: !15) +!18 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 10, type: !11, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!19 = !DILocation(line: 11, column: 10, scope: !18) +!20 = !DILocation(line: 11, column: 3, scope: !18) +!21 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 13, type: !11, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!22 = !DILocation(line: 14, column: 10, scope: !21) +!23 = !DILocation(line: 14, column: 3, scope: !21) +!24 = distinct !DISubprogram(name: "recurse", linkageName: "_Z7recursej", scope: !1, file: !1, line: 16, type: !11, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!25 = !DILocation(line: 17, column: 8, scope: !24) +!26 = !DILocation(line: 17, column: 7, scope: !24) +!27 = !DILocation(line: 18, column: 12, scope: !24) +!28 = !DILocation(line: 18, column: 5, scope: !24) +!29 = !DILocation(line: 19, column: 18, scope: !24) +!30 = !DILocation(line: 19, column: 19, scope: !24) +!31 = !DILocation(line: 19, column: 10, scope: !24) +!32 = !DILocation(line: 19, column: 3, scope: !24) +!33 = !DILocation(line: 20, column: 1, scope: !24) +!34 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !11, scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12) +!35 = !DILocation(line: 25, column: 13, scope: !34) +!36 = !DILocation(line: 25, column: 9, scope: !34) +!37 = !DILocation(line: 26, column: 13, scope: !34) +!38 = !DILocation(line: 26, column: 9, scope: !34) +!39 = !DILocation(line: 27, column: 13, scope: !34) +!40 = !DILocation(line: 27, column: 9, scope: !34) +!41 = !DILocation(line: 28, column: 13, scope: !34) +!42 = !DILocation(line: 28, column: 9, scope: !34) +!43 = !DILocation(line: 29, column: 13, scope: !34) +!44 = !DILocation(line: 29, column: 9, scope: !34) +!45 = !DILocation(line: 30, column: 13, scope: !34) +!46 = !DILocation(line: 30, column: 9, scope: !34) +!47 = !DILocation(line: 31, column: 10, scope: !34) +!48 = !DILocation(line: 31, column: 3, scope: !34) +!49 = !DILocation(line: 32, column: 10, scope: !34) +!50 = !DILocation(line: 32, column: 3, scope: !34) +!51 = !DILocation(line: 33, column: 10, scope: !34) +!52 = !DILocation(line: 33, column: 3, scope: !34) +!53 = !DILocation(line: 34, column: 10, scope: !34) +!54 = !DILocation(line: 34, column: 3, scope: !34) +!55 = !DILocation(line: 35, column: 10, scope: !34) +!56 = !DILocation(line: 35, column: 3, scope: !34) +!57 = !DILocation(line: 36, column: 10, scope: !34) +!58 = !DILocation(line: 36, column: 3, scope: !34) +!59 = !DILocation(line: 38, column: 12, scope: !34) +!60 = !DILocation(line: 38, column: 3, scope: !34) +!61 = !DILocation(line: 38, column: 3, scope: !62) +!62 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 2) +!63 = !DILocation(line: 39, column: 12, scope: !34) +!64 = !DILocation(line: 39, column: 3, scope: !34) +!65 = !DILocation(line: 39, column: 3, scope: !62) +!66 = !DILocation(line: 41, column: 3, scope: !34) +!67 = !DILocation(line: 42, column: 12, scope: !34) +!68 = !DILocation(line: 42, column: 3, scope: !34) +!69 = !DILocation(line: 42, column: 3, scope: !62) +!70 = !DILocation(line: 43, column: 12, scope: !34) +!71 = !DILocation(line: 43, column: 3, scope: !34) +!72 = !DILocation(line: 43, column: 3, scope: !62) +!73 = !DILocation(line: 44, column: 12, scope: !34) +!74 = !DILocation(line: 44, column: 3, scope: !34) +!75 = !DILocation(line: 44, column: 3, scope: !62) +!76 = !DILocation(line: 45, column: 12, scope: !34) +!77 = !DILocation(line: 45, column: 3, scope: !34) +!78 = !DILocation(line: 45, column: 3, scope: !62) +!79 = !DILocation(line: 51, column: 17, scope: !34) +!80 = !DILocation(line: 51, column: 8, scope: !34) +!81 = !DILocation(line: 51, column: 24, scope: !62) +!82 = !DILocation(line: 51, column: 26, scope: !62) +!83 = !DILocation(line: 51, column: 3, scope: !62) +!84 = !DILocation(line: 52, column: 23, scope: !34) +!85 = !DILocation(line: 52, column: 25, scope: !34) +!86 = !DILocation(line: 52, column: 15, scope: !34) +!87 = !DILocation(line: 52, column: 11, scope: !34) +!88 = !DILocation(line: 53, column: 12, scope: !34) +!89 = !DILocation(line: 53, column: 5, scope: !34) +!90 = !DILocation(line: 54, column: 10, scope: !34) +!91 = !DILocation(line: 54, column: 9, scope: !34) +!92 = !DILocation(line: 55, column: 7, scope: !34) +!93 = !DILocation(line: 56, column: 14, scope: !34) +!94 = !DILocation(line: 56, column: 5, scope: !34) +!95 = !DILocation(line: 56, column: 5, scope: !62) +!96 = !DILocation(line: 57, column: 3, scope: !34) +!97 = !DILocation(line: 51, column: 32, scope: !98) +!98 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 4) +!99 = !DILocation(line: 51, column: 3, scope: !98) +!100 = distinct !{!100, !101, !96, !102} +!101 = !DILocation(line: 51, column: 3, scope: !34) +!102 = !{!"llvm.loop.mustprogress"} +!103 = !DILocation(line: 58, column: 3, scope: !34) diff --git a/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll @@ -0,0 +1,25 @@ +;; Tests that we get a missing memprof error for a function not in profile when +;; using -pgo-warn-missing-function. + +;; TODO: Use text profile inputs once that is available for memprof. + +;; The raw profiles have been generated from the source used for the memprof.ll +;; test (see comments at the top of that file). + +; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata + +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s + +; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev + +; ModuleID = 'memprofmissingfunc.cc' +source_filename = "memprofmissingfunc.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_Z16funcnotinprofilev() { +entry: + ret void +} +