diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -43,6 +43,13 @@ static bool isRequired() { return true; } }; +// TODO: Remove this declaration and make readMemprof static once the matching +// is moved into its own pass. +class IndexedInstrProfReader; +class TargetLibraryInfo; +void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI); + } // namespace llvm #endif diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -18,9 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" @@ -30,16 +33,28 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/HashBuilder.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include using namespace llvm; +using namespace llvm::memprof; #define DEBUG_TYPE "memprof" +namespace llvm { +extern cl::opt PGOWarnMissing; +extern cl::opt NoPGOWarnMismatch; +extern cl::opt NoPGOWarnMismatchComdatWeak; +} // 
namespace llvm + constexpr int LLVM_MEM_PROFILER_VERSION = 1; // Size of memory mapped to a single shadow location. @@ -128,6 +143,7 @@ STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); +STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); namespace { @@ -601,3 +617,251 @@ return FunctionModified; } + +static void addCallsiteMetadata(Instruction &I, + std::vector &InlinedCallStack, + LLVMContext &Ctx) { + I.setMetadata(LLVMContext::MD_callsite, + buildCallstackMetadata(InlinedCallStack, Ctx)); +} + +static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, + uint32_t Column) { + llvm::HashBuilder, llvm::support::endianness::little> + HashBuilder; + HashBuilder.add(Function, LineOffset, Column); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + uint64_t Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; +} + +static uint64_t computeStackId(const memprof::Frame &Frame) { + return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); +} + +static void addCallStack(CallStackTrie &AllocTrie, + const AllocationInfo *AllocInfo) { + SmallVector StackIds; + for (const auto &StackFrame : AllocInfo->CallStack) + StackIds.push_back(computeStackId(StackFrame)); + auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), + AllocInfo->Info.getAllocCount(), + AllocInfo->Info.getTotalLifetime()); + AllocTrie.addCallStack(AllocType, StackIds); +} + +// Helper to compare the InlinedCallStack computed from an instruction's debug +// info to a list of Frames from profile data (either the allocation data or a +// callsite). For callsites, the StartIndex to use in the Frame array may be +// non-zero. 
+static bool +stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, + ArrayRef InlinedCallStack, + unsigned StartIndex = 0) { + auto StackFrame = ProfileCallStack.begin() + StartIndex; + auto InlCallStackIter = InlinedCallStack.begin(); + for (; StackFrame != ProfileCallStack.end() && + InlCallStackIter != InlinedCallStack.end(); + ++StackFrame, ++InlCallStackIter) { + uint64_t StackId = computeStackId(*StackFrame); + if (StackId != *InlCallStackIter) + return false; + } + // Return true if we found and matched all stack ids from the call + // instruction. + return InlCallStackIter == InlinedCallStack.end(); +} + +void llvm::readMemprof(Module &M, Function &F, + IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI) { + auto &Ctx = M.getContext(); + + auto FuncName = getPGOFuncName(F); + auto FuncGUID = Function::getGUID(FuncName); + Expected MemProfResult = + MemProfReader->getMemProfRecord(FuncGUID); + if (Error E = MemProfResult.takeError()) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + auto Err = IPE.get(); + bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName + << ": "); + if (Err == instrprof_error::unknown_function) { + NumOfMemProfMissing++; + SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); + } else if (Err == instrprof_error::hash_mismatch) { + SkipWarning = + NoPGOWarnMismatch || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + } + + if (SkipWarning) + return; + + std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + + Twine(" Hash = ") + std::to_string(FuncGUID)) + .str(); + + Ctx.diagnose( + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); + }); + return; + } + + // Build maps of the location hash to all profile data with that leaf location + // (allocation info and the 
callsites). + std::map> LocHashToAllocInfo; + // For the callsites we need to record the index of the associated frame in + // the frame array (see comments below where the map entries are added). + std::map *, unsigned>>> + LocHashToCallSites; + const auto MemProfRec = std::move(MemProfResult.get()); + for (auto &AI : MemProfRec.AllocSites) { + // Associate the allocation info with the leaf frame. The later matching + // code will match any inlined call sequences in the IR with a longer prefix + // of call stack frames. + uint64_t StackId = computeStackId(AI.CallStack[0]); + LocHashToAllocInfo[StackId].insert(&AI); + } + for (auto &CS : MemProfRec.CallSites) { + // Need to record all frames from leaf up to and including this function, + // as any of these may or may not have been inlined at this point. + unsigned Idx = 0; + for (auto &StackFrame : CS) { + uint64_t StackId = computeStackId(StackFrame); + LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); + // Once we find this function, we can stop recording. + if (StackFrame.Function == FuncGUID) + break; + } + assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); + } + + auto GetOffset = [](const DILocation *DIL) { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; + }; + + // Now walk the instructions, looking up the associated profile data using + // debug locations. + for (auto &BB : F) { + for (auto &I : BB) { + if (I.isDebugOrPseudoInst()) + continue; + // We are only interested in calls (allocation or interior call stack + // context calls). + auto *CI = dyn_cast(&I); + if (!CI) + continue; + auto *CalledFunction = CI->getCalledFunction(); + if (CalledFunction && CalledFunction->isIntrinsic()) + continue; + // List of call stack ids computed from the location hashes on debug + // locations (leaf to inlined at root). + std::vector InlinedCallStack; + // Was the leaf location found in one of the profile maps? 
+ bool LeafFound = false; + // If leaf was found in a map, iterators pointing to its location in both + // of the maps. It might exist in neither, one, or both (the latter case + // can happen because we don't currently have discriminators to + // distinguish the case when a single line/col maps to both an allocation + // and another callsite). + std::map>::iterator + AllocInfoIter; + std::map *, + unsigned>>>::iterator CallSitesIter; + for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; + DIL = DIL->getInlinedAt()) { + // Use C++ linkage name if possible. Need to compile with + // -fdebug-info-for-profiling to get linkage name. + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + auto CalleeGUID = Function::getGUID(Name); + auto StackId = + computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); + // LeafFound will only be false on the first iteration, since we either + // set it true or break out of the loop below. + if (!LeafFound) { + AllocInfoIter = LocHashToAllocInfo.find(StackId); + CallSitesIter = LocHashToCallSites.find(StackId); + // Check if the leaf is in one of the maps. If not, no need to look + // further at this call. + if (AllocInfoIter == LocHashToAllocInfo.end() && + CallSitesIter == LocHashToCallSites.end()) + break; + LeafFound = true; + } + InlinedCallStack.push_back(StackId); + } + // If leaf not in either of the maps, skip inst. + if (!LeafFound) + continue; + + // First add !memprof metadata from allocation info, if we found the + // instruction's leaf location in that map, and if the rest of the + // instruction's locations match the prefix Frame locations on an + // allocation context with the same leaf. + if (AllocInfoIter != LocHashToAllocInfo.end()) { + // Only consider allocations via new, to reduce unnecessary metadata, + // since those are the only allocations that will be targeted initially. 
+ if (!isNewLikeFn(CI, &TLI)) + continue; + // We may match this instruction's location list to multiple MIB + // contexts. Add them to a Trie specialized for trimming the contexts to + // the minimal needed to disambiguate contexts with unique behavior. + CallStackTrie AllocTrie; + for (auto *AllocInfo : AllocInfoIter->second) { + // Check the full inlined call stack against this one. + // If we found and thus matched all frames on the call, include + // this MIB. + if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, + InlinedCallStack)) + addCallStack(AllocTrie, AllocInfo); + } + // We might not have matched any to the full inlined call stack. + // But if we did, create and attach metadata, or a function attribute if + // all contexts have identical profiled behavior. + if (!AllocTrie.empty()) { + // MemprofMDAttached will be false if a function attribute was + // attached. + bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); + assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); + if (MemprofMDAttached) { + // Add callsite metadata for the instruction's location list so that + // it is simpler later on to identify which part of the MIB contexts + // are from this particular instruction (including during inlining, + // when the callsite metadata will be updated appropriately). + // FIXME: can this be changed to strip out the matching stack + // context ids from the MIB contexts and not add any callsite + // metadata here to save space? + addCallsiteMetadata(I, InlinedCallStack, Ctx); + } + } + continue; + } + + // Otherwise, add callsite metadata. If we reach here then we found the + // instruction's leaf location in the callsites map and not the allocation + // map. + assert(CallSitesIter != LocHashToCallSites.end()); + for (auto CallStackIdx : CallSitesIter->second) { + // If we found and thus matched all frames on the call, create and + // attach call stack metadata. 
+ if (stackFrameIncludesInlinedCallStack( + *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + // Only need to find one with a matching call stack and add a single + // callsite metadata. + break; + } + } + } + } +} diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -62,8 +62,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -97,7 +95,6 @@ #include "llvm/IR/Value.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/Support/BLAKE3.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" @@ -107,31 +104,28 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/HashBuilder.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" #include "llvm/Transforms/Instrumentation/CFGMST.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include #include -#include #include #include #include -#include #include #include #include #include using namespace llvm; -using namespace 
llvm::memprof; using ProfileCount = Function::ProfileCount; using VPCandidateInfo = ValueProfileCollector::CandidateInfo; @@ -146,7 +140,6 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); -STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); STATISTIC(NumOfCSPGOSelectInsts, @@ -203,31 +196,31 @@ cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner")); +namespace llvm { // Command line option to enable/disable the warning about missing profile // information. -static cl::opt - PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, - cl::desc("Use this option to turn on/off " - "warnings about missing profile data for " - "functions.")); +cl::opt PGOWarnMissing("pgo-warn-missing-function", cl::init(false), + cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about missing profile data for " + "functions.")); -namespace llvm { // Command line option to enable/disable the warning about a hash mismatch in // the profile data. cl::opt NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on " "warnings about profile cfg mismatch.")); -} // namespace llvm // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to the pre-instrumentation inline. 
-static cl::opt NoPGOWarnMismatchComdatWeak( +cl::opt NoPGOWarnMismatchComdatWeak( "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "or weak functions.")); +} // namespace llvm // Command line option to enable/disable select instruction instrumentation. static cl::opt @@ -321,10 +314,6 @@ "pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold.")); -static cl::opt MatchMemProf( - "pgo-match-memprof", cl::init(true), cl::Hidden, - cl::desc("Perform matching and annotation of memprof profiles.")); - static cl::opt PGOFunctionCriticalEdgeThreshold( "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " @@ -1081,9 +1070,6 @@ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, InstrProfRecord::CountPseudoKind &PseudoKind); - // Read memprof data for the instrumented function from profile. - bool readMemprof(IndexedInstrProfReader *PGOReader); - // Populate the counts for all BBs. 
void populateCounters(); @@ -1303,257 +1289,6 @@ F.setMetadata(LLVMContext::MD_annotation, MD); } -static void addCallsiteMetadata(Instruction &I, - std::vector &InlinedCallStack, - LLVMContext &Ctx) { - I.setMetadata(LLVMContext::MD_callsite, - buildCallstackMetadata(InlinedCallStack, Ctx)); -} - -static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, - uint32_t Column) { - llvm::HashBuilder, llvm::support::endianness::little> - HashBuilder; - HashBuilder.add(Function, LineOffset, Column); - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); - uint64_t Id; - std::memcpy(&Id, Hash.data(), sizeof(Hash)); - return Id; -} - -static uint64_t computeStackId(const memprof::Frame &Frame) { - return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); -} - -static void addCallStack(CallStackTrie &AllocTrie, - const AllocationInfo *AllocInfo) { - SmallVector StackIds; - for (const auto &StackFrame : AllocInfo->CallStack) - StackIds.push_back(computeStackId(StackFrame)); - auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), - AllocInfo->Info.getAllocCount(), - AllocInfo->Info.getTotalLifetime()); - AllocTrie.addCallStack(AllocType, StackIds); -} - -// Helper to compare the InlinedCallStack computed from an instruction's debug -// info to a list of Frames from profile data (either the allocation data or a -// callsite). For callsites, the StartIndex to use in the Frame array may be -// non-zero. 
-static bool -stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, - ArrayRef InlinedCallStack, - unsigned StartIndex = 0) { - auto StackFrame = ProfileCallStack.begin() + StartIndex; - auto InlCallStackIter = InlinedCallStack.begin(); - for (; StackFrame != ProfileCallStack.end() && - InlCallStackIter != InlinedCallStack.end(); - ++StackFrame, ++InlCallStackIter) { - uint64_t StackId = computeStackId(*StackFrame); - if (StackId != *InlCallStackIter) - return false; - } - // Return true if we found and matched all stack ids from the call - // instruction. - return InlCallStackIter == InlinedCallStack.end(); -} - -bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) { - if (!MatchMemProf) - return true; - - auto &Ctx = M->getContext(); - - auto FuncGUID = Function::getGUID(FuncInfo.FuncName); - Expected MemProfResult = - PGOReader->getMemProfRecord(FuncGUID); - if (Error E = MemProfResult.takeError()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { - auto Err = IPE.get(); - bool SkipWarning = false; - LLVM_DEBUG(dbgs() << "Error in reading profile for Func " - << FuncInfo.FuncName << ": "); - if (Err == instrprof_error::unknown_function) { - NumOfMemProfMissing++; - SkipWarning = !PGOWarnMissing; - LLVM_DEBUG(dbgs() << "unknown function"); - } else if (Err == instrprof_error::hash_mismatch) { - SkipWarning = - NoPGOWarnMismatch || - (NoPGOWarnMismatchComdatWeak && - (F.hasComdat() || - F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); - } - - if (SkipWarning) - return; - - std::string Msg = - (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") + - std::to_string(FuncInfo.FunctionHash)) - .str(); - - Ctx.diagnose( - DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); - }); - return false; - } - - // Build maps of the location hash to all profile data with that leaf location - // (allocation info and the callsites). 
- std::map> LocHashToAllocInfo; - // For the callsites we need to record the index of the associated frame in - // the frame array (see comments below where the map entries are added). - std::map *, unsigned>>> - LocHashToCallSites; - const auto MemProfRec = std::move(MemProfResult.get()); - for (auto &AI : MemProfRec.AllocSites) { - // Associate the allocation info with the leaf frame. The later matching - // code will match any inlined call sequences in the IR with a longer prefix - // of call stack frames. - uint64_t StackId = computeStackId(AI.CallStack[0]); - LocHashToAllocInfo[StackId].insert(&AI); - } - for (auto &CS : MemProfRec.CallSites) { - // Need to record all frames from leaf up to and including this function, - // as any of these may or may not have been inlined at this point. - unsigned Idx = 0; - for (auto &StackFrame : CS) { - uint64_t StackId = computeStackId(StackFrame); - LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); - // Once we find this function, we can stop recording. - if (StackFrame.Function == FuncGUID) - break; - } - assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); - } - - auto GetOffset = [](const DILocation *DIL) { - return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & - 0xffff; - }; - - // Now walk the instructions, looking up the associated profile data using - // dbug locations. - for (auto &BB : F) { - for (auto &I : BB) { - if (I.isDebugOrPseudoInst()) - continue; - // We are only interested in calls (allocation or interior call stack - // context calls). - auto *CI = dyn_cast(&I); - if (!CI) - continue; - auto *CalledFunction = CI->getCalledFunction(); - if (CalledFunction && CalledFunction->isIntrinsic()) - continue; - // List of call stack ids computed from the location hashes on debug - // locations (leaf to inlined at root). - std::vector InlinedCallStack; - // Was the leaf location found in one of the profile maps? 
- bool LeafFound = false; - // If leaf was found in a map, iterators pointing to its location in both - // of the maps. It might exist in neither, one, or both (the latter case - // can happen because we don't currently have discriminators to - // distinguish the case when a single line/col maps to both an allocation - // and another callsite). - std::map>::iterator - AllocInfoIter; - std::map *, - unsigned>>>::iterator CallSitesIter; - for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; - DIL = DIL->getInlinedAt()) { - // Use C++ linkage name if possible. Need to compile with - // -fdebug-info-for-profiling to get linkage name. - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); - if (Name.empty()) - Name = DIL->getScope()->getSubprogram()->getName(); - auto CalleeGUID = Function::getGUID(Name); - auto StackId = - computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); - // LeafFound will only be false on the first iteration, since we either - // set it true or break out of the loop below. - if (!LeafFound) { - AllocInfoIter = LocHashToAllocInfo.find(StackId); - CallSitesIter = LocHashToCallSites.find(StackId); - // Check if the leaf is in one of the maps. If not, no need to look - // further at this call. - if (AllocInfoIter == LocHashToAllocInfo.end() && - CallSitesIter == LocHashToCallSites.end()) - break; - LeafFound = true; - } - InlinedCallStack.push_back(StackId); - } - // If leaf not in either of the maps, skip inst. - if (!LeafFound) - continue; - - // First add !memprof metadata from allocation info, if we found the - // instruction's leaf location in that map, and if the rest of the - // instruction's locations match the prefix Frame locations on an - // allocation context with the same leaf. - if (AllocInfoIter != LocHashToAllocInfo.end()) { - // Only consider allocations via new, to reduce unnecessary metadata, - // since those are the only allocations that will be targeted initially. 
- if (!isNewLikeFn(CI, &FuncInfo.TLI)) - continue; - // We may match this instruction's location list to multiple MIB - // contexts. Add them to a Trie specialized for trimming the contexts to - // the minimal needed to disambiguate contexts with unique behavior. - CallStackTrie AllocTrie; - for (auto *AllocInfo : AllocInfoIter->second) { - // Check the full inlined call stack against this one. - // If we found and thus matched all frames on the call, include - // this MIB. - if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, - InlinedCallStack)) - addCallStack(AllocTrie, AllocInfo); - } - // We might not have matched any to the full inlined call stack. - // But if we did, create and attach metadata, or a function attribute if - // all contexts have identical profiled behavior. - if (!AllocTrie.empty()) { - // MemprofMDAttached will be false if a function attribute was - // attached. - bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); - assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); - if (MemprofMDAttached) { - // Add callsite metadata for the instruction's location list so that - // it simpler later on to identify which part of the MIB contexts - // are from this particular instruction (including during inlining, - // when the callsite metdata will be updated appropriately). - // FIXME: can this be changed to strip out the matching stack - // context ids from the MIB contexts and not add any callsite - // metadata here to save space? - addCallsiteMetadata(I, InlinedCallStack, Ctx); - } - } - continue; - } - - // Otherwise, add callsite metadata. If we reach here then we found the - // instruction's leaf location in the callsites map and not the allocation - // map. - assert(CallSitesIter != LocHashToCallSites.end()); - for (auto CallStackIdx : CallSitesIter->second) { - // If we found and thus matched all frames on the call, create and - // attach call stack metadata. 
- if (stackFrameIncludesInlinedCallStack( - *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { - addCallsiteMetadata(I, InlinedCallStack, Ctx); - // Only need to find one with a matching call stack and add a single - // callsite metadata. - break; - } - } - } - } - - return true; -} - void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto &Ctx = M->getContext(); @@ -2310,7 +2045,7 @@ // Read and match memprof first since we do this via debug info and can // match even if there is an IR mismatch detected for regular PGO below. if (PGOReader->hasMemoryProfile()) - Func.readMemprof(PGOReader.get()); + readMemprof(M, F, PGOReader.get(), TLI); if (!PGOReader->isIRLevelProfile()) continue;