diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -282,6 +282,9 @@ /// Name of the profile file to use as output for with -fmemory-profile. std::string MemoryProfileOutput; + /// Name of the profile file to use as input for -fmemory-profile-use. + std::string MemoryProfileUsePath; + /// Name of the profile file to use as input for -fprofile-instr-use std::string ProfileInstrumentUsePath; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1772,6 +1772,10 @@ def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Enable heap memory profiling and dump results into ">; +def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, + Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, + HelpText<"Use memory profile for profile-guided memory optimization">, + MarshallingInfoString>; // Begin sanitizer flags. These should all be core options exposed in all driver // modes. diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -762,31 +762,37 @@ PGOOpt = PGOOptions( CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() : CodeGenOpts.InstrProfileOutput, - "", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling); + "", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr, + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? 
PGOOptions::CSIRUse : PGOOptions::NoCSAction; - PGOOpt = - PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse, - CSAction, CodeGenOpts.DebugInfoForProfiling); + PGOOpt = PGOOptions( + CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS, + PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling); } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use PGOOpt = PGOOptions( CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile, - VFS, PGOOptions::SampleUse, PGOOptions::NoCSAction, - CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling); + CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse, + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, + CodeGenOpts.PseudoProbeForProfiling); + else if (!CodeGenOpts.MemoryProfileUsePath.empty()) + // -fmemory-profile-use (without any of the above options) + PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS, + PGOOptions::NoAction, PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.PseudoProbeForProfiling) // -fpseudo-probe-for-profiling - PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, - PGOOptions::NoCSAction, + PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr, + PGOOptions::NoAction, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, true); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, - PGOOptions::NoCSAction, true); + PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr, + PGOOptions::NoAction, PGOOptions::NoCSAction, true); // Check to see if we want to generate a CS profile. if (CodeGenOpts.hasProfileCSIRInstr()) { @@ -808,8 +814,8 @@ CodeGenOpts.InstrProfileOutput.empty() ? 
getDefaultProfileGenName() : CodeGenOpts.InstrProfileOutput, - "", nullptr, PGOOptions::NoAction, PGOOptions::CSIRInstr, - CodeGenOpts.DebugInfoForProfiling); + "", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction, + PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling); } if (TM) TM->setPGOOption(PGOOpt); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4946,6 +4946,18 @@ !MemProfArg->getOption().matches(options::OPT_fno_memory_profile)) MemProfArg->render(Args, CmdArgs); + if (auto *MemProfUseArg = + Args.getLastArg(options::OPT_fmemory_profile_use_EQ)) { + if (MemProfArg) + D.Diag(diag::err_drv_argument_not_allowed_with) + << MemProfUseArg->getAsString(Args) << MemProfArg->getAsString(Args); + if (auto *PGOInstrArg = Args.getLastArg(options::OPT_fprofile_generate, + options::OPT_fprofile_generate_EQ)) + D.Diag(diag::err_drv_argument_not_allowed_with) + << MemProfUseArg->getAsString(Args) << PGOInstrArg->getAsString(Args); + MemProfUseArg->render(Args, CmdArgs); + } + // Embed-bitcode option. // Only white-listed flags below are allowed to be embedded. if (C.getDriver().embedBitcodeInObject() && !IsUsingLTO && diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp --- a/clang/test/CodeGen/memprof.cpp +++ b/clang/test/CodeGen/memprof.cpp @@ -16,8 +16,8 @@ // Profile use: // Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE -// USE: Running pass: PGOInstrumentationUse on [module] +// RUN: %clang_cc1 -O2 -fmemory-profile-use=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE +// USE: Running pass: MemProfUsePass on [module] char *foo() { return new char[10]; diff --git a/clang/test/Driver/fmemprof.cpp b/clang/test/Driver/fmemprof.cpp --- a/clang/test/Driver/fmemprof.cpp +++ b/clang/test/Driver/fmemprof.cpp @@ -8,3 +8,12 @@ // DIR: ld{{.*}}libclang_rt.memprof{{.*}}libclang_rt.memprof_cxx // OFF-NOT: "-fmemory-profile" // OFF-NOT: libclang_rt.memprof + +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=USE +// USE: "-cc1" {{.*}} "-fmemory-profile-use=foo" + +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=CONFLICTWITHMEMPROFINSTR +// CONFLICTWITHMEMPROFINSTR: error: invalid argument '-fmemory-profile-use=foo' not allowed with '-fmemory-profile' + +// RUN: %clangxx -target x86_64-linux-gnu -fprofile-generate -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=CONFLICTWITHPGOINSTR +// CONFLICTWITHPGOINSTR: error: invalid argument '-fmemory-profile-use=foo' not allowed with '-fprofile-generate' diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h --- a/llvm/include/llvm/Support/PGOOptions.h +++ b/llvm/include/llvm/Support/PGOOptions.h @@ -28,7 +28,7 @@ enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, - std::string ProfileRemappingFile, + std::string ProfileRemappingFile, std::string MemoryProfile, IntrusiveRefCntPtr FS, PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction, bool 
DebugInfoForProfiling = false, @@ -40,6 +40,7 @@ std::string ProfileFile; std::string CSProfileGenFile; std::string ProfileRemappingFile; + std::string MemoryProfile; PGOAction Action; CSPGOAction CSAction; bool DebugInfoForProfiling; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -12,6 +12,7 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -20,6 +21,10 @@ class Module; class ModulePass; +namespace vfs { +class FileSystem; +} // namespace vfs + /// Public interface to the memory profiler pass for instrumenting code to /// profile memory accesses. /// @@ -43,6 +48,17 @@ static bool isRequired() { return true; } }; +class MemProfUsePass : public PassInfoMixin { +public: + explicit MemProfUsePass(std::string MemoryProfileFile, + IntrusiveRefCntPtr FS = nullptr); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + std::string MemoryProfileFileName; + IntrusiveRefCntPtr FS; +}; + } // namespace llvm #endif diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -236,20 +236,21 @@ auto FS = vfs::getRealFileSystem(); std::optional PGOOpt; if (!Conf.SampleProfile.empty()) - PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, FS, - PGOOptions::SampleUse, PGOOptions::NoCSAction, true); + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + /*MemoryProfile=*/"", FS, PGOOptions::SampleUse, + PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { - PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, FS, - PGOOptions::IRUse, PGOOptions::CSIRInstr, - 
Conf.AddFSDiscriminator); + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + /*MemoryProfile=*/"", FS, PGOOptions::IRUse, + PGOOptions::CSIRInstr, Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { - PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, FS, - PGOOptions::IRUse, PGOOptions::CSIRUse, - Conf.AddFSDiscriminator); + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + /*MemoryProfile=*/"", FS, PGOOptions::IRUse, + PGOOptions::CSIRUse, Conf.AddFSDiscriminator); NoPGOWarnMismatch = !Conf.PGOWarnMismatch; } else if (Conf.AddFSDiscriminator) { - PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, - PGOOptions::NoCSAction, true); + PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr, + PGOOptions::NoAction, PGOOptions::NoCSAction, true); } TM->setPGOOption(PGOOpt); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1071,6 +1071,23 @@ "MemorySSAPrinterPass"); } +Expected parseMemProfUsePassOptions(StringRef Params) { + std::string Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName.consume_front("profile-filename=")) { + Result = ParamName.str(); + } else { + return make_error( + formatv("invalid MemProfUse pass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1102,6 +1102,10 @@ PGOOpt->CSAction == PGOOptions::CSIRInstr) MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); + if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && + 
!PGOOpt->MemoryProfile.empty()) + MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); + // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -183,6 +183,13 @@ }, parseEmbedBitcodePassOptions, "thinlto;emit-summary") +MODULE_PASS_WITH_PARAMS("memprof-use", + "MemProfUsePass", + [](std::string Opts) { + return MemProfUsePass(Opts); + }, + parseMemProfUsePassOptions, + "profile-filename=S") #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp --- a/llvm/lib/Support/PGOOptions.cpp +++ b/llvm/lib/Support/PGOOptions.cpp @@ -13,12 +13,13 @@ PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, std::string ProfileRemappingFile, + std::string MemoryProfile, IntrusiveRefCntPtr FS, PGOAction Action, CSPGOAction CSAction, bool DebugInfoForProfiling, bool PseudoProbeForProfiling) : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), - ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), + ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile), + Action(Action), CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || (Action == SampleUse && !PseudoProbeForProfiling)), PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) { @@ -36,13 +37,18 @@ // a profile. assert(this->CSAction != CSIRUse || this->Action == IRUse); - // If neither Action nor CSAction, DebugInfoForProfiling or - // PseudoProbeForProfiling needs to be true. + // Cannot optimize with MemProf profile during IR instrumentation. 
+ assert(this->MemoryProfile.empty() || this->Action != PGOOptions::IRInstr); + + // If neither Action nor CSAction nor MemoryProfile are set, + // DebugInfoForProfiling or PseudoProbeForProfiling needs to be true. assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + !this->MemoryProfile.empty() || this->DebugInfoForProfiling || + this->PseudoProbeForProfiling); // If we need to use the profile, the VFS cannot be nullptr. - assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse)); + assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse || + !this->MemoryProfile.empty())); } PGOOptions::PGOOptions(const PGOOptions &) = default; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -18,9 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" @@ -30,16 +33,29 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include using namespace llvm; +using namespace llvm::memprof; #define 
DEBUG_TYPE "memprof" +namespace llvm { +extern cl::opt PGOWarnMissing; +extern cl::opt NoPGOWarnMismatch; +extern cl::opt NoPGOWarnMismatchComdatWeak; +} // namespace llvm + constexpr int LLVM_MEM_PROFILER_VERSION = 1; // Size of memory mapped to a single shadow location. @@ -128,6 +144,7 @@ STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); +STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); namespace { @@ -601,3 +618,299 @@ return FunctionModified; } + +static void addCallsiteMetadata(Instruction &I, + std::vector &InlinedCallStack, + LLVMContext &Ctx) { + I.setMetadata(LLVMContext::MD_callsite, + buildCallstackMetadata(InlinedCallStack, Ctx)); +} + +static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, + uint32_t Column) { + llvm::HashBuilder, llvm::support::endianness::little> + HashBuilder; + HashBuilder.add(Function, LineOffset, Column); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + uint64_t Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; +} + +static uint64_t computeStackId(const memprof::Frame &Frame) { + return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); +} + +static void addCallStack(CallStackTrie &AllocTrie, + const AllocationInfo *AllocInfo) { + SmallVector StackIds; + for (const auto &StackFrame : AllocInfo->CallStack) + StackIds.push_back(computeStackId(StackFrame)); + auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), + AllocInfo->Info.getAllocCount(), + AllocInfo->Info.getTotalLifetime()); + AllocTrie.addCallStack(AllocType, StackIds); +} + +// Helper to compare the InlinedCallStack computed from an instruction's debug +// info to a list of Frames from profile data (either the allocation data or a +// callsite). 
For callsites, the StartIndex to use in the Frame array may be +// non-zero. +static bool +stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, + ArrayRef InlinedCallStack, + unsigned StartIndex = 0) { + auto StackFrame = ProfileCallStack.begin() + StartIndex; + auto InlCallStackIter = InlinedCallStack.begin(); + for (; StackFrame != ProfileCallStack.end() && + InlCallStackIter != InlinedCallStack.end(); + ++StackFrame, ++InlCallStackIter) { + uint64_t StackId = computeStackId(*StackFrame); + if (StackId != *InlCallStackIter) + return false; + } + // Return true if we found and matched all stack ids from the call + // instruction. + return InlCallStackIter == InlinedCallStack.end(); +} + +static bool readMemprof(Module &M, Function &F, + IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI) { + auto &Ctx = M.getContext(); + + auto FuncName = getPGOFuncName(F); + auto FuncGUID = Function::getGUID(FuncName); + Expected MemProfResult = + MemProfReader->getMemProfRecord(FuncGUID); + if (Error E = MemProfResult.takeError()) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + auto Err = IPE.get(); + bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName + << ": "); + if (Err == instrprof_error::unknown_function) { + NumOfMemProfMissing++; + SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); + } else if (Err == instrprof_error::hash_mismatch) { + SkipWarning = + NoPGOWarnMismatch || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + } + + if (SkipWarning) + return; + + std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + + Twine(" Hash = ") + std::to_string(FuncGUID)) + .str(); + + Ctx.diagnose( + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); + }); + return false; + } + + // Build maps of the 
location hash to all profile data with that leaf location + // (allocation info and the callsites). + std::map> LocHashToAllocInfo; + // For the callsites we need to record the index of the associated frame in + // the frame array (see comments below where the map entries are added). + std::map *, unsigned>>> + LocHashToCallSites; + const auto MemProfRec = std::move(MemProfResult.get()); + for (auto &AI : MemProfRec.AllocSites) { + // Associate the allocation info with the leaf frame. The later matching + // code will match any inlined call sequences in the IR with a longer prefix + // of call stack frames. + uint64_t StackId = computeStackId(AI.CallStack[0]); + LocHashToAllocInfo[StackId].insert(&AI); + } + for (auto &CS : MemProfRec.CallSites) { + // Need to record all frames from leaf up to and including this function, + // as any of these may or may not have been inlined at this point. + unsigned Idx = 0; + for (auto &StackFrame : CS) { + uint64_t StackId = computeStackId(StackFrame); + LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); + // Once we find this function, we can stop recording. + if (StackFrame.Function == FuncGUID) + break; + } + assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); + } + + auto GetOffset = [](const DILocation *DIL) { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; + }; + + // Now walk the instructions, looking up the associated profile data using + // debug locations. + for (auto &BB : F) { + for (auto &I : BB) { + if (I.isDebugOrPseudoInst()) + continue; + // We are only interested in calls (allocation or interior call stack + // context calls). + auto *CI = dyn_cast(&I); + if (!CI) + continue; + auto *CalledFunction = CI->getCalledFunction(); + if (CalledFunction && CalledFunction->isIntrinsic()) + continue; + // List of call stack ids computed from the location hashes on debug + // locations (leaf to inlined at root). 
+ std::vector InlinedCallStack; + // Was the leaf location found in one of the profile maps? + bool LeafFound = false; + // If leaf was found in a map, iterators pointing to its location in both + // of the maps. It might exist in neither, one, or both (the latter case + // can happen because we don't currently have discriminators to + // distinguish the case when a single line/col maps to both an allocation + // and another callsite). + std::map>::iterator + AllocInfoIter; + std::map *, + unsigned>>>::iterator CallSitesIter; + for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; + DIL = DIL->getInlinedAt()) { + // Use C++ linkage name if possible. Need to compile with + // -fdebug-info-for-profiling to get linkage name. + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + auto CalleeGUID = Function::getGUID(Name); + auto StackId = + computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); + // LeafFound will only be false on the first iteration, since we either + // set it true or break out of the loop below. + if (!LeafFound) { + AllocInfoIter = LocHashToAllocInfo.find(StackId); + CallSitesIter = LocHashToCallSites.find(StackId); + // Check if the leaf is in one of the maps. If not, no need to look + // further at this call. + if (AllocInfoIter == LocHashToAllocInfo.end() && + CallSitesIter == LocHashToCallSites.end()) + break; + LeafFound = true; + } + InlinedCallStack.push_back(StackId); + } + // If leaf not in either of the maps, skip inst. + if (!LeafFound) + continue; + + // First add !memprof metadata from allocation info, if we found the + // instruction's leaf location in that map, and if the rest of the + // instruction's locations match the prefix Frame locations on an + // allocation context with the same leaf. 
+ if (AllocInfoIter != LocHashToAllocInfo.end()) { + // Only consider allocations via new, to reduce unnecessary metadata, + // since those are the only allocations that will be targeted initially. + if (!isNewLikeFn(CI, &TLI)) + continue; + // We may match this instruction's location list to multiple MIB + // contexts. Add them to a Trie specialized for trimming the contexts to + // the minimal needed to disambiguate contexts with unique behavior. + CallStackTrie AllocTrie; + for (auto *AllocInfo : AllocInfoIter->second) { + // Check the full inlined call stack against this one. + // If we found and thus matched all frames on the call, include + // this MIB. + if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, + InlinedCallStack)) + addCallStack(AllocTrie, AllocInfo); + } + // We might not have matched any to the full inlined call stack. + // But if we did, create and attach metadata, or a function attribute if + // all contexts have identical profiled behavior. + if (!AllocTrie.empty()) { + // MemprofMDAttached will be false if a function attribute was + // attached. + bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); + assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); + if (MemprofMDAttached) { + // Add callsite metadata for the instruction's location list so that + // it is simpler later on to identify which part of the MIB contexts + // are from this particular instruction (including during inlining, + // when the callsite metadata will be updated appropriately). + // FIXME: can this be changed to strip out the matching stack + // context ids from the MIB contexts and not add any callsite + // metadata here to save space? + addCallsiteMetadata(I, InlinedCallStack, Ctx); + } + } + continue; + } + + // Otherwise, add callsite metadata. If we reach here then we found the + // instruction's leaf location in the callsites map and not the allocation + // map. 
+ assert(CallSitesIter != LocHashToCallSites.end()); + for (auto CallStackIdx : CallSitesIter->second) { + // If we found and thus matched all frames on the call, create and + // attach call stack metadata. + if (stackFrameIncludesInlinedCallStack( + *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + // Only need to find one with a matching call stack and add a single + // callsite metadata. + break; + } + } + } + } + + return true; +} + +MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, + IntrusiveRefCntPtr FS) + : MemoryProfileFileName(MemoryProfileFile), FS(FS) { + if (!FS) + this->FS = vfs::getRealFileSystem(); +} + +PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { + LLVM_DEBUG(dbgs() << "Read in memory profile:"); + auto &Ctx = M.getContext(); + auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); + if (Error E = ReaderOrErr.takeError()) { + handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { + Ctx.diagnose( + DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); + }); + return PreservedAnalyses::all(); + } + + std::unique_ptr MemProfReader = + std::move(ReaderOrErr.get()); + if (!MemProfReader) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); + return PreservedAnalyses::all(); + } + + if (!MemProfReader->hasMemoryProfile()) { + Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), + "Not a memory profile")); + return PreservedAnalyses::all(); + } + + auto &FAM = AM.getResult(M).getManager(); + + for (auto &F : M) { + if (F.isDeclaration()) + continue; + + const TargetLibraryInfo &TLI = FAM.getResult(F); + readMemprof(M, F, MemProfReader.get(), TLI); + } + + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp 
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -62,8 +62,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -97,7 +95,6 @@ #include "llvm/IR/Value.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/Support/BLAKE3.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" @@ -107,7 +104,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/HashBuilder.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" @@ -120,18 +116,15 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include using namespace llvm; -using namespace llvm::memprof; using ProfileCount = Function::ProfileCount; using VPCandidateInfo = ValueProfileCollector::CandidateInfo; @@ -146,7 +139,6 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); -STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); STATISTIC(NumOfCSPGOSelectInsts, @@ -203,31 +195,31 @@ cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the 
preinliner")); +namespace llvm { // Command line option to enable/disable the warning about missing profile // information. -static cl::opt - PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, - cl::desc("Use this option to turn on/off " - "warnings about missing profile data for " - "functions.")); +cl::opt PGOWarnMissing("pgo-warn-missing-function", cl::init(false), + cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about missing profile data for " + "functions.")); -namespace llvm { // Command line option to enable/disable the warning about a hash mismatch in // the profile data. cl::opt NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on " "warnings about profile cfg mismatch.")); -} // namespace llvm // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to the pre-instrumentation inline. -static cl::opt NoPGOWarnMismatchComdatWeak( +cl::opt NoPGOWarnMismatchComdatWeak( "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "or weak functions.")); +} // namespace llvm // Command line option to enable/disable select instruction instrumentation. 
static cl::opt @@ -321,10 +313,6 @@ "pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold.")); -static cl::opt MatchMemProf( - "pgo-match-memprof", cl::init(true), cl::Hidden, - cl::desc("Perform matching and annotation of memprof profiles.")); - static cl::opt PGOFunctionCriticalEdgeThreshold( "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " @@ -1081,9 +1069,6 @@ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, InstrProfRecord::CountPseudoKind &PseudoKind); - // Read memprof data for the instrumented function from profile. - bool readMemprof(IndexedInstrProfReader *PGOReader); - // Populate the counts for all BBs. void populateCounters(); @@ -1303,257 +1288,6 @@ F.setMetadata(LLVMContext::MD_annotation, MD); } -static void addCallsiteMetadata(Instruction &I, - std::vector &InlinedCallStack, - LLVMContext &Ctx) { - I.setMetadata(LLVMContext::MD_callsite, - buildCallstackMetadata(InlinedCallStack, Ctx)); -} - -static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, - uint32_t Column) { - llvm::HashBuilder, llvm::support::endianness::little> - HashBuilder; - HashBuilder.add(Function, LineOffset, Column); - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); - uint64_t Id; - std::memcpy(&Id, Hash.data(), sizeof(Hash)); - return Id; -} - -static uint64_t computeStackId(const memprof::Frame &Frame) { - return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); -} - -static void addCallStack(CallStackTrie &AllocTrie, - const AllocationInfo *AllocInfo) { - SmallVector StackIds; - for (const auto &StackFrame : AllocInfo->CallStack) - StackIds.push_back(computeStackId(StackFrame)); - auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), - AllocInfo->Info.getAllocCount(), - AllocInfo->Info.getTotalLifetime()); - AllocTrie.addCallStack(AllocType, StackIds); -} 
- -// Helper to compare the InlinedCallStack computed from an instruction's debug -// info to a list of Frames from profile data (either the allocation data or a -// callsite). For callsites, the StartIndex to use in the Frame array may be -// non-zero. -static bool -stackFrameIncludesInlinedCallStack(ArrayRef ProfileCallStack, - ArrayRef InlinedCallStack, - unsigned StartIndex = 0) { - auto StackFrame = ProfileCallStack.begin() + StartIndex; - auto InlCallStackIter = InlinedCallStack.begin(); - for (; StackFrame != ProfileCallStack.end() && - InlCallStackIter != InlinedCallStack.end(); - ++StackFrame, ++InlCallStackIter) { - uint64_t StackId = computeStackId(*StackFrame); - if (StackId != *InlCallStackIter) - return false; - } - // Return true if we found and matched all stack ids from the call - // instruction. - return InlCallStackIter == InlinedCallStack.end(); -} - -bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) { - if (!MatchMemProf) - return true; - - auto &Ctx = M->getContext(); - - auto FuncGUID = Function::getGUID(FuncInfo.FuncName); - Expected MemProfResult = - PGOReader->getMemProfRecord(FuncGUID); - if (Error E = MemProfResult.takeError()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { - auto Err = IPE.get(); - bool SkipWarning = false; - LLVM_DEBUG(dbgs() << "Error in reading profile for Func " - << FuncInfo.FuncName << ": "); - if (Err == instrprof_error::unknown_function) { - NumOfMemProfMissing++; - SkipWarning = !PGOWarnMissing; - LLVM_DEBUG(dbgs() << "unknown function"); - } else if (Err == instrprof_error::hash_mismatch) { - SkipWarning = - NoPGOWarnMismatch || - (NoPGOWarnMismatchComdatWeak && - (F.hasComdat() || - F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); - } - - if (SkipWarning) - return; - - std::string Msg = - (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") + - 
std::to_string(FuncInfo.FunctionHash)) - .str(); - - Ctx.diagnose( - DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); - }); - return false; - } - - // Build maps of the location hash to all profile data with that leaf location - // (allocation info and the callsites). - std::map> LocHashToAllocInfo; - // For the callsites we need to record the index of the associated frame in - // the frame array (see comments below where the map entries are added). - std::map *, unsigned>>> - LocHashToCallSites; - const auto MemProfRec = std::move(MemProfResult.get()); - for (auto &AI : MemProfRec.AllocSites) { - // Associate the allocation info with the leaf frame. The later matching - // code will match any inlined call sequences in the IR with a longer prefix - // of call stack frames. - uint64_t StackId = computeStackId(AI.CallStack[0]); - LocHashToAllocInfo[StackId].insert(&AI); - } - for (auto &CS : MemProfRec.CallSites) { - // Need to record all frames from leaf up to and including this function, - // as any of these may or may not have been inlined at this point. - unsigned Idx = 0; - for (auto &StackFrame : CS) { - uint64_t StackId = computeStackId(StackFrame); - LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); - // Once we find this function, we can stop recording. - if (StackFrame.Function == FuncGUID) - break; - } - assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); - } - - auto GetOffset = [](const DILocation *DIL) { - return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & - 0xffff; - }; - - // Now walk the instructions, looking up the associated profile data using - // dbug locations. - for (auto &BB : F) { - for (auto &I : BB) { - if (I.isDebugOrPseudoInst()) - continue; - // We are only interested in calls (allocation or interior call stack - // context calls). 
- auto *CI = dyn_cast(&I); - if (!CI) - continue; - auto *CalledFunction = CI->getCalledFunction(); - if (CalledFunction && CalledFunction->isIntrinsic()) - continue; - // List of call stack ids computed from the location hashes on debug - // locations (leaf to inlined at root). - std::vector InlinedCallStack; - // Was the leaf location found in one of the profile maps? - bool LeafFound = false; - // If leaf was found in a map, iterators pointing to its location in both - // of the maps. It might exist in neither, one, or both (the latter case - // can happen because we don't currently have discriminators to - // distinguish the case when a single line/col maps to both an allocation - // and another callsite). - std::map>::iterator - AllocInfoIter; - std::map *, - unsigned>>>::iterator CallSitesIter; - for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; - DIL = DIL->getInlinedAt()) { - // Use C++ linkage name if possible. Need to compile with - // -fdebug-info-for-profiling to get linkage name. - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); - if (Name.empty()) - Name = DIL->getScope()->getSubprogram()->getName(); - auto CalleeGUID = Function::getGUID(Name); - auto StackId = - computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); - // LeafFound will only be false on the first iteration, since we either - // set it true or break out of the loop below. - if (!LeafFound) { - AllocInfoIter = LocHashToAllocInfo.find(StackId); - CallSitesIter = LocHashToCallSites.find(StackId); - // Check if the leaf is in one of the maps. If not, no need to look - // further at this call. - if (AllocInfoIter == LocHashToAllocInfo.end() && - CallSitesIter == LocHashToCallSites.end()) - break; - LeafFound = true; - } - InlinedCallStack.push_back(StackId); - } - // If leaf not in either of the maps, skip inst. 
- if (!LeafFound) - continue; - - // First add !memprof metadata from allocation info, if we found the - // instruction's leaf location in that map, and if the rest of the - // instruction's locations match the prefix Frame locations on an - // allocation context with the same leaf. - if (AllocInfoIter != LocHashToAllocInfo.end()) { - // Only consider allocations via new, to reduce unnecessary metadata, - // since those are the only allocations that will be targeted initially. - if (!isNewLikeFn(CI, &FuncInfo.TLI)) - continue; - // We may match this instruction's location list to multiple MIB - // contexts. Add them to a Trie specialized for trimming the contexts to - // the minimal needed to disambiguate contexts with unique behavior. - CallStackTrie AllocTrie; - for (auto *AllocInfo : AllocInfoIter->second) { - // Check the full inlined call stack against this one. - // If we found and thus matched all frames on the call, include - // this MIB. - if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, - InlinedCallStack)) - addCallStack(AllocTrie, AllocInfo); - } - // We might not have matched any to the full inlined call stack. - // But if we did, create and attach metadata, or a function attribute if - // all contexts have identical profiled behavior. - if (!AllocTrie.empty()) { - // MemprofMDAttached will be false if a function attribute was - // attached. - bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); - assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); - if (MemprofMDAttached) { - // Add callsite metadata for the instruction's location list so that - // it simpler later on to identify which part of the MIB contexts - // are from this particular instruction (including during inlining, - // when the callsite metdata will be updated appropriately). - // FIXME: can this be changed to strip out the matching stack - // context ids from the MIB contexts and not add any callsite - // metadata here to save space? 
- addCallsiteMetadata(I, InlinedCallStack, Ctx); - } - } - continue; - } - - // Otherwise, add callsite metadata. If we reach here then we found the - // instruction's leaf location in the callsites map and not the allocation - // map. - assert(CallSitesIter != LocHashToCallSites.end()); - for (auto CallStackIdx : CallSitesIter->second) { - // If we found and thus matched all frames on the call, create and - // attach call stack metadata. - if (stackFrameIncludesInlinedCallStack( - *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { - addCallsiteMetadata(I, InlinedCallStack, Ctx); - // Only need to find one with a matching call stack and add a single - // callsite metadata. - break; - } - } - } - } - - return true; -} - void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto &Ctx = M->getContext(); @@ -2262,7 +1996,7 @@ return false; // TODO: might need to change the warning once the clang option is finalized. - if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) { + if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( ProfileFileName.data(), "Not an IR level instrumentation profile")); return false; @@ -2307,14 +2041,6 @@ } PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry, HasSingleByteCoverage); - // Read and match memprof first since we do this via debug info and can - // match even if there is an IR mismatch detected for regular PGO below. 
- if (PGOReader->hasMemoryProfile()) - Func.readMemprof(PGOReader.get()); - - if (!PGOReader->isIRLevelProfile()) - continue; - if (HasSingleByteCoverage) { Func.populateCoverage(PGOReader.get()); continue; diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -23,19 +23,36 @@ ; ALL-NOT: memprof record not found for function hash ; ALL-NOT: no profile data available for function -;; Feed back memprof-only profile -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY +;; Using a memprof-only profile for memprof-use should only give memprof metadata +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY ; There should not be any PGO metadata ; MEMPROFONLY-NOT: !prof -;; Feed back pgo-only profile -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY +;; Test the same thing but by passing the memory profile through to a default +;; pipeline via -memory-profile-file=, which should cause the necessary field +;; of the PGOOptions structure to be populated with the profile filename. 
+; RUN: opt < %s -passes='default' -memory-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY + +;; Using a pgo+memprof profile for memprof-use should only give memprof metadata +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY + +;; Using a pgo-only profile for memprof-use should give an error +; RUN: not opt < %s -passes='memprof-use' -S 2>&1 | FileCheck %s --check-prefixes=MEMPROFWITHPGOONLY +; MEMPROFWITHPGOONLY: Not a memory profile + +;; Using a memprof-only profile for pgo-instr-use should give an error +; RUN: not opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -S 2>&1 | FileCheck %s --check-prefixes=PGOWITHMEMPROFONLY +; PGOWITHMEMPROFONLY: Not an IR level instrumentation profile + +;; Using a pgo+memprof profile for pgo-instr-use should only give pgo metadata +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY ; There should not be any memprof related metadata ; PGOONLY-NOT: !memprof ; PGOONLY-NOT: !callsite -;; Feed back pgo+memprof-only profile -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL +;; Using a pgo+memprof profile for both memprof-use and pgo-instr-use should +;; give both memprof and pgo metadata. 
+; RUN: opt < %s -passes='pgo-instr-use,memprof-use' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO ; ModuleID = 'memprof.cc' source_filename = "memprof.cc" diff --git a/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll --- a/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll +++ b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll @@ -11,7 +11,7 @@ ; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S 2>&1 | FileCheck %s ; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -176,6 +176,9 @@ "Use sampled profile to guide PGO."))); static cl::opt ProfileFile("profile-file", cl::desc("Path to the profile."), cl::Hidden); +static cl::opt + MemoryProfileFile("memory-profile-file", + cl::desc("Path to the memory profile."), cl::Hidden); static cl::opt CSPGOKindFlag( "cspgo-kind", cl::init(NoCSPGO), cl::Hidden, @@ -336,19 +339,21 @@ std::optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", FS, PGOOptions::IRInstr); + P = PGOOptions(ProfileFile, "", "", MemoryProfileFile, FS, + PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS, PGOOptions::IRUse); break; case SampleUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS, + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS, PGOOptions::SampleUse); break; 
case NoPGO: - if (DebugInfoForProfiling || PseudoProbeForProfiling) - P = PGOOptions("", "", "", nullptr, PGOOptions::NoAction, + if (DebugInfoForProfiling || PseudoProbeForProfiling || + !MemoryProfileFile.empty()) + P = PGOOptions("", "", "", MemoryProfileFile, FS, PGOOptions::NoAction, PGOOptions::NoCSAction, DebugInfoForProfiling, PseudoProbeForProfiling); else @@ -369,8 +374,9 @@ P->CSAction = PGOOptions::CSIRInstr; P->CSProfileGenFile = CSProfileGenFile; } else - P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, FS, - PGOOptions::NoAction, PGOOptions::CSIRInstr); + P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, + /*MemoryProfile=*/"", FS, PGOOptions::NoAction, + PGOOptions::CSIRInstr); } else /* CSPGOKindFlag == CSInstrUse */ { if (!P) { errs() << "CSInstrUse needs to be together with InstrUse";