[memprof] Use canonical function names when computing MemProf GUIDs.

IndexedMemProfRecord::getGUID previously trimmed only a ".llvm." suffix
before hashing. It now hashes the name returned by
sampleprof::FunctionSamples::getCanonicalFnName, so suffixes such as
".llvm." and ".uniq." are dropped. RawMemProfReader records that same
canonical name in GuidToSymbolName when KeepSymbolName is set. The unit
test matcher computes its expected hash through
IndexedMemProfRecord::getGUID and feeds the symbolizer a name carrying a
".llvm.123" suffix to exercise the canonicalization.

diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -2,6 +2,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 
@@ -71,14 +72,17 @@
 }
 
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
-  const auto Pos = FunctionName.find(".llvm.");
+  // Canonicalize the function name to drop suffixes such as ".llvm.", ".uniq."
+  // etc. We can then match functions in the profile use phase prior to the
+  // addition of these suffixes. Note that this applies to both instrumented and
+  // sampled function names.
+  StringRef CanonicalName =
+      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
 
   // We use the function guid which we expect to be a uint64_t. At
-  // this time, it is the lower 64 bits of the md5 of the function
-  // name. Any suffix with .llvm. is trimmed since these are added by
-  // thinLTO global promotion. At the time the profile is consumed,
-  // these suffixes will not be present.
-  return Function::getGUID(FunctionName.take_front(Pos));
+  // this time, it is the lower 64 bits of the md5 of the canonical
+  // function name.
+  return Function::getGUID(CanonicalName);
 }
 
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -507,12 +508,16 @@
         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                       // Only the last entry is not an inlined location.
                       I != NumFrames - 1);
-        // Here we retain a mapping from the GUID to symbol name instead of
-        // adding it to the frame object directly to reduce memory overhead.
-        // This is because there can be many unique frames, particularly for
-        // callsite frames.
-        if (KeepSymbolName)
-          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+        // Here we retain a mapping from the GUID to canonical symbol name
+        // instead of adding it to the frame object directly to reduce memory
+        // overhead. This is because there can be many unique frames,
+        // particularly for callsite frames.
+        if (KeepSymbolName) {
+          StringRef CanonicalName =
+              sampleprof::FunctionSamples::getCanonicalFnName(
+                  DIFrame.FunctionName);
+          GuidToSymbolName.insert({Guid, CanonicalName.str()});
+        }
 
         const FrameId Hash = F.hash();
         IdToFrame.insert({Hash, F});
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -99,7 +99,7 @@
 MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
   const Frame &F = arg;
 
-  const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName);
+  const uint64_t ExpectedHash = IndexedMemProfRecord::getGUID(FunctionName);
   if (F.Function != ExpectedHash) {
     *result_listener << "Hash mismatch";
     return false;
@@ -147,7 +147,7 @@
                                                 specifier(), false))
       .Times(1)
       .WillRepeatedly(Return(makeInliningInfo({
-          {"xyz", 10, 5, 30},
+          {"xyz.llvm.123", 10, 5, 30},
           {"abc", 10, 5, 30},
       })));
 