diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test @@ -1,12 +1,12 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t -; CHECK:[main:1 @ foo]:44:0 +; CHECK:[main:1 @ foo]:309:0 ; CHECK: 2.1: 14 ; CHECK: 3: 15 ; CHECK: 3.1: 14 bar:14 ; CHECK: 3.2: 1 -; CHECK:[main:1 @ foo:3.1 @ bar]:14:0 +; CHECK:[main:1 @ foo:3.1 @ bar]:84:0 ; CHECK: 1: 14 ; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Range Counter: diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -1,15 +1,15 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t -; CHECK:[main:1 @ foo:3 @ bar]:12:3 +; CHECK:[main:1 @ foo]:54:0 +; CHECK: 2: 3 +; CHECK: 3: 3 bar:3 +; CHECK:[main:1 @ foo:3 @ bar]:50:3 ; CHECK: 0: 3 ; CHECK: 1: 3 ; CHECK: 2: 2 ; CHECK: 4: 1 ; CHECK: 5: 3 -; CHECK:[main:1 @ foo]:6:0 -; CHECK: 2: 3 -; CHECK: 3: 3 bar:3 ; CHECK-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter: ; CHECK-UNWINDER: main:1 @ foo diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test @@ -4,38 +4,38 @@ ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0 ; RUN: FileCheck %s --input-file %t -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 13 fb:11 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK-UNCOMPRESS: 1: 11 ; CHECK-UNCOMPRESS: 2: 1 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:24:0 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 2: 13 fb:11 +; CHECK-UNCOMPRESS:[main:1 @ foo]:7:0 ; CHECK-UNCOMPRESS: 2: 1 ; CHECK-UNCOMPRESS: 3: 2 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:7:0 ; CHECK-UNCOMPRESS: 1: 1 ; CHECK-UNCOMPRESS: 2: 2 fb:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0 ; CHECK-UNCOMPRESS: 2: 1 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:2:0 ; CHECK-UNCOMPRESS: 4: 1 -; CHECK: [main:1 @ foo:3 @ fa]:14:0 -; CHECK: 1: 1 -; CHECK: 2: 13 fb:11 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0 +; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK: 1: 11 ; CHECK: 2: 1 fa:1 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0 +; CHECK: [main:1 @ foo:3 @ fa]:24:0 +; CHECK: 1: 1 +; CHECK: 2: 13 fb:11 +; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:9:0 ; CHECK: 1: 1 ; CHECK: 2: 2 fb:1 ; CHECK: 4: 1 -; CHECK: [main:1 @ foo]:3:0 +; CHECK: [main:1 @ foo]:7:0 ; CHECK: 2: 1 ; CHECK: 3: 2 fa:1 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0 +; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0 ; original code: diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -93,6 +93,8 @@ std::shared_ptr KeyStr = std::make_shared(); KeyStr->Context = Binary->getExpandedContextStr(Stack); + if (KeyStr->Context.empty()) + return nullptr; KeyStr->genHashCode(); return KeyStr; } @@ -116,6 +118,8 @@ return; std::shared_ptr Key = Stack.getContextKey(); + if (Key == nullptr) + return; auto Ret = CtxCounterMap->emplace(Hashable(Key), SampleCounter()); SampleCounter &SCounter = Ret.first->second; for (auto &Item : Cur->RangeSamples) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -212,7 +212,6 @@ FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, LeafLoc.second.Discriminator, Count - PreviousCount); - FunctionProfile.addTotalSamples(Count - PreviousCount); } } @@ -242,9 +241,13 @@ while (IP.Address <= RangeEnd) { uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); - const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(Offset); - // Recording body sample for this specific context - updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc, Count); + auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); + if (LeafLoc.hasValue()) { + // Recording body sample for this specific context + updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); + } + // Accumulate total sample count even it's a line with invalid debug info + FunctionProfile.addTotalSamples(Count); // Move to next IP within the range IP.advance(); } @@ -266,9 +269,11 @@ continue; // Record called target sample and its count - const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); - FunctionProfile.addCalledTargetSamples(LeafLoc.second.LineOffset, - LeafLoc.second.Discriminator, + auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); + if (!LeafLoc.hasValue()) + continue; + FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset, + LeafLoc->second.Discriminator, CalleeName, Count); // Record head sample for called target(callee) @@ -277,7 +282,7 @@ OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); OCalleeCtxStr << " @ "; } - OCalleeCtxStr << getCallSite(LeafLoc) << " @ " << CalleeName.str(); + OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str(); FunctionSamples &CalleeProfile = getFunctionProfileForContext(OCalleeCtxStr.str()); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -11,6 +11,7 @@ #include "CallContext.h" #include "PseudoProbe.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" @@ -225,9 +226,11 @@ return FuncStartAddrMap[Offset]; } - const FrameLocation &getInlineLeafFrameLoc(uint64_t Offset, - bool NameOnly = false) { - return getFrameLocationStack(Offset).back(); + Optional getInlineLeafFrameLoc(uint64_t Offset) { + const auto &Stack = getFrameLocationStack(Offset); + if (Stack.empty()) + return {}; + return Stack.back(); } // Compare two addresses' inline context diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -119,7 +119,8 @@ const FrameLocationStack &Context2 = getFrameLocationStack(Offset2); if (Context1.size() != Context2.size()) return false; - + if (Context1.empty()) + return false; // The leaf frame contains location within the leaf, and it // needs to be remove that as it's not part of the calling context return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1, @@ -134,6 +135,10 @@ for (auto Address : Stack) { uint64_t Offset = virtualAddrToOffset(Address); const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset); + // An instruction without a valid debug line will be ignored by sample + // processing + if (ExpandedContext.empty()) + return std::string(); for (const auto &Loc : ExpandedContext) { ContextVec.push_back(getCallSite(Loc)); } @@ -242,9 +247,14 @@ const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); // Populate a vector of the symbolized callsite at this location - InstructionPointer IP(this, Offset); - Offset2LocStackMap[Offset] = symbolize(IP, true); - + // We don't need symbolized info for probe-based profile, just use an empty + // stack as an entry to indicate a valid binary offset + FrameLocationStack SymbolizedCallStack; + if (!UsePseudoProbes) { + InstructionPointer IP(this, Offset); + SymbolizedCallStack = symbolize(IP, true); + } + Offset2LocStackMap[Offset] = SymbolizedCallStack; // Populate address maps. CodeAddrs.push_back(Offset); if (MCDesc.isCall())