diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 -O0 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -O2 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // Check the generation of pseudoprobe intrinsic call diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1924,6 +1924,13 @@ ModulePassManager MPM; + // Perform pseudo probe instrumentation in O0 mode. This keeps different + // build modes consistent. For example, an LTO build can mix an O0 prelink + // with an O2 postlink, and loading a sample profile in the postlink + // requires pseudo probe instrumentation in the prelink. 
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(SampleProfileProbePass(TM)); + if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || PGOOpt->Action == PGOOptions::IRUse)) addPGOInstrPassesForO0( diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -7,15 +7,23 @@ //===----------------------------------------------------------------------===// #include "PerfReader.h" #include "ProfileGenerator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" +#define DEBUG_TYPE "perf-reader" + +STATISTIC(NumStackSamplesWithInvalidReturnAddress, + "Number of stack samples with an invalid return address"); + +STATISTIC(NumStackSamples, "Number of stack samples"); + static cl::opt ShowMmapEvents("show-mmap-events", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Print binary load events.")); cl::opt SkipSymbolization("skip-symbolization", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, - cl::desc("Dump the unsumbolized profile to the " + cl::desc("Dump the unsymbolized profile to the " "output file. It will show unwinder " "output for CS profile generation.")); @@ -510,15 +518,24 @@ if (!Binary->addressIsCode(FrameAddr)) break; - // We need to translate return address to call address - // for non-leaf frames + // We need to translate return address to call address for non-leaf frames. if (!CallStack.empty()) { - FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); + auto I = Binary->getIndexForAddr(FrameAddr); + FrameAddr = I ? Binary->getAddressforIndex(I - 1) : 0; + // Stop at an invalid return address caused by bad unwinding. This can + // happen with frame-pointer-based unwinding when a callee function does + // not have the frame pointer chain set up. 
+ if (!FrameAddr || !Binary->addressIsCall(FrameAddr)) { + NumStackSamplesWithInvalidReturnAddress++; + break; + } } CallStack.emplace_back(FrameAddr); } + NumStackSamples++; + // Skip other unrelated line, find the next valid LBR line // Note that even for empty call stack, we should skip the address at the // bottom, otherwise the following pass may generate a truncated callstack