diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test @@ -2,6 +2,8 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 --ignore-stack-samples +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX ; CHECK:[main:1 @ foo]:309:0 ; CHECK: 2.1: 14 @@ -11,6 +13,18 @@ ; CHECK:[main:1 @ foo:3.1 @ bar]:84:0 ; CHECK: 1: 14 +; CHECK-STRIP-CTX: main:379:0 +; CHECK-STRIP-CTX: 0: 0 +; CHECK-STRIP-CTX: 2: 0 +; CHECK-STRIP-CTX: 1: foo:379 +; CHECK-STRIP-CTX: 2.1: 14 +; CHECK-STRIP-CTX: 3: 15 +; CHECK-STRIP-CTX: 3.2: 1 +; CHECK-STRIP-CTX: 4: 0 +; CHECK-STRIP-CTX: 7: 0 +; CHECK-STRIP-CTX: 3.1: bar:84 +; CHECK-STRIP-CTX: 1: 14 + ; CHECK-UNWINDER: [main:1 @ foo] ; CHECK-UNWINDER: 4 ; CHECK-UNWINDER: 670-6ad:1 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -4,6 +4,8 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t 
--profile-summary-cold-count=0 --ignore-stack-samples +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG-UNWINDER ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 @@ -58,6 +60,21 @@ ; CHECK: 4: 1 ; CHECK: 5: 3 +; CHECK-STRIP-CTX: foo:57:0 +; CHECK-STRIP-CTX: 0: 0 +; CHECK-STRIP-CTX: 1: 0 +; CHECK-STRIP-CTX: 2: 3 +; CHECK-STRIP-CTX: 3: 3 bar:3 +; CHECK-STRIP-CTX: 4: 0 +; CHECK-STRIP-CTX: 5: 0 +; CHECK-STRIP-CTX: bar:50:3 +; CHECK-STRIP-CTX: 0: 3 +; CHECK-STRIP-CTX: 1: 3 +; CHECK-STRIP-CTX: 2: 2 +; CHECK-STRIP-CTX: 4: 1 +; CHECK-STRIP-CTX: 5: 3 +; CHECK-STRIP-CTX: main:0:0 +; CHECK-STRIP-CTX: 0: 0 ; CHECK-UNWINDER: [main:1 @ foo] ; CHECK-UNWINDER-NEXT: 3 diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -574,12 +574,15 @@ }; void updateBinaryAddress(const MMapEvent &Event); - PerfScriptType getPerfScriptType() const { return PerfType; } // Entry of the reader to parse multiple perf traces void parsePerfTraces(cl::list &PerfTraceFilenames); + // Post process the profile after trace aggregation, we will do simple range + // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). + void generateRawProfile(); const ContextSampleCounterMap &getSampleCounters() const { return SampleCounters; } + bool profileIsCS() { return ProfileIsCS; } protected: static PerfScriptType @@ -609,9 +612,10 @@ // An aggregated count is given to indicate how many times the sample is // repeated. 
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) = 0; - // Post process the profile after trace aggregation, we will do simple range - // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). - virtual void generateRawProfile() = 0; + // Unwind the hybrid samples after aggregation + void unwindSamples(); + void processLBROnlySamples(); + void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat); void writeRawProfile(StringRef Filename); void writeRawProfile(raw_fd_ostream &OS); @@ -623,6 +627,8 @@ PerfScriptType PerfType = PERF_UNKNOWN; // Keep track of all invalid return addresses std::set InvalidReturnAddresses; + + bool ProfileIsCS = false; }; /* @@ -641,11 +647,6 @@ }; // Parse the hybrid sample including the call and LBR line void parseSample(TraceStream &TraceIt, uint64_t Count) override; - void generateRawProfile() override; - -private: - // Unwind the hybrid samples after aggregration - void unwindSamples(); }; /* @@ -657,21 +658,10 @@ class LBRPerfReader : public PerfReaderBase { public: LBRPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) { - // There is no context for LBR only sample, so initialize one entry with - // fake "empty" context key. - std::shared_ptr Key = - std::make_shared(); - Key->genHashCode(); - SampleCounters.emplace(Hashable(Key), SampleCounter()); PerfType = PERF_LBR; }; - // Parse the LBR only sample. 
void parseSample(TraceStream &TraceIt, uint64_t Count) override; - void generateRawProfile() override; - -private: - void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat); }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -24,6 +24,11 @@ cl::ZeroOrMore, cl::desc("Work with `--skip-symbolization` to dump the " "offset instead of virtual address.")); +cl::opt + IgnoreStackSamples("ignore-stack-samples", cl::ReallyHidden, + cl::init(false), cl::ZeroOrMore, + cl::desc("Ignore call stack sample for hybrid sample " + "and produce context-insensitive profile.")); extern cl::opt ShowDisassemblyOnly; extern cl::opt ShowSourceLocations; @@ -349,7 +354,7 @@ } } -void HybridPerfReader::unwindSamples() { +void PerfReaderBase::unwindSamples() { std::set AllUntrackedCallsites; for (const auto &Item : AggregatedSamples) { const PerfSample *Sample = Item.first.getPtr(); @@ -365,9 +370,6 @@ WithColor::warning() << "Profile context truncated due to missing probe " << "for call instruction at " << format("%" PRIx64, Address) << "\n"; - - if (SkipSymbolization) - writeRawProfile(OutputFilename); } bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt, @@ -643,8 +645,8 @@ } } -void LBRPerfReader::computeCounterFromLBR(const PerfSample *Sample, - uint64_t Repeat) { +void PerfReaderBase::computeCounterFromLBR(const PerfSample *Sample, + uint64_t Repeat) { SampleCounter &Counter = SampleCounters.begin()->second; uint64_t EndOffeset = 0; for (const LBREntry &LBR : Sample->LBRStack) { @@ -674,15 +676,19 @@ } } -void LBRPerfReader::generateRawProfile() { - assert(SampleCounters.size() == 1 && "Must have one entry of sample counter"); +void PerfReaderBase::processLBROnlySamples() { + // There is no context for LBR only sample, so initialize one entry with + // fake "empty" context key. 
+ assert(SampleCounters.empty() && + "Sample counter map should be empty before raw profile generation"); + std::shared_ptr Key = + std::make_shared(); + Key->genHashCode(); + SampleCounters.emplace(Hashable(Key), SampleCounter()); for (const auto &Item : AggregatedSamples) { const PerfSample *Sample = Item.first.getPtr(); computeCounterFromLBR(Sample, Item.second); } - - if (SkipSymbolization) - PerfReaderBase::writeRawProfile(OutputFilename); } uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) { @@ -774,7 +780,17 @@ return PerfType; } -void HybridPerfReader::generateRawProfile() { unwindSamples(); } +void PerfReaderBase::generateRawProfile() { + if (PerfType == PERF_LBR || IgnoreStackSamples) { + processLBROnlySamples(); + } else if (PerfType == PERF_LBR_STACK) { + ProfileIsCS = true; + unwindSamples(); + } + + if (SkipSymbolization) + writeRawProfile(OutputFilename); +} void PerfReaderBase::warnTruncatedStack() { for (auto Address : InvalidReturnAddresses) { @@ -792,7 +808,6 @@ parseAndAggregateTrace(Filename); warnTruncatedStack(); - generateRawProfile(); } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -34,7 +34,7 @@ virtual ~ProfileGeneratorBase() = default; static std::unique_ptr create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - enum PerfScriptType SampleType); + bool ProfileIsCS); virtual void generateProfile() = 0; void write(); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -77,15 +77,12 @@ std::unique_ptr ProfileGeneratorBase::create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - enum PerfScriptType SampleType) { + bool ProfileIsCS) { 
std::unique_ptr Generator; - if (SampleType == PERF_LBR) { - // TODO: Support probe based profile generation - Generator.reset(new ProfileGenerator(Binary, SampleCounters)); - } else if (SampleType == PERF_LBR_STACK) { + if (ProfileIsCS) { Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); } else { - llvm_unreachable("Unsupported perfscript!"); + Generator.reset(new ProfileGenerator(Binary, SampleCounters)); } return Generator; diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -89,13 +89,14 @@ std::unique_ptr Reader = PerfReaderBase::create(Binary.get(), PerfTraceFilenames); Reader->parsePerfTraces(PerfTraceFilenames); + Reader->generateRawProfile(); if (SkipSymbolization) return EXIT_SUCCESS; std::unique_ptr Generator = ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), - Reader->getPerfScriptType()); + Reader->profileIsCS()); Generator->generateProfile(); Generator->write();