diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -142,6 +142,9 @@ ContextTrieNode &getRootContext(); void promoteMergeContextSamplesTree(const Instruction &Inst, StringRef CalleeName); + + // Create a merged context-less profile map. + void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles); // Dump the internal context profile trie. void dump(); @@ -158,7 +161,6 @@ promoteMergeContextSamplesTree(ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, uint32_t ContextFramesToRemove); - // Map from function name to context profiles (excluding base profile) StringMap FuncToCtxtProfiles; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -595,4 +595,24 @@ return *ToNode; } + +void SampleContextTracker::createContextLessProfileMap( + SampleProfileMap &ContextLessProfiles) { + std::queue NodeQueue; + NodeQueue.push(&RootContext); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + FunctionSamples *FProfile = Node->getFunctionSamples(); + NodeQueue.pop(); + + if (FProfile) { + // Profile's context can be empty, use ContextNode's func name. 
+ ContextLessProfiles[Node->getFuncName()].merge(*FProfile); + } + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + } +} } // namespace llvm diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -32,6 +32,7 @@ class ProfileGeneratorBase { public: + ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){}; ProfileGeneratorBase(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters) : Binary(Binary), SampleCounters(Counters){}; @@ -44,7 +45,7 @@ create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters, bool profileIsCS); static std::unique_ptr - create(ProfiledBinary *Binary, const SampleProfileMap &&ProfileMap, + create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap, bool profileIsCS); virtual void generateProfile() = 0; void write(); @@ -109,7 +110,7 @@ StringRef getCalleeNameForOffset(uint64_t TargetOffset); - void computeSummaryAndThreshold(); + void computeSummaryAndThreshold(SampleProfileMap &ProfileMap); void calculateAndShowDensity(const SampleProfileMap &Profiles); @@ -118,7 +119,9 @@ void showDensitySuggestion(double Density); - void collectProfiledFunctions(); + void collectProfiledFunctions( + const ContextSampleCounterMap *SampleCounters, + std::unordered_set &ProfiledFunctions); // Thresholds from profile summary to answer isHotCount/isColdCount queries. 
uint64_t HotCountThreshold; @@ -166,6 +169,7 @@ void postProcessProfiles(); void trimColdProfiles(const SampleProfileMap &Profiles, uint64_t ColdCntThreshold); + void collectProfiledFunctions(); }; class CSProfileGenerator : public ProfileGeneratorBase { @@ -173,8 +177,16 @@ CSProfileGenerator(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters) : ProfileGeneratorBase(Binary, Counters){}; - CSProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles) - : ProfileGeneratorBase(Binary, std::move(Profiles)){}; + CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles) + : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr) { + // This is for the case the input is a llvm sample profile. + std::unordered_set ProfiledFunctions; + for (const auto &FS : Profiles) { + if (auto *Func = Binary->getBinaryFunction(FS.first.getName())) + ProfiledFunctions.insert(Func); + } + Binary->setProfiledFunctions(ProfiledFunctions); + }; void generateProfile() override; // Trim the context stack at a given depth. @@ -345,7 +357,8 @@ SampleContextFrameVector &Context); void buildProfileMap(); - + void computeSummaryAndThreshold(); + void collectProfiledFunctions(); ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); }; // Underlying context table serves for sample profile writer. 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -91,6 +91,7 @@ llvm::cl::Optional); extern cl::opt ProfileSummaryCutoffHot; +extern cl::opt UseContextLessSummary; static cl::opt GenCSNestedProfile( "gen-cs-nested-profile", cl::Hidden, cl::init(true), @@ -128,14 +129,13 @@ } std::unique_ptr -ProfileGeneratorBase::create(ProfiledBinary *Binary, - const SampleProfileMap &&Profiles, +ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, bool ProfileIsCS) { std::unique_ptr Generator; if (ProfileIsCS) { if (Binary->useFSDiscriminator()) exitWithError("FS discriminator is not supported in CS profile."); - Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles))); + Generator.reset(new CSProfileGenerator(Binary, Profiles)); } else { Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); } @@ -401,36 +401,44 @@ updateTotalSamples(); } -void ProfileGeneratorBase::collectProfiledFunctions() { - std::unordered_set ProfiledFunctions; - if (SampleCounters) { - // Go through all the stacks, ranges and branches in sample counters, use - // the start of the range to look up the function it belongs and record the - // function. 
- for (const auto &CI : *SampleCounters) { - if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { - for (auto Addr : CtxKey->Context) { - if (FuncRange *FRange = Binary->findFuncRangeForOffset( - Binary->virtualAddrToOffset(Addr))) - ProfiledFunctions.insert(FRange->Func); - } - } - - for (auto Item : CI.second.RangeCounter) { - uint64_t StartOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) +void ProfileGeneratorBase::collectProfiledFunctions( + const ContextSampleCounterMap *SampleCounters, + std::unordered_set &ProfiledFunctions) { + assert(SampleCounters && "SampleCounters should not be null"); + // Go through all the stacks, ranges and branches in sample counters, use + // the start of the range to look up the function it belongs and record the + // function. + for (const auto &CI : *SampleCounters) { + if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { + for (auto Addr : CtxKey->Context) { + if (FuncRange *FRange = Binary->findFuncRangeForOffset( + Binary->virtualAddrToOffset(Addr))) ProfiledFunctions.insert(FRange->Func); } + } - for (auto Item : CI.second.BranchCounter) { - uint64_t SourceOffset = Item.first.first; - uint64_t TargetOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) - ProfiledFunctions.insert(FRange->Func); - if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) - ProfiledFunctions.insert(FRange->Func); - } + for (auto Item : CI.second.RangeCounter) { + uint64_t StartOffset = Item.first.first; + if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) + ProfiledFunctions.insert(FRange->Func); + } + + for (auto Item : CI.second.BranchCounter) { + uint64_t SourceOffset = Item.first.first; + uint64_t TargetOffset = Item.first.second; + if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) + ProfiledFunctions.insert(FRange->Func); + if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) 
+ ProfiledFunctions.insert(FRange->Func); } + } +} + +void ProfileGenerator::collectProfiledFunctions() { + std::unordered_set ProfiledFunctions; + if (SampleCounters) { + ProfileGeneratorBase::collectProfiledFunctions(SampleCounters, + ProfiledFunctions); } else { // This is for the case the input is a llvm sample profile. for (const auto &FS : ProfileMap) { @@ -438,7 +446,30 @@ ProfiledFunctions.insert(Func); } } + Binary->setProfiledFunctions(ProfiledFunctions); +} +void CSProfileGenerator::collectProfiledFunctions() { + std::unordered_set ProfiledFunctions; + if (SampleCounters) { + ProfileGeneratorBase::collectProfiledFunctions(SampleCounters, + ProfiledFunctions); + } else { + // This is for the case the input is a llvm sample profile. + std::queue NodeQueue; + NodeQueue.push(&getRootContext()); + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + + if (!Node->getFuncName().empty()) + if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) + ProfiledFunctions.insert(Func); + + for (auto &It : Node->getAllChildContext()) + NodeQueue.push(&It.second); + } + } Binary->setProfiledFunctions(ProfiledFunctions); } @@ -471,7 +502,7 @@ } void ProfileGenerator::postProcessProfiles() { - computeSummaryAndThreshold(); + computeSummaryAndThreshold(ProfileMap); trimColdProfiles(ProfileMap, ColdCountThreshold); calculateAndShowDensity(ProfileMap); } @@ -959,14 +990,14 @@ } void CSProfileGenerator::postProcessProfiles() { - buildProfileMap(); - // TODO: free the FunctionSamples' memory that is created in profile - // generator. - // Compute hot/cold threshold based on profile. This will be used for cold // context profile merging/trimming. computeSummaryAndThreshold(); + buildProfileMap(); + // TODO: free the FunctionSamples' memory that is created in profile + // generator. + // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. 
if (EnableCSPreInliner) { @@ -998,15 +1029,26 @@ } } -void ProfileGeneratorBase::computeSummaryAndThreshold() { +void ProfileGeneratorBase::computeSummaryAndThreshold( + SampleProfileMap &Profiles) { SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - Summary = Builder.computeSummaryForProfiles(ProfileMap); + Summary = Builder.computeSummaryForProfiles(Profiles); HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( (Summary->getDetailedSummary())); ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( (Summary->getDetailedSummary())); } +void CSProfileGenerator::computeSummaryAndThreshold() { + SampleProfileMap ContextLessProfiles; + // Always merge and use context-less profile map to compute summary. + bool OldOpt = UseContextLessSummary; + UseContextLessSummary = true; + ContextTracker.createContextLessProfileMap(ContextLessProfiles); + ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); + UseContextLessSummary = OldOpt; +} + void ProfileGeneratorBase::extractProbesFromRange( const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, bool FindDisjointRanges) { diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -167,8 +167,7 @@ std::move(ReaderOrErr.get()); Reader->read(); std::unique_ptr Generator = - ProfileGeneratorBase::create(Binary.get(), - std::move(Reader->getProfiles()), + ProfileGeneratorBase::create(Binary.get(), Reader->getProfiles(), Reader->profileIsCS()); Generator->generateProfile(); Generator->write();