diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -206,7 +206,8 @@ enum class SecFuncMetadataFlags : uint32_t { SecFlagInvalid = 0, SecFlagIsProbeBased = (1 << 0), - SecFlagHasAttribute = (1 << 1) + SecFlagHasAttribute = (1 << 1), + SecFlagIsPreInlined = (1 << 2), }; enum class SecFuncOffsetFlags : uint32_t { @@ -591,11 +592,11 @@ : hash_value(getName()); } - /// Set the name of the function. + /// Set the name of the function and clear the current context. void setName(StringRef FunctionName) { - assert(FullContext.empty() && - "setName should only be called for non-CS profile"); Name = FunctionName; + FullContext = SampleContextFrames(); + State = UnknownContext; } void setContext(SampleContextFrames Context, @@ -745,6 +746,16 @@ } } + // Set current context and all callee contexts to be synthetic. + void SetContextSynthetic() { + Context.setState(SyntheticContext); + for (auto &I : CallsiteSamples) { + for (auto &CS : I.second) { + CS.second.SetContextSynthetic(); + } + } + } + /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. /// If the location is not found in profile, return error. @@ -816,7 +827,7 @@ /// Return the sample count of the first instruction of the function. /// The function can be either a standalone symbol or an inlined function. uint64_t getEntrySamples() const { - if (FunctionSamples::ProfileIsCS && getHeadSamples()) { + if (FunctionSamples::ProfileIsCSFlat && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. return getHeadSamples(); @@ -1008,7 +1019,13 @@ /// instruction. This is wrapper of two scenarios, the probe-based profile and /// regular profile, to hide implementation details from the sample loader and /// the context tracker. - static LineLocation getCallSiteIdentifier(const DILocation *DIL); + static LineLocation getCallSiteIdentifier(const DILocation *DIL, + bool ProfileIsFS = false); + + /// Returns a unique hash code for a combination of a callsite location and + /// the callee function name. + static uint64_t getCallSiteHash(StringRef CalleeName, + const LineLocation &Callsite); /// Get the FunctionSamples of the inline instance where DIL originates /// from. @@ -1027,7 +1044,9 @@ static bool ProfileIsProbeBased; - static bool ProfileIsCS; + static bool ProfileIsCSFlat; + + static bool ProfileIsCSNested; SampleContext &getContext() const { return Context; } @@ -1161,6 +1180,40 @@ SampleProfileMap &ProfileMap; }; +// CSProfileConverter converts a full context-sensitive flat sample profile into +// a nested context-sensitive sample profile. +class CSProfileConverter { +public: + CSProfileConverter(SampleProfileMap &Profiles); + void convertProfiles(); + struct FrameNode { + FrameNode(StringRef FName = StringRef(), + FunctionSamples *FSamples = nullptr, + LineLocation CallLoc = {0, 0}) + : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; + + // Map line+discriminator location to child frame + std::map AllChildFrames; + // Function name for current frame + StringRef FuncName; + // Function Samples for current frame + FunctionSamples *FuncSamples; + // Callsite location in parent context + LineLocation CallSiteLoc; + + FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName); + }; + +private: + // Nest all children profiles into the profile of Node. + void convertProfiles(FrameNode &Node); + FrameNode *getOrCreateContextPath(const SampleContext &Context); + + SampleProfileMap &ProfileMap; + FrameNode RootFrame; +}; + /// ProfileSymbolList records the list of function symbols shown up /// in the binary used to generate the profile. It is useful to /// to discriminate a function being so cold as not to shown up diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -473,8 +473,11 @@ /// Whether input profile is based on pseudo probes. bool profileIsProbeBased() const { return ProfileIsProbeBased; } - /// Whether input profile is fully context-sensitive - bool profileIsCS() const { return ProfileIsCS; } + /// Whether input profile is fully context-sensitive and flat. + bool profileIsCSFlat() const { return ProfileIsCSFlat; } + + /// Whether input profile is fully context-sensitive and nested. + bool profileIsCSNested() const { return ProfileIsCSNested; } virtual std::unique_ptr getProfileSymbolList() { return nullptr; @@ -533,8 +536,11 @@ /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; - /// Whether function profiles are context-sensitive. - bool ProfileIsCS = false; + /// Whether function profiles are context-sensitive flat profiles. + bool ProfileIsCSFlat = false; + + /// Whether function profiles are context-sensitive nested profiles. + bool ProfileIsCSNested = false; /// Number of context-sensitive profiles. uint32_t CSProfileCount = 0; @@ -698,6 +704,8 @@ std::error_code readSecHdrTable(); std::error_code readFuncMetadata(bool ProfileHasAttribute); + std::error_code readFuncMetadata(bool ProfileHasAttribute, + FunctionSamples *FProfile); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); std::error_code readMD5NameTable(); diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -269,6 +269,7 @@ std::error_code writeCSNameTableSection(); std::error_code writeFuncMetadata(const SampleProfileMap &Profiles); + std::error_code writeFuncMetadata(const FunctionSamples &Profile); // Functions to write various kinds of sections. std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap); diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -68,7 +68,8 @@ // Constructor for non-CS profile. ProfiledCallGraph(SampleProfileMap &ProfileMap) { - assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here"); + assert(!FunctionSamples::ProfileIsCSFlat && + "CS flat profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); } diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -66,8 +66,6 @@ void dumpTree(); private: - static uint64_t nodeHash(StringRef ChildName, const LineLocation &Callsite); - // Map line+discriminator location to child context std::map AllChildContext; diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -194,7 +194,7 @@ // more function profiles each with lower counts, which in turn leads to lower // hot thresholds. To compensate for that, by default we merge context // profiles before computing profile summary. - if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && + if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat && !UseContextLessSummary.getNumOccurrences())) { for (const auto &I : Profiles) { ContextLessProfiles[I.second.getName()].merge(I.second); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -35,11 +35,18 @@ cl::desc("Cutoff value about how many symbols in profile symbol list " "will be used. This is very useful for performance debugging")); +cl::opt GenerateMergedBaseProfiles( + "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore, + cl::desc("When generating nested context-sensitive profiles, always " + "generate extra base profile for function with all its context " + "profiles merged into it.")); + namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; bool FunctionSamples::ProfileIsProbeBased = false; -bool FunctionSamples::ProfileIsCS = false; +bool FunctionSamples::ProfileIsCSFlat = false; +bool FunctionSamples::ProfileIsCSNested = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; bool FunctionSamples::ProfileIsFS = false; @@ -218,8 +225,9 @@ 0xffff; } -LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) { - if (FunctionSamples::ProfileIsProbeBased) +LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL, + bool ProfileIsFS) { + if (FunctionSamples::ProfileIsProbeBased) { // In a pseudo-probe based profile, a callsite is simply represented by the // ID of the probe associated with the call instruction. The probe ID is // encoded in the Discriminator field of the call instruction's debug @@ -227,9 +235,19 @@ return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex( DIL->getDiscriminator()), 0); - else - return LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()); + } else { + unsigned Discriminator = + ProfileIsFS ? DIL->getDiscriminator() : DIL->getBaseDiscriminator(); + return LineLocation(FunctionSamples::getOffset(DIL), Discriminator); + } +} + +uint64_t FunctionSamples::getCallSiteHash(StringRef CalleeName, + const LineLocation &Callsite) { + uint64_t NameHash = std::hash{}(CalleeName.str()); + uint64_t LocId = + (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator; + return NameHash + (LocId << 5) + LocId; } const FunctionSamples *FunctionSamples::findFunctionSamples( @@ -239,21 +257,16 @@ const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - unsigned Discriminator; - if (ProfileIsFS) - Discriminator = DIL->getDiscriminator(); - else - Discriminator = DIL->getBaseDiscriminator(); - // Use C++ linkage name if possible. StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); - - S.push_back( - std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name)); + S.emplace_back(FunctionSamples::getCallSiteIdentifier( + DIL, FunctionSamples::ProfileIsFS), + Name); PrevDIL = DIL; } + if (S.size() == 0) return this; const FunctionSamples *FS = this; @@ -454,3 +467,81 @@ for (auto &Sym : SortedList) OS << Sym << "\n"; } + +CSProfileConverter::FrameNode * +CSProfileConverter::FrameNode::getOrCreateChildFrame( + const LineLocation &CallSite, StringRef CalleeName) { + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); + auto It = AllChildFrames.find(Hash); + if (It != AllChildFrames.end()) { + assert(It->second.FuncName == CalleeName && + "Hash collision for child context node"); + return &It->second; + } + + AllChildFrames[Hash] = FrameNode(CalleeName, nullptr, CallSite); + return &AllChildFrames[Hash]; +} + +CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) + : ProfileMap(Profiles) { + for (auto &FuncSample : Profiles) { + FunctionSamples *FSamples = &FuncSample.second; + auto *NewNode = getOrCreateContextPath(FSamples->getContext()); + assert(!NewNode->FuncSamples && "New node cannot have sample profile"); + NewNode->FuncSamples = FSamples; + } +} + +CSProfileConverter::FrameNode * +CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { + auto Node = &RootFrame; + LineLocation CallSiteLoc(0, 0); + for (auto &Callsite : Context.getContextFrames()) { + Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName); + CallSiteLoc = Callsite.Location; + } + return Node; +} + +void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { + // Process each child profile. Add each child profile to callsite profile map + // of the current node `Node` if `Node` comes with a profile. Otherwise + // promote the child profile to a standalone profile. + auto *NodeProfile = Node.FuncSamples; + for (auto &It : Node.AllChildFrames) { + auto &ChildNode = It.second; + convertProfiles(ChildNode); + auto *ChildProfile = ChildNode.FuncSamples; + if (!ChildProfile) + continue; + SampleContext OrigChildContext = ChildProfile->getContext(); + // Reset the child context to be contextless. + ChildProfile->getContext().setName(OrigChildContext.getName()); + if (NodeProfile) { + // Add child profile to the callsite profile map. + auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc); + SamplesMap.emplace(OrigChildContext.getName(), *ChildProfile); + NodeProfile->addTotalSamples(ChildProfile->getTotalSamples()); + } + + // Separate child profile to be a standalone profile, if the current parent + // profile doesn't exist. This is a duplicating operation when the child + // profile is already incorporated into the parent which is still useful and + // thus done optionally. It is seen that duplicating context profiles into + // base profiles improves the code quality for thinlto build by allowing a + // profile in the prelink phase for to-be-fully-inlined functions. + if (!NodeProfile || GenerateMergedBaseProfiles) + ProfileMap[ChildProfile->getContext()].merge(*ChildProfile); + + // Contexts coming with a `ContextShouldBeInlined` attribute indicate this + // is a preinliner-computed profile. + if (OrigChildContext.hasAttribute(ContextShouldBeInlined)) + FunctionSamples::ProfileIsCSNested = true; + + // Remove the original child profile. + ProfileMap.erase(OrigChildContext); + } +} + +void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -146,7 +146,7 @@ if (Depth == 0) return false; - if (Depth == 1 && Input[Depth] == '!') { + if (Input[Depth] == '!') { LineTy = LineType::Metadata; return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); } @@ -244,11 +244,11 @@ sampleprof_error Result = sampleprof_error::success; InlineCallStack InlineStack; - uint32_t ProbeProfileCount = 0; + uint32_t TopLevelProbeProfileCount = 0; // SeenMetadata tracks whether we have processed metadata for the current - // top-level function profile. - bool SeenMetadata = false; + // top-level or nested function profile. + uint32_t DepthMetadata = 0; ProfileIsFS = ProfileIsFSDisciminator; FunctionSamples::ProfileIsFS = ProfileIsFS; @@ -275,7 +275,7 @@ "Expected 'mangled_name:NUM:NUM', found " + *LineIt); return sampleprof_error::malformed; } - SeenMetadata = false; + DepthMetadata = 0; SampleContext FContext(FName, CSNameTable); if (FContext.hasContext()) ++CSProfileCount; @@ -302,7 +302,7 @@ *LineIt); return sampleprof_error::malformed; } - if (SeenMetadata && LineTy != LineType::Metadata) { + if (LineTy != LineType::Metadata && Depth == DepthMetadata) { // Metadata must be put at the end of a function profile. reportError(LineIt.line_number(), "Found non-metadata after metadata: " + *LineIt); @@ -322,6 +322,7 @@ FSamples.setName(FName); MergeResult(Result, FSamples.addTotalSamples(NumSamples)); InlineStack.push_back(&FSamples); + DepthMetadata = 0; break; } case LineType::BodyProfile: { @@ -342,11 +343,13 @@ FunctionSamples &FProfile = *InlineStack.back(); if (FunctionHash) { FProfile.setFunctionHash(FunctionHash); - ++ProbeProfileCount; + if (Depth == 1) + ++TopLevelProbeProfileCount; } - if (Attributes) - FProfile.getContext().setAllAttributes(Attributes); - SeenMetadata = true; + FProfile.getContext().setAllAttributes(Attributes); + if (Attributes & (uint32_t)ContextShouldBeInlined) + ProfileIsCSNested = true; + DepthMetadata = Depth; break; } } @@ -355,12 +358,14 @@ assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && "Cannot have both context-sensitive and regular profile"); - ProfileIsCS = (CSProfileCount > 0); - assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && + ProfileIsCSFlat = (CSProfileCount > 0); + assert((TopLevelProbeProfileCount == 0 || + TopLevelProbeProfileCount == Profiles.size()) && "Cannot have both probe-based profiles and regular profiles"); - ProfileIsProbeBased = (ProbeProfileCount > 0); + ProfileIsProbeBased = (TopLevelProbeProfileCount > 0); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; - FunctionSamples::ProfileIsCS = ProfileIsCS; + FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat; + FunctionSamples::ProfileIsCSNested = ProfileIsCSNested; if (Result == sampleprof_error::success) computeSummary(); @@ -625,7 +630,7 @@ ErrorOr SampleProfileReaderExtBinaryBase::readSampleContextFromTable() { - if (ProfileIsCS) { + if (ProfileIsCSFlat) { auto FContext(readContextFromTable()); if (std::error_code EC = FContext.getError()) return EC; @@ -649,7 +654,7 @@ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) Summary->setPartialProfile(true); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) - FunctionSamples::ProfileIsCS = ProfileIsCS = true; + FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true; if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) FunctionSamples::ProfileIsFS = ProfileIsFS = true; break; @@ -683,6 +688,9 @@ ProfileIsProbeBased = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; + ProfileIsCSNested = + hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsPreInlined); + FunctionSamples::ProfileIsCSNested = ProfileIsCSNested; bool HasAttribute = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); if (std::error_code EC = readFuncMetadata(HasAttribute)) @@ -770,7 +778,7 @@ } } - if (ProfileIsCS) { + if (ProfileIsCSFlat) { DenseSet FuncGuidsToUse; if (useMD5()) { for (auto Name : FuncsToUse) @@ -840,7 +848,7 @@ } assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && "Cannot have both context-sensitive and regular profile"); - assert((!CSProfileCount || ProfileIsCS) && + assert((!CSProfileCount || ProfileIsCSFlat) && "Section flag should be consistent with actual profile"); return sampleprof_error::success; } @@ -1078,30 +1086,77 @@ } std::error_code -SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { - while (Data < End) { - auto FContext(readSampleContextFromTable()); - if (std::error_code EC = FContext.getError()) - return EC; - bool ProfileInMap = Profiles.count(*FContext); +SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, + FunctionSamples *FProfile) { + if (Data < End) { if (ProfileIsProbeBased) { auto Checksum = readNumber(); if (std::error_code EC = Checksum.getError()) return EC; - if (ProfileInMap) - Profiles[*FContext].setFunctionHash(*Checksum); + if (FProfile) + FProfile->setFunctionHash(*Checksum); } if (ProfileHasAttribute) { auto Attributes = readNumber(); if (std::error_code EC = Attributes.getError()) return EC; - if (ProfileInMap) - Profiles[*FContext].getContext().setAllAttributes(*Attributes); + if (FProfile) + FProfile->getContext().setAllAttributes(*Attributes); + } + + if (!ProfileIsCSFlat) { + // Read all the attributes for inlined function calls. + auto NumCallsites = readNumber(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) + return EC; + + FunctionSamples *CalleeProfile = nullptr; + if (FProfile) { + CalleeProfile = const_cast( + &FProfile->functionSamplesAt(LineLocation( + *LineOffset, + *Discriminator))[std::string(FContext.get().getName())]); + } + if (std::error_code EC = + readFuncMetadata(ProfileHasAttribute, CalleeProfile)) + return EC; + } } } + return sampleprof_error::success; +} + +std::error_code +SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { + while (Data < End) { + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) + return EC; + FunctionSamples *FProfile = nullptr; + auto It = Profiles.find(*FContext); + if (It != Profiles.end()) + FProfile = &It->second; + + if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) + return EC; + } + assert(Data == End && "More data is read than expected"); return sampleprof_error::success; } @@ -1233,6 +1288,8 @@ Flags.append("probe,"); if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) Flags.append("attr,"); + if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsPreInlined)) + Flags.append("preinlined,"); break; default: break; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -172,7 +172,7 @@ return (std::error_code)sampleprof_error::success; }; - if (FunctionSamples::ProfileIsCS) { + if (FunctionSamples::ProfileIsCSFlat) { // Sort the contexts before writing them out. This is to help fast load all // context profiles for a function as well as their callee contexts which // can help profile-guided importing for ThinLTO. @@ -194,18 +194,46 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata( + const FunctionSamples &FunctionProfile) { + auto &OS = *OutputStream; + if (std::error_code EC = writeContextIdx(FunctionProfile.getContext())) + return EC; + + if (FunctionSamples::ProfileIsProbeBased) + encodeULEB128(FunctionProfile.getFunctionHash(), OS); + if (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested) { + encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS); + } + + if (!FunctionSamples::ProfileIsCSFlat) { + // Recursively emit attributes for all callee samples. + uint64_t NumCallsites = 0; + for (const auto &J : FunctionProfile.getCallsiteSamples()) + NumCallsites += J.second.size(); + encodeULEB128(NumCallsites, OS); + for (const auto &J : FunctionProfile.getCallsiteSamples()) { + for (const auto &FS : J.second) { + LineLocation Loc = J.first; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeFuncMetadata(FS.second)) + return EC; + } + } + } + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata( const SampleProfileMap &Profiles) { - if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS) + if (!FunctionSamples::ProfileIsProbeBased && + !FunctionSamples::ProfileIsCSFlat && !FunctionSamples::ProfileIsCSNested) return sampleprof_error::success; - auto &OS = *OutputStream; for (const auto &Entry : Profiles) { - if (std::error_code EC = writeContextIdx(Entry.second.getContext())) + if (std::error_code EC = writeFuncMetadata(Entry.second)) return EC; - if (FunctionSamples::ProfileIsProbeBased) - encodeULEB128(Entry.second.getFunctionHash(), OS); - if (FunctionSamples::ProfileIsCS) - encodeULEB128(Entry.second.getContext().getAllAttributes(), OS); } return sampleprof_error::success; } @@ -295,10 +323,13 @@ setToCompressSection(SecProfileSymbolList); if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased); - if (Type == SecProfSummary && FunctionSamples::ProfileIsCS) - addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); - if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS) + if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCSNested) + addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsPreInlined); + if (Type == SecFuncMetadata && + (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested)) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); + if (Type == SecProfSummary && FunctionSamples::ProfileIsCSFlat) + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); @@ -440,7 +471,7 @@ /// it needs to be parsed by the SampleProfileReaderText class. std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; - if (FunctionSamples::ProfileIsCS) + if (FunctionSamples::ProfileIsCSFlat) OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples(); else OS << S.getName() << ":" << S.getTotalSamples(); @@ -483,15 +514,14 @@ } Indent -= 1; - if (Indent == 0) { - if (FunctionSamples::ProfileIsProbeBased) { - OS.indent(Indent + 1); - OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; - } - if (FunctionSamples::ProfileIsCS) { - OS.indent(Indent + 1); - OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; - } + if (FunctionSamples::ProfileIsProbeBased) { + OS.indent(Indent + 1); + OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; + } + + if (S.getContext().getAllAttributes()) { + OS.indent(Indent + 1); + OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; } return sampleprof_error::success; @@ -841,7 +871,8 @@ std::unique_ptr Writer; // Currently only Text and Extended Binary format are supported for CSSPGO. - if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) && + if ((FunctionSamples::ProfileIsCSFlat || + FunctionSamples::ProfileIsProbeBased) && (Format == SPF_Binary || Format == SPF_Compact_Binary)) return sampleprof_error::unsupported_writing_format; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -32,7 +32,7 @@ if (CalleeName.empty()) return getHottestChildContext(CallSite); - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) return &It->second; @@ -65,7 +65,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( const LineLocation &CallSite, ContextTrieNode &&NodeToMove, uint32_t ContextFramesToRemove, bool DeleteNode) { - uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); + uint64_t Hash = + FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); LineLocation OldCallSite = NodeToMove.CallSiteLoc; ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); @@ -108,7 +109,7 @@ void ContextTrieNode::removeChildContext(const LineLocation &CallSite, StringRef CalleeName) { - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); // Note this essentially calls dtor and destroys that child context AllChildContext.erase(Hash); } @@ -174,21 +175,9 @@ } } -uint64_t ContextTrieNode::nodeHash(StringRef ChildName, - const LineLocation &Callsite) { - // We still use child's name for child hash, this is - // because for children of root node, we don't have - // different line/discriminator, and we'll rely on name - // to differentiate children. - uint64_t NameHash = std::hash{}(ChildName.str()); - uint64_t LocId = - (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator; - return NameHash + (LocId << 5) + LocId; -} - ContextTrieNode *ContextTrieNode::getOrCreateChildContext( const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) { assert(It->second.getFuncName() == CalleeName && diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -467,6 +467,9 @@ void emitOptimizationRemarksForInlineCandidates( const SmallVectorImpl &Candidates, const Function &F, bool Hot); + void promoteMergeNotInlinedContextSamples( + DenseMap NonInlinedCallSites, + const Function &F); std::vector buildFunctionOrder(Module &M, CallGraph *CG); std::unique_ptr buildProfiledCallGraph(CallGraph &CG); void generateMDProfMetadata(Function &F); @@ -485,7 +488,7 @@ std::unique_ptr ContextTracker; /// Flag indicating whether input profile is context-sensitive - bool ProfileIsCS = false; + bool ProfileIsCSFlat = false; /// Flag indicating which LTO/ThinLTO phase the pass is invoked in. /// @@ -602,7 +605,7 @@ // call instruction should have 0 count. // For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCS) + if (!ProfileIsCSFlat) if (const auto *CB = dyn_cast(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -641,7 +644,7 @@ // call instruction should have 0 count. // For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCS) + if (!ProfileIsCSFlat) if (const auto *CB = dyn_cast(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -695,7 +698,7 @@ if (Function *Callee = Inst.getCalledFunction()) CalleeName = Callee->getName(); - if (ProfileIsCS) + if (ProfileIsCSFlat) return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); const FunctionSamples *FS = findFunctionSamples(Inst); @@ -727,7 +730,7 @@ FunctionSamples::getGUID(R->getName()); }; - if (ProfileIsCS) { + if (ProfileIsCSFlat) { auto CalleeSamples = ContextTracker->getIndirectCalleeContextSamplesFor(DIL); if (CalleeSamples.empty()) @@ -780,7 +783,7 @@ auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); if (it.second) { - if (ProfileIsCS) + if (ProfileIsCSFlat) it.first->second = ContextTracker->getContextSamplesFor(DIL); else it.first->second = @@ -1039,7 +1042,7 @@ // For AutoFDO profile, retrieve candidate profiles by walking over // the nested inlinee profiles. - if (!ProfileIsCS) { + if (!ProfileIsCSFlat) { Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold); return; } @@ -1134,7 +1137,7 @@ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || ProfileIsCS) + if (FS->getEntrySamples() > 0 || ProfileIsCSFlat) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1198,53 +1201,9 @@ } // For CS profile, profile for not inlined context will be merged when - // base profile is being trieved - if (ProfileIsCS) - return Changed; - - // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : LocalNotInlinedCallSites) { - CallBase *I = Pair.getFirst(); - Function *Callee = I->getCalledFunction(); - if (!Callee || Callee->isDeclaration()) - continue; - - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", - I->getDebugLoc(), I->getParent()) - << "previous inlining not repeated: '" - << ore::NV("Callee", Callee) << "' into '" - << ore::NV("Caller", &F) << "'"); - - ++NumCSNotInlined; - const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { - continue; - } - - if (ProfileMergeInlinee) { - // A function call can be replicated by optimizations like callsite - // splitting or jump threading and the replicates end up sharing the - // sample nested callee profile instead of slicing the original inlinee's - // profile. We want to do merge exactly once by filtering out callee - // profiles with a non-zero head sample count. - if (FS->getHeadSamples() == 0) { - // Use entry samples as head samples during the merge, as inlinees - // don't have head samples. - const_cast(FS)->addHeadSamples( - FS->getEntrySamples()); - - // Note that we have to do the merge right after processing function. - // This allows OutlineFS's profile to be used for annotation during - // top-down processing of functions' annotation. - FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); - OutlineFS->merge(*FS); - } - } else { - auto pair = - notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); - } - } + // base profile is being retrieved. + if (!FunctionSamples::ProfileIsCSFlat) + promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } @@ -1285,7 +1244,7 @@ InlinedCallSites->push_back(I); } - if (ProfileIsCS) + if (ProfileIsCSFlat) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); ++NumCSInlined; @@ -1430,7 +1389,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet &InlinedGUIDs) { - assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. @@ -1467,6 +1425,8 @@ if (ExternalInlineAdvisor) SizeLimit = std::numeric_limits::max(); + DenseMap LocalNotInlinedCallSites; + // Perform iterative BFS call site prioritized inlining bool Changed = false; while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { @@ -1521,6 +1481,8 @@ } ICPCount++; Changed = true; + } else if (!ContextTracker) { + LocalNotInlinedCallSites.try_emplace(I, FS); } } } else if (CalledFunction && CalledFunction->getSubprogram() && @@ -1532,6 +1494,8 @@ CQueue.emplace(NewCandidate); } Changed = true; + } else if (!ContextTracker) { + LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples); } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findExternalInlineCandidate(I, findCalleeFunctionSamples(*I), @@ -1549,9 +1513,63 @@ ++NumCSInlinedHitGrowthLimit; } + // For CS profile, profile for not inlined context will be merged when + // base profile is being retrieved. + if (!FunctionSamples::ProfileIsCSFlat) + promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } +void SampleProfileLoader::promoteMergeNotInlinedContextSamples( + DenseMap NonInlinedCallSites, + const Function &F) { + // Accumulate not inlined callsite information into notInlinedSamples + for (const auto &Pair : NonInlinedCallSites) { + CallBase *I = Pair.getFirst(); + Function *Callee = I->getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", + I->getDebugLoc(), I->getParent()) + << "previous inlining not repeated: '" + << ore::NV("Callee", Callee) << "' into '" + << ore::NV("Caller", &F) << "'"); + + ++NumCSNotInlined; + const FunctionSamples *FS = Pair.getSecond(); + if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + continue; + } + + if (ProfileMergeInlinee) { + // A function call can be replicated by optimizations like callsite + // splitting or jump threading and the replicates end up sharing the + // sample nested callee profile instead of slicing the original + // inlinee's profile. We want to do merge exactly once by filtering out + // callee profiles with a non-zero head sample count. + if (FS->getHeadSamples() == 0) { + // Use entry samples as head samples during the merge, as inlinees + // don't have head samples. + const_cast(FS)->addHeadSamples( + FS->getEntrySamples()); + + // Note that we have to do the merge right after processing function. + // This allows OutlineFS's profile to be used for annotation during + // top-down processing of functions' annotation. + FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); + OutlineFS->merge(*FS, 1); + // Set outlined profile to be synthetic to not bias the inliner. + OutlineFS->SetContextSynthetic(); + } + } else { + auto pair = + notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); + pair.first->second.entryCount += FS->getEntrySamples(); + } + } +} + /// Returns the sorted CallTargetMap \p M by count in descending order. static SmallVector GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) { @@ -1607,7 +1625,7 @@ // With CSSPGO all indirect call targets are counted torwards the // original indirect call site in the profile, including both // inlined and non-inlined targets. - if (!FunctionSamples::ProfileIsCS) { + if (!FunctionSamples::ProfileIsCSFlat) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) @@ -1754,7 +1772,7 @@ } DenseSet InlinedGUIDs; - if (ProfileIsCS && CallsitePrioritizedInline) + if (CallsitePrioritizedInline) Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); else Changed |= inlineHotFunctions(F, InlinedGUIDs); @@ -1782,7 +1800,7 @@ std::unique_ptr SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr ProfiledCG; - if (ProfileIsCS) + if (ProfileIsCSFlat) ProfiledCG = std::make_unique(*ContextTracker); else ProfiledCG = std::make_unique(Reader->getProfiles()); @@ -1828,7 +1846,7 @@ assert(&CG->getModule() == &M); if (UseProfiledCallGraph || - (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) { + (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) { // Use profiled call edges to augment the top-down order. There are cases // that the top-down order computed based on the static call graph doesn't // reflect real execution order. For example @@ -1961,10 +1979,8 @@ } // Apply tweaks if context-sensitive profile is available. - if (Reader->profileIsCS()) { - ProfileIsCS = true; - FunctionSamples::ProfileIsCS = true; - + if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) { + ProfileIsCSFlat = Reader->profileIsCSFlat(); // Enable priority-base inliner and size inline by default for CSSPGO. if (!ProfileSizeInline.getNumOccurrences()) ProfileSizeInline = true; @@ -1986,9 +2002,11 @@ if (!SampleProfileUseProfi.getNumOccurrences()) SampleProfileUseProfi = true; - // Tracker for profiles under different context - ContextTracker = std::make_unique( - Reader->getProfiles(), &GUIDToFuncNameMap); + if (FunctionSamples::ProfileIsCSFlat) { + // Tracker for profiles under different context + ContextTracker = std::make_unique( + Reader->getProfiles(), &GUIDToFuncNameMap); + } } // Load pseudo probe descriptors for probe-based function samples. @@ -2065,7 +2083,7 @@ } // Account for cold calls not inlined.... - if (!ProfileIsCS) + if (!ProfileIsCSFlat) for (const std::pair &pair : notInlinedCallInfo) updateProfileCallee(pair.first, pair.second.entryCount); @@ -2141,7 +2159,7 @@ ORE = OwnedORE.get(); } - if (ProfileIsCS) + if (ProfileIsCSFlat) Samples = ContextTracker->getBaseSamplesFor(F); else Samples = Reader->getSamplesFor(F); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-priority.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-priority.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-priority.prof @@ -0,0 +1,7 @@ +main:225715:0 + 2.1: 5553 + 3: 5391 + 3.1: _Z3sumii:5860 + 0: 5300 + 1: 5279 + 2: 5279 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,8 +10,12 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; RUN: llvm-profdata merge --sample --text --gen-nested-cs-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE + ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-prioritized-inline -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW ; ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,6 +3,9 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE +; RUN: llvm-profdata merge --sample --text --gen-nested-cs-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE ; DEFAULT: '_Z5funcAi' inlined into 'main' ; DEFAULT-NOT: inlined into diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll --- a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -5,6 +5,7 @@ ; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee' ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -sample-profile-prioritized-inline=1 -S | FileCheck -check-prefix=MERGE %s ; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee' ; when the profile uses md5. diff --git a/llvm/test/Transforms/SampleProfile/inline.ll b/llvm/test/Transforms/SampleProfile/inline.ll --- a/llvm/test/Transforms/SampleProfile/inline.ll +++ b/llvm/test/Transforms/SampleProfile/inline.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-priority.prof -sample-profile-prioritized-inline=1 -S | FileCheck %s ; Original C++ test case ; diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext copy from llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext copy to llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline-probe.proftext @@ -4,7 +4,8 @@ 3: 287884 4: 287864 _Z3fibi:315608 15: 23 - !Attributes: 0 + !CFGChecksum: 281479271677951 + !Attributes: 2 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 @@ -13,32 +14,35 @@ 10: 23324 11: 23327 _Z3fibi:25228 15: 11 - !Attributes: 1 + !CFGChecksum: 281479271677951 + !Attributes: 2 [external:12 @ main]:154:12 2: 12 3: 10 _Z5funcAi:7 3.1: 10 _Z5funcBi:11 - !Attributes: 0 + !CFGChecksum: 563125815542069 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 3.1: 18 _Z5funcBi:19 - !Attributes: 0 + !CFGChecksum: 563125815542069 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 - !Attributes: 0 + !CFGChecksum: 563022570642068 [externalA:17 @ _Z5funcBi]:120:3 0: 3 1: 3 - !Attributes: 0 + !CFGChecksum: 563022570642068 [main:3.1 @ _Z5funcBi]:120:19 0: 19 1: 19 _Z8funcLeafi:20 3: 12 - !Attributes: 1 + !CFGChecksum: 563022570642068 + !Attributes: 2 [main:3 @ _Z5funcAi]:99:11 0: 10 1: 10 _Z8funcLeafi:11 3: 24 - !Attributes: 0 + !CFGChecksum: 844530426352218 + !Attributes: 2 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext copy from llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext copy to llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext @@ -4,7 +4,7 @@ 3: 287884 4: 287864 _Z3fibi:315608 15: 23 - !Attributes: 0 + !Attributes: 2 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 @@ -13,32 +13,28 @@ 10: 23324 11: 23327 _Z3fibi:25228 15: 11 - !Attributes: 1 + !Attributes: 2 [external:12 @ main]:154:12 2: 12 3: 10 _Z5funcAi:7 3.1: 10 _Z5funcBi:11 - !Attributes: 0 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 3.1: 18 _Z5funcBi:19 - !Attributes: 0 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 - !Attributes: 0 [externalA:17 @ _Z5funcBi]:120:3 0: 3 1: 3 - !Attributes: 0 [main:3.1 @ _Z5funcBi]:120:19 0: 19 1: 19 _Z8funcLeafi:20 3: 12 - !Attributes: 1 + !Attributes: 2 [main:3 @ _Z5funcAi]:99:11 0: 10 1: 10 _Z8funcLeafi:11 3: 24 - !Attributes: 0 + !Attributes: 2 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext @@ -4,7 +4,6 @@ 3: 287884 4: 287864 _Z3fibi:315608 15: 23 - !Attributes: 0 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 @@ -18,20 +17,16 @@ 2: 12 3: 10 _Z5funcAi:7 3.1: 10 _Z5funcBi:11 - !Attributes: 0 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 3.1: 18 _Z5funcBi:19 - !Attributes: 0 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 - !Attributes: 0 [externalA:17 @ _Z5funcBi]:120:3 0: 3 1: 3 - !Attributes: 0 [main:3.1 @ _Z5funcBi]:120:19 0: 19 1: 19 _Z8funcLeafi:20 @@ -41,4 +36,3 @@ 0: 10 1: 10 _Z8funcLeafi:11 3: 24 - !Attributes: 0 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -0,0 +1,157 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-nested-cs-profile=1 -generate-merged-base-profiles=0 +RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace +RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-nested-cs-profile=1 -generate-merged-base-profiles=0 +RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-nested-cs-profile=1 -generate-merged-base-profiles=0 +RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin +RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace +RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-nested-cs-profile=1 -generate-merged-base-profiles=1 +RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT + + +; CHECK:main:1968679:12 +; CHECK-NEXT: 2: 24 +; CHECK-NEXT: 3: 28 _Z5funcAi:18 +; CHECK-NEXT: 3.1: 28 _Z5funcBi:30 +; CHECK-NEXT: 3: _Z5funcAi:1467398 +; CHECK-NEXT: 0: 10 +; CHECK-NEXT: 1: 10 _Z8funcLeafi:11 +; CHECK-NEXT: 3: 24 +; CHECK-NEXT: 1: _Z8funcLeafi:1467299 +; CHECK-NEXT: 0: 6 +; CHECK-NEXT: 1: 6 +; CHECK-NEXT: 3: 287884 +; CHECK-NEXT: 4: 287864 _Z3fibi:315608 +; CHECK-NEXT: 15: 23 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: 3.1: _Z5funcBi:500973 +; CHECK-NEXT: 0: 19 +; CHECK-NEXT: 1: 19 _Z8funcLeafi:20 +; CHECK-NEXT: 3: 12 +; CHECK-NEXT: 1: _Z8funcLeafi:500853 +; CHECK-NEXT: 0: 15 +; CHECK-NEXT: 1: 15 +; CHECK-NEXT: 3: 74946 +; CHECK-NEXT: 4: 74941 _Z3fibi:82359 +; CHECK-NEXT: 10: 23324 +; CHECK-NEXT: 11: 23327 _Z3fibi:25228 +; CHECK-NEXT: 15: 11 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT:_Z5funcBi:240:13 +; CHECK-NEXT: 0: 13 +; CHECK-NEXT: 1: 13 + + + +; RECOUNT:main:1968679:12 +; RECOUNT-NEXT: 2: 24 +; RECOUNT-NEXT: 3: 28 _Z5funcAi:18 +; RECOUNT-NEXT: 3.1: 28 _Z5funcBi:30 +; RECOUNT-NEXT: 3: _Z5funcAi:1467398 +; RECOUNT-NEXT: 0: 10 +; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 +; RECOUNT-NEXT: 3: 24 +; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 +; RECOUNT-NEXT: 0: 6 +; RECOUNT-NEXT: 1: 6 +; RECOUNT-NEXT: 3: 287884 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 15: 23 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: 3.1: _Z5funcBi:500973 +; RECOUNT-NEXT: 0: 19 +; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20 +; RECOUNT-NEXT: 3: 12 +; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 +; RECOUNT-NEXT: 0: 15 +; RECOUNT-NEXT: 1: 15 +; RECOUNT-NEXT: 3: 74946 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 11 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z8funcLeafi:1968152:31 +; RECOUNT-NEXT: 0: 21 +; RECOUNT-NEXT: 1: 21 +; RECOUNT-NEXT: 3: 362830 +; RECOUNT-NEXT: 4: 362805 _Z3fibi:397967 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 34 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z5funcAi:1467398:11 +; RECOUNT-NEXT: 0: 10 +; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 +; RECOUNT-NEXT: 3: 24 +; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 +; RECOUNT-NEXT: 0: 6 +; RECOUNT-NEXT: 1: 6 +; RECOUNT-NEXT: 3: 287884 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 15: 23 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z5funcBi:501213:32 +; RECOUNT-NEXT: 0: 32 +; RECOUNT-NEXT: 1: 32 _Z8funcLeafi:20 +; RECOUNT-NEXT: 3: 12 +; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 +; RECOUNT-NEXT: 0: 15 +; RECOUNT-NEXT: 1: 15 +; RECOUNT-NEXT: 3: 74946 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 11 +; RECOUNT-NEXT: !Attributes: 2 + +; PROBE:main:1968679:12 +; PROBE-NEXT: 2: 24 +; PROBE-NEXT: 3: 28 _Z5funcAi:18 +; PROBE-NEXT: 3.1: 28 _Z5funcBi:30 +; PROBE-NEXT: 3: _Z5funcAi:1467398 +; PROBE-NEXT: 0: 10 +; PROBE-NEXT: 1: 10 _Z8funcLeafi:11 +; PROBE-NEXT: 3: 24 +; PROBE-NEXT: 1: _Z8funcLeafi:1467299 +; PROBE-NEXT: 0: 6 +; PROBE-NEXT: 1: 6 +; PROBE-NEXT: 3: 287884 +; PROBE-NEXT: 4: 287864 _Z3fibi:315608 +; PROBE-NEXT: 15: 23 +; PROBE-NEXT: !CFGChecksum: 281479271677951 +; PROBE-NEXT: !Attributes: 2 +; PROBE-NEXT: !CFGChecksum: 844530426352218 +; PROBE-NEXT: !Attributes: 2 +; PROBE-NEXT: 3.1: _Z5funcBi:500973 +; PROBE-NEXT: 0: 19 +; PROBE-NEXT: 1: 19 _Z8funcLeafi:20 +; PROBE-NEXT: 3: 12 +; PROBE-NEXT: 1: _Z8funcLeafi:500853 +; PROBE-NEXT: 0: 15 +; PROBE-NEXT: 1: 15 +; PROBE-NEXT: 3: 74946 +; PROBE-NEXT: 4: 74941 _Z3fibi:82359 +; PROBE-NEXT: 10: 23324 +; PROBE-NEXT: 11: 23327 _Z3fibi:25228 +; PROBE-NEXT: 15: 11 +; PROBE-NEXT: !CFGChecksum: 281479271677951 +; PROBE-NEXT: !Attributes: 2 +; PROBE-NEXT: !CFGChecksum: 563022570642068 +; PROBE-NEXT: !Attributes: 2 +; PROBE-NEXT: !CFGChecksum: 563125815542069 +; PROBE-NEXT:_Z5funcBi:240:13 +; PROBE-NEXT: 0: 13 +; PROBE-NEXT: 1: 13 +; PROBE-NEXT: !CFGChecksum: 563022570642068 + + +; PREINLINE: FunctionMetadata {{.*}} Flags: {attr,preinlined} + + diff --git a/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test b/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test --- a/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test @@ -14,7 +14,6 @@ CHECK-TRIM-NEXT: 3: 287884 CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608 CHECK-TRIM-NEXT: 15: 23 -CHECK-TRIM-NEXT: !Attributes: 0 CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 CHECK-TRIM-NEXT: 0: 15 CHECK-TRIM-NEXT: 1: 15 @@ -29,14 +28,11 @@ CHECK-MERGE-NEXT: 0: 32 CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20 CHECK-MERGE-NEXT: 3: 12 -CHECK-MERGE-NEXT: !Attributes: 0 CHECK-MERGE-NEXT:[main]:308:12 CHECK-MERGE-NEXT: 2: 24 CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18 CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30 -CHECK-MERGE-NEXT: !Attributes: 0 CHECK-MERGE-NEXT:[_Z5funcAi]:99:11 CHECK-MERGE-NEXT: 0: 10 CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11 CHECK-MERGE-NEXT: 3: 24 -CHECK-MERGE-NEXT: !Attributes: 0 diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -15,6 +15,10 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM +; Test llvm-profgen with preinliner on will merge not inlinable profile into base profile. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --gen-nested-cs-profile=1 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL-NEST + ; CHECK-DEFAULT: [main:1 @ foo]:44:0 ; CHECK-DEFAULT-NEXT: 2.1: 14 ; CHECK-DEFAULT-NEXT: 3: 15 @@ -49,3 +53,11 @@ ; CHECK-TRIM:[foo:3.1 @ bar]:14:0 ; CHECK-TRIM-NEXT: 1: 14 ; CHECK-TRIM-NEXT: !Attributes: 3 + +; CHECK-PREINL-NEST: foo:58:0 +; CHECK-PREINL-NEST-NEXT: 2.1: 14 +; CHECK-PREINL-NEST-NEXT: 3: 15 +; CHECK-PREINL-NEST-NEXT: 3.1: 14 bar:14 +; CHECK-PREINL-NEST-NEXT: 3.2: 1 +; CHECK-PREINL-NEST-NEXT: 3.1: bar:14 +; CHECK-PREINL-NEST-NEXT: 1: 14 diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -23,7 +23,6 @@ ; CHECK-NEXT: 7: 2 fb:2 ; CHECK-NEXT: 8: 1 fa:1 ; CHECK-NEXT: !CFGChecksum: 563070469352221 -; CHECK-NEXT: !Attributes: 0 ; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 ; CHECK-NEXT: 1: 4 ; CHECK-NEXT: 2: 3 @@ -41,7 +40,6 @@ ; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4 ; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3 ; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 563022570642068 -; CHECK-KEEP-COLD-NEXT: !Attributes: 0 ; CHECK-KEEP-COLD-NEXT:[fa]:14:4 ; CHECK-KEEP-COLD-NEXT: 1: 4 ; CHECK-KEEP-COLD-NEXT: 3: 4 @@ -71,7 +69,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3 @@ -81,7 +78,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 0 @@ -90,7 +86,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1 @@ -100,7 +95,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test @@ -12,7 +12,6 @@ ; CHECK-NEXT: 8: 15 bar:15 ; CHECK-NEXT: 9: 0 ; CHECK-NEXT: !CFGChecksum: 563088904013236 -; CHECK-NEXT: !Attributes: 0 ; CHECK: [foo:8 @ bar]:30:15 ; CHECK-NEXT: 1: 15 ; CHECK-NEXT: 4: 15 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -687,7 +687,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, + bool UseMD5, bool GenPartialProfile, bool GenNestedCSProfile, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode) { using namespace sampleprof; @@ -696,7 +696,7 @@ LLVMContext Context; sampleprof::ProfileSymbolList WriterList; Optional ProfileIsProbeBased; - Optional ProfileIsCS; + Optional ProfileIsCSFlat; for (const auto &Input : Inputs) { auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, FSDiscriminatorPassOption); @@ -723,9 +723,10 @@ exitWithError( "cannot merge probe-based profile with non-probe-based profile"); ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased; - if (ProfileIsCS.hasValue() && ProfileIsCS != FunctionSamples::ProfileIsCS) + if (ProfileIsCSFlat.hasValue() && + ProfileIsCSFlat != FunctionSamples::ProfileIsCSFlat) exitWithError("cannot merge CS profile with non-CS profile"); - ProfileIsCS = FunctionSamples::ProfileIsCS; + ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat; for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); I != E; ++I) { sampleprof_error Result = sampleprof_error::success; @@ -748,7 +749,7 @@ WriterList.merge(*ReaderList); } - if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { + if (ProfileIsCSFlat && (SampleMergeColdContext || SampleTrimColdContext)) { // Use threshold calculated from profile summary unless specified. SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); auto Summary = Builder.computeSummaryForProfiles(ProfileMap); @@ -763,6 +764,12 @@ SampleMergeColdContext, SampleColdContextFrameDepth, false); } + if (ProfileIsCSFlat && GenNestedCSProfile) { + CSProfileConverter CSConverter(ProfileMap); + CSConverter.convertProfiles(); + ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = false; + } + auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); if (std::error_code EC = WriterOrErr.getError()) @@ -941,7 +948,10 @@ cl::opt InstrProfColdThreshold( "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " - "override the cold threshold got from profile summary.")); + "override the cold threshold got from profile summary. ")); + cl::opt GenNestedCSProfile( + "gen-nested-cs-profile", cl::Hidden, cl::init(false), + cl::desc("Generate nested function profiles for CSSPGO")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -987,10 +997,9 @@ else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, SampleMergeColdContext, - SampleTrimColdContext, SampleColdContextFrameDepth, - FailureMode); - + UseMD5, GenPartialProfile, GenNestedCSProfile, + SampleMergeColdContext, SampleTrimColdContext, + SampleColdContextFrameDepth, FailureMode); return 0; } @@ -1911,7 +1920,7 @@ if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased()) exitWithError( "cannot compare probe-based profile with non-probe-based profile"); - if (BaseReader->profileIsCS() != TestReader->profileIsCS()) + if (BaseReader->profileIsCSFlat() != TestReader->profileIsCSFlat()) exitWithError("cannot compare CS profile with non-CS profile"); // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -538,14 +538,14 @@ const ContextSampleCounterMap &getSampleCounters() const { return SampleCounters; } - bool profileIsCS() { return ProfileIsCS; } + bool profileIsCSFlat() { return ProfileIsCSFlat; } protected: ProfiledBinary *Binary = nullptr; StringRef PerfTraceFile; ContextSampleCounterMap SampleCounters; - bool ProfileIsCS = false; + bool ProfileIsCSFlat = false; }; // Read perf script to parse the events and samples. diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -728,7 +728,7 @@ for (auto &CI : OrderedCounters) { uint32_t Indent = 0; - if (ProfileIsCS) { + if (ProfileIsCSFlat) { // Context string key OS << "[" << CI.first << "]\n"; Indent = 2; @@ -815,7 +815,7 @@ StringRef Line = TraceIt.getCurrentLine(); // Read context stack for CS profile. if (Line.startswith("[")) { - ProfileIsCS = true; + ProfileIsCSFlat = true; auto I = ContextStrSet.insert(Line.str()); SampleContext::createCtxVectorFromStr(*I.first, Key->Context); TraceIt.advance(); @@ -1026,8 +1026,8 @@ } void HybridPerfReader::generateUnsymbolizedProfile() { - ProfileIsCS = !IgnoreStackSamples; - if (ProfileIsCS) + ProfileIsCSFlat = !IgnoreStackSamples; + if (ProfileIsCSFlat) unwindSamples(); else PerfScriptReader::generateUnsymbolizedProfile(); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -34,7 +34,7 @@ virtual ~ProfileGeneratorBase() = default; static std::unique_ptr create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCS); + bool ProfileIsCSFlat); virtual void generateProfile() = 0; void write(); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -83,6 +83,10 @@ extern cl::opt ProfileSummaryCutoffHot; +static cl::opt GenNestedCSProfile( + "gen-nested-cs-profile", cl::Hidden, cl::init(false), + cl::desc("Generate nested function profiles for CSSPGO")); + using namespace llvm; using namespace sampleprof; @@ -99,9 +103,9 @@ std::unique_ptr ProfileGeneratorBase::create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCS) { + bool ProfileIsCSFlat) { std::unique_ptr Generator; - if (ProfileIsCS) { + if (ProfileIsCSFlat) { if (Binary->useFSDiscriminator()) exitWithError("FS discriminator is not supported in CS profile."); Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); @@ -532,7 +536,7 @@ } void CSProfileGenerator::generateProfile() { - FunctionSamples::ProfileIsCS = true; + FunctionSamples::ProfileIsCSFlat = true; if (Binary->getTrackFuncContextSize()) computeSizeForProfiledFunctions(); @@ -746,6 +750,12 @@ } calculateAndShowDensity(ContextLessProfiles); + if (GenNestedCSProfile) { + CSProfileConverter CSConverter(ProfileMap); + CSConverter.convertProfiles(); + FunctionSamples::ProfileIsCSFlat = false; + FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; + } } void ProfileGeneratorBase::computeSummaryAndThreshold() { diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -150,7 +150,7 @@ std::unique_ptr Generator = ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), - Reader->profileIsCS()); + Reader->profileIsCSFlat()); Generator->generateProfile(); Generator->write();