diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -206,7 +206,8 @@ enum class SecFuncMetadataFlags : uint32_t { SecFlagInvalid = 0, SecFlagIsProbeBased = (1 << 0), - SecFlagHasAttribute = (1 << 1) + SecFlagHasAttribute = (1 << 1), + SecFlagIsPreInlined = (1 << 2), }; enum class SecFuncOffsetFlags : uint32_t { @@ -591,11 +592,11 @@ : hash_value(getName()); } - /// Set the name of the function. + /// Set the name of the function and clear the current context. void setName(StringRef FunctionName) { - assert(FullContext.empty() && - "setName should only be called for non-CS profile"); Name = FunctionName; + FullContext = SampleContextFrames(); + State = UnknownContext; } void setContext(SampleContextFrames Context, @@ -745,6 +746,16 @@ } } + // Set current context and all callee contexts to be synthetic. + void SetContextSynthetic() { + Context.setState(SyntheticContext); + for (auto &I : CallsiteSamples) { + for (auto &CS : I.second) { + CS.second.SetContextSynthetic(); + } + } + } + /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. /// If the location is not found in profile, return error. @@ -1008,7 +1019,13 @@ /// instruction. This is wrapper of two scenarios, the probe-based profile and /// regular profile, to hide implementation details from the sample loader and /// the context tracker. - static LineLocation getCallSiteIdentifier(const DILocation *DIL); + static LineLocation getCallSiteIdentifier(const DILocation *DIL, + bool ProfileIsFS = false); + + /// Returns a unique hash code for a combination of a callsite location and + /// the callee function name. + static uint64_t getCallSiteHash(StringRef CalleeName, + const LineLocation &Callsite); /// Get the FunctionSamples of the inline instance where DIL originates /// from. 
@@ -1029,6 +1046,8 @@ static bool ProfileIsCS; + static bool ProfileIsPreinlined; + SampleContext &getContext() const { return Context; } void setContext(const SampleContext &FContext) { Context = FContext; } @@ -1161,6 +1180,38 @@ SampleProfileMap &ProfileMap; }; +class CSProfileConverter { +public: + CSProfileConverter(SampleProfileMap &Profiles); + void convertProfiles(); + struct FrameNode { + FrameNode(StringRef FName = StringRef(), + FunctionSamples *FSamples = nullptr, + LineLocation CallLoc = {0, 0}) + : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; + + // Map line+discriminator location to child frame + std::map AllChildFrames; + // Function name for current frame + StringRef FuncName; + // Function Samples for current frame + FunctionSamples *FuncSamples; + // Callsite location in parent context + LineLocation CallSiteLoc; + + FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName); + }; + +private: + // Nest all children profiles into the profile of Node. + void convertProfiles(FrameNode &Node); + FrameNode *getOrCreateContextPath(const SampleContext &Context); + + SampleProfileMap &ProfileMap; + FrameNode RootFrame; +}; + /// ProfileSymbolList records the list of function symbols shown up /// in the binary used to generate the profile. It is useful to /// to discriminate a function being so cold as not to shown up diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -476,6 +476,8 @@ /// Whether input profile is fully context-sensitive bool profileIsCS() const { return ProfileIsCS; } + bool profileIsPreinlined() const { return ProfileIsPreinlined; } + virtual std::unique_ptr getProfileSymbolList() { return nullptr; }; @@ -536,6 +538,10 @@ /// Whether function profiles are context-sensitive. 
bool ProfileIsCS = false; + /// Whether function profiles are generated by the CS preinliner with full + /// inline decisions made. + bool ProfileIsPreinlined = false; + /// Number of context-sensitive profiles. uint32_t CSProfileCount = 0; @@ -698,6 +704,8 @@ std::error_code readSecHdrTable(); std::error_code readFuncMetadata(bool ProfileHasAttribute); + std::error_code readFuncMetadata(bool ProfileHasAttribute, + FunctionSamples *FProfile, bool Inlined); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); std::error_code readMD5NameTable(); diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -269,6 +269,7 @@ std::error_code writeCSNameTableSection(); std::error_code writeFuncMetadata(const SampleProfileMap &Profiles); + std::error_code writeFuncMetadata(const FunctionSamples &Profile); // Functions to write various kinds of sections. std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap); diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -66,8 +66,6 @@ void dumpTree(); private: - static uint64_t nodeHash(StringRef ChildName, const LineLocation &Callsite); - // Map line+discriminator location to child context std::map AllChildContext; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -35,11 +35,18 @@ cl::desc("Cutoff value about how many symbols in profile symbol list " "will be used. 
This is very useful for performance debugging")); +cl::opt DuplicateContextProfilesIntoBaseProfile( + "duplicate-contexts-into-base", cl::init(true), cl::ZeroOrMore, + cl::desc("When converting a CS flat profile into a nested profile, " + "duplicating all context profiles of a function into its base " + "profile")); + namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; bool FunctionSamples::ProfileIsProbeBased = false; bool FunctionSamples::ProfileIsCS = false; +bool FunctionSamples::ProfileIsPreinlined = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; bool FunctionSamples::ProfileIsFS = false; @@ -218,8 +225,9 @@ 0xffff; } -LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) { - if (FunctionSamples::ProfileIsProbeBased) +LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL, + bool ProfileIsFS) { + if (FunctionSamples::ProfileIsProbeBased) { // In a pseudo-probe based profile, a callsite is simply represented by the // ID of the probe associated with the call instruction. The probe ID is // encoded in the Discriminator field of the call instruction's debug @@ -227,9 +235,19 @@ return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex( DIL->getDiscriminator()), 0); - else - return LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()); + } else { + unsigned Discriminator = + ProfileIsFS ? 
DIL->getDiscriminator() : DIL->getBaseDiscriminator(); + return LineLocation(FunctionSamples::getOffset(DIL), Discriminator); + } +} + +uint64_t FunctionSamples::getCallSiteHash(StringRef CalleeName, + const LineLocation &Callsite) { + uint64_t NameHash = std::hash{}(CalleeName.str()); + uint64_t LocId = + (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator; + return NameHash + (LocId << 5) + LocId; } const FunctionSamples *FunctionSamples::findFunctionSamples( @@ -239,21 +257,16 @@ const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - unsigned Discriminator; - if (ProfileIsFS) - Discriminator = DIL->getDiscriminator(); - else - Discriminator = DIL->getBaseDiscriminator(); - // Use C++ linkage name if possible. StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); - - S.push_back( - std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name)); + S.emplace_back(FunctionSamples::getCallSiteIdentifier( + DIL, FunctionSamples::ProfileIsFS), + Name); PrevDIL = DIL; } + if (S.size() == 0) return this; const FunctionSamples *FS = this; @@ -454,3 +467,81 @@ for (auto &Sym : SortedList) OS << Sym << "\n"; } + +CSProfileConverter::FrameNode * +CSProfileConverter::FrameNode::getOrCreateChildFrame( + const LineLocation &CallSite, StringRef CalleeName) { + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); + auto It = AllChildFrames.find(Hash); + if (It != AllChildFrames.end()) { + assert(It->second.FuncName == CalleeName && + "Hash collision for child context node"); + return &It->second; + } + + AllChildFrames[Hash] = FrameNode(CalleeName, nullptr, CallSite); + return &AllChildFrames[Hash]; +} + +CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles) + : ProfileMap(Profiles) { + for (auto &FuncSample : Profiles) { + FunctionSamples *FSamples = &FuncSample.second; + auto *NewNode = 
getOrCreateContextPath(FSamples->getContext()); + assert(!NewNode->FuncSamples && "New node cannot have sample profile"); + NewNode->FuncSamples = FSamples; + } +} + +CSProfileConverter::FrameNode * +CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) { + auto Node = &RootFrame; + LineLocation CallSiteLoc(0, 0); + for (auto &Callsite : Context.getContextFrames()) { + Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName); + CallSiteLoc = Callsite.Location; + } + return Node; +} + +void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) { + // Process each child profile. Add each child profile to callsite profile map + // of the current node `Node` if `Node` comes with a profile. Otherwise + // promote the child profile to a standalone profile. + auto *NodeProfile = Node.FuncSamples; + for (auto &It : Node.AllChildFrames) { + auto &ChildNode = It.second; + convertProfiles(ChildNode); + auto *ChildProfile = ChildNode.FuncSamples; + if (!ChildProfile) + continue; + SampleContext OrigChildContext = ChildProfile->getContext(); + // Reset the child context to be contextless. + ChildProfile->getContext().setName(OrigChildContext.getName()); + if (NodeProfile) { + // Add child profile to the callsite profile map. + auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc); + SamplesMap.emplace(OrigChildContext.getName(), *ChildProfile); + NodeProfile->addTotalSamples(ChildProfile->getTotalSamples()); + } + + // Separate child profile to be a standalone profile, if the current parent + // profile doesn't exist. This is a duplicating operation when the child + // profile is already incorporated into the parent which is still useful and + // thus done optionally. It is seen that duplicating context profiles into + // base profiles improves the code quality for thinlto build by allowing a + // profile in the prelink phase for to-be-fully-inlined functions. 
+ if (!NodeProfile || DuplicateContextProfilesIntoBaseProfile) + ProfileMap[ChildProfile->getContext()].merge(*ChildProfile); + + // Contexts coming with a `ContextShouldBeInlined` attribute indicate this + // is a preinliner-computed profile. + if (OrigChildContext.hasAttribute(ContextShouldBeInlined)) + FunctionSamples::ProfileIsPreinlined = true; + + // Remove the original child profile. + ProfileMap.erase(OrigChildContext); + } +} + +void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -146,7 +146,7 @@ if (Depth == 0) return false; - if (Depth == 1 && Input[Depth] == '!') { + if (Input[Depth] == '!') { LineTy = LineType::Metadata; return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); } @@ -248,7 +248,7 @@ // SeenMetadata tracks whether we have processed metadata for the current // top-level function profile. - bool SeenMetadata = false; + uint32_t DepthMetadata = 0; ProfileIsFS = ProfileIsFSDisciminator; FunctionSamples::ProfileIsFS = ProfileIsFS; @@ -275,7 +275,7 @@ "Expected 'mangled_name:NUM:NUM', found " + *LineIt); return sampleprof_error::malformed; } - SeenMetadata = false; + DepthMetadata = 0; SampleContext FContext(FName, CSNameTable); if (FContext.hasContext()) ++CSProfileCount; @@ -302,7 +302,7 @@ *LineIt); return sampleprof_error::malformed; } - if (SeenMetadata && LineTy != LineType::Metadata) { + if (LineTy != LineType::Metadata && Depth == DepthMetadata) { // Metadata must be put at the end of a function profile. 
reportError(LineIt.line_number(), "Found non-metadata after metadata: " + *LineIt); @@ -322,6 +322,7 @@ FSamples.setName(FName); MergeResult(Result, FSamples.addTotalSamples(NumSamples)); InlineStack.push_back(&FSamples); + DepthMetadata = 0; break; } case LineType::BodyProfile: { @@ -342,11 +343,13 @@ FunctionSamples &FProfile = *InlineStack.back(); if (FunctionHash) { FProfile.setFunctionHash(FunctionHash); - ++ProbeProfileCount; + if (Depth == 1) + ++ProbeProfileCount; } - if (Attributes) - FProfile.getContext().setAllAttributes(Attributes); - SeenMetadata = true; + FProfile.getContext().setAllAttributes(Attributes); + if (Attributes & (uint32_t)ContextShouldBeInlined) + ProfileIsPreinlined = true; + DepthMetadata = Depth; break; } } @@ -361,6 +364,7 @@ ProfileIsProbeBased = (ProbeProfileCount > 0); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; FunctionSamples::ProfileIsCS = ProfileIsCS; + FunctionSamples::ProfileIsPreinlined = ProfileIsPreinlined; if (Result == sampleprof_error::success) computeSummary(); @@ -683,6 +687,9 @@ ProfileIsProbeBased = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; + ProfileIsPreinlined = + hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsPreInlined); + FunctionSamples::ProfileIsPreinlined = ProfileIsPreinlined; bool HasAttribute = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); if (std::error_code EC = readFuncMetadata(HasAttribute)) @@ -1078,30 +1085,79 @@ } std::error_code -SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { - while (Data < End) { - auto FContext(readSampleContextFromTable()); - if (std::error_code EC = FContext.getError()) - return EC; - bool ProfileInMap = Profiles.count(*FContext); +SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, + FunctionSamples *FProfile, + bool Inlined) { + if (Data < End) { if (ProfileIsProbeBased) { auto Checksum = readNumber(); if 
(std::error_code EC = Checksum.getError()) return EC; - if (ProfileInMap) - Profiles[*FContext].setFunctionHash(*Checksum); + if (FProfile) + FProfile->setFunctionHash(*Checksum); } if (ProfileHasAttribute) { auto Attributes = readNumber(); if (std::error_code EC = Attributes.getError()) return EC; - if (ProfileInMap) - Profiles[*FContext].getContext().setAllAttributes(*Attributes); + if (FProfile) + FProfile->getContext().setAllAttributes(*Attributes); + } + + if (!ProfileIsCS) { + // Read all the attributes for inlined function calls. + auto NumCallsites = readNumber(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) + return EC; + + FunctionSamples *CalleeProfile = nullptr; + if (FProfile) { + CalleeProfile = const_cast( + &FProfile->functionSamplesAt(LineLocation( + *LineOffset, + *Discriminator))[std::string(FContext.get().getName())]); + } + if (std::error_code EC = + readFuncMetadata(ProfileHasAttribute, CalleeProfile, true)) + return EC; + } } } + return sampleprof_error::success; +} + +std::error_code +SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { + while (Data < End) { + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) + return EC; + FunctionSamples *FProfile = nullptr; + auto It = Profiles.find(*FContext); + if (It != Profiles.end()) + FProfile = &It->second; + + if (std::error_code EC = + readFuncMetadata(ProfileHasAttribute, FProfile, false)) + return EC; + } + assert(Data == End && "More data is read than expected"); return sampleprof_error::success; } @@ -1233,6 +1289,8 @@ Flags.append("probe,"); if 
(hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) Flags.append("attr,"); + if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsPreInlined)) + Flags.append("preinlined,"); break; default: break; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -194,18 +194,46 @@ return sampleprof_error::success; } +std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata( + const FunctionSamples &FunctionProfile) { + auto &OS = *OutputStream; + if (std::error_code EC = writeContextIdx(FunctionProfile.getContext())) + return EC; + + if (FunctionSamples::ProfileIsProbeBased) + encodeULEB128(FunctionProfile.getFunctionHash(), OS); + if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreinlined) { + encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS); + } + + if (!FunctionSamples::ProfileIsCS) { + // Recursively emit attributes for all callee samples. 
+ uint64_t NumCallsites = 0; + for (const auto &J : FunctionProfile.getCallsiteSamples()) + NumCallsites += J.second.size(); + encodeULEB128(NumCallsites, OS); + for (const auto &J : FunctionProfile.getCallsiteSamples()) { + for (const auto &FS : J.second) { + LineLocation Loc = J.first; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeFuncMetadata(FS.second)) + return EC; + } + } + } + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata( const SampleProfileMap &Profiles) { - if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS) + if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS && + !FunctionSamples::ProfileIsPreinlined) return sampleprof_error::success; - auto &OS = *OutputStream; for (const auto &Entry : Profiles) { - if (std::error_code EC = writeContextIdx(Entry.second.getContext())) + if (std::error_code EC = writeFuncMetadata(Entry.second)) return EC; - if (FunctionSamples::ProfileIsProbeBased) - encodeULEB128(Entry.second.getFunctionHash(), OS); - if (FunctionSamples::ProfileIsCS) - encodeULEB128(Entry.second.getContext().getAllAttributes(), OS); } return sampleprof_error::success; } @@ -295,10 +323,13 @@ setToCompressSection(SecProfileSymbolList); if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased); + if (Type == SecFuncMetadata && FunctionSamples::ProfileIsPreinlined) + addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsPreInlined); + if (Type == SecFuncMetadata && + (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreinlined)) + addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); if (Type == SecProfSummary && FunctionSamples::ProfileIsCS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); - if (Type == SecFuncMetadata && 
FunctionSamples::ProfileIsCS) - addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); @@ -483,15 +514,14 @@ } Indent -= 1; - if (Indent == 0) { - if (FunctionSamples::ProfileIsProbeBased) { - OS.indent(Indent + 1); - OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; - } - if (FunctionSamples::ProfileIsCS) { - OS.indent(Indent + 1); - OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; - } + if (FunctionSamples::ProfileIsProbeBased) { + OS.indent(Indent + 1); + OS << "!CFGChecksum: " << S.getFunctionHash() << "\n"; + } + + if (S.getContext().getAllAttributes()) { + OS.indent(Indent + 1); + OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n"; } return sampleprof_error::success; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -32,7 +32,7 @@ if (CalleeName.empty()) return getHottestChildContext(CallSite); - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) return &It->second; @@ -65,7 +65,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( const LineLocation &CallSite, ContextTrieNode &&NodeToMove, uint32_t ContextFramesToRemove, bool DeleteNode) { - uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); + uint64_t Hash = + FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); LineLocation OldCallSite = NodeToMove.CallSiteLoc; ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); @@ -108,7 +109,7 @@ void ContextTrieNode::removeChildContext(const 
LineLocation &CallSite, StringRef CalleeName) { - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); // Note this essentially calls dtor and destroys that child context AllChildContext.erase(Hash); } @@ -174,21 +175,9 @@ } } -uint64_t ContextTrieNode::nodeHash(StringRef ChildName, - const LineLocation &Callsite) { - // We still use child's name for child hash, this is - // because for children of root node, we don't have - // different line/discriminator, and we'll rely on name - // to differentiate children. - uint64_t NameHash = std::hash{}(ChildName.str()); - uint64_t LocId = - (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator; - return NameHash + (LocId << 5) + LocId; -} - ContextTrieNode *ContextTrieNode::getOrCreateChildContext( const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { - uint64_t Hash = nodeHash(CalleeName, CallSite); + uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); if (It != AllChildContext.end()) { assert(It->second.getFuncName() == CalleeName && diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -467,6 +467,9 @@ void emitOptimizationRemarksForInlineCandidates( const SmallVectorImpl &Candidates, const Function &F, bool Hot); + void accumulateNonInlinedSamples( + DenseMap NonInlinedCallSites, + const Function &F); std::vector buildFunctionOrder(Module &M, CallGraph *CG); std::unique_ptr buildProfiledCallGraph(CallGraph &CG); void generateMDProfMetadata(Function &F); @@ -1198,53 +1201,9 @@ } // For CS profile, profile for not inlined context will be merged when - // base profile is being trieved - if (ProfileIsCS) - return Changed; - - // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : 
LocalNotInlinedCallSites) { - CallBase *I = Pair.getFirst(); - Function *Callee = I->getCalledFunction(); - if (!Callee || Callee->isDeclaration()) - continue; - - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", - I->getDebugLoc(), I->getParent()) - << "previous inlining not repeated: '" - << ore::NV("Callee", Callee) << "' into '" - << ore::NV("Caller", &F) << "'"); - - ++NumCSNotInlined; - const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { - continue; - } - - if (ProfileMergeInlinee) { - // A function call can be replicated by optimizations like callsite - // splitting or jump threading and the replicates end up sharing the - // sample nested callee profile instead of slicing the original inlinee's - // profile. We want to do merge exactly once by filtering out callee - // profiles with a non-zero head sample count. - if (FS->getHeadSamples() == 0) { - // Use entry samples as head samples during the merge, as inlinees - // don't have head samples. - const_cast(FS)->addHeadSamples( - FS->getEntrySamples()); - - // Note that we have to do the merge right after processing function. - // This allows OutlineFS's profile to be used for annotation during - // top-down processing of functions' annotation. - FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); - OutlineFS->merge(*FS); - } - } else { - auto pair = - notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); - } - } + // base profile is being retrieved. + if (!FunctionSamples::ProfileIsCS) + accumulateNonInlinedSamples(LocalNotInlinedCallSites, F); return Changed; } @@ -1430,7 +1389,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet &InlinedGUIDs) { - assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); // ProfAccForSymsInList is used in callsiteIsHot. 
The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. @@ -1467,6 +1425,8 @@ if (ExternalInlineAdvisor) SizeLimit = std::numeric_limits::max(); + DenseMap LocalNotInlinedCallSites; + // Perform iterative BFS call site prioritized inlining bool Changed = false; while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { @@ -1521,6 +1481,8 @@ } ICPCount++; Changed = true; + } else if (FunctionSamples::ProfileIsPreinlined) { + LocalNotInlinedCallSites.try_emplace(I, FS); } } } else if (CalledFunction && CalledFunction->getSubprogram() && @@ -1532,6 +1494,8 @@ CQueue.emplace(NewCandidate); } Changed = true; + } else if (FunctionSamples::ProfileIsPreinlined) { + LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples); } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findExternalInlineCandidate(I, findCalleeFunctionSamples(*I), @@ -1549,9 +1513,63 @@ ++NumCSInlinedHitGrowthLimit; } + // For CS profile, profile for not inlined context will be merged when + // base profile is being retrieved. 
+ if (!FunctionSamples::ProfileIsCS) + accumulateNonInlinedSamples(LocalNotInlinedCallSites, F); return Changed; } +void SampleProfileLoader::accumulateNonInlinedSamples( + DenseMap NonInlinedCallSites, + const Function &F) { + // Accumulate not inlined callsite information into notInlinedSamples + for (const auto &Pair : NonInlinedCallSites) { + CallBase *I = Pair.getFirst(); + Function *Callee = I->getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", + I->getDebugLoc(), I->getParent()) + << "previous inlining not repeated: '" + << ore::NV("Callee", Callee) << "' into '" + << ore::NV("Caller", &F) << "'"); + + ++NumCSNotInlined; + const FunctionSamples *FS = Pair.getSecond(); + if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + continue; + } + + if (ProfileMergeInlinee) { + // A function call can be replicated by optimizations like callsite + // splitting or jump threading and the replicates end up sharing the + // sample nested callee profile instead of slicing the original + // inlinee's profile. We want to do merge exactly once by filtering out + // callee profiles with a non-zero head sample count. + if (FS->getHeadSamples() == 0) { + // Use entry samples as head samples during the merge, as inlinees + // don't have head samples. + const_cast(FS)->addHeadSamples( + FS->getEntrySamples()); + + // Note that we have to do the merge right after processing function. + // This allows OutlineFS's profile to be used for annotation during + // top-down processing of functions' annotation. + FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); + OutlineFS->merge(*FS, 1); + // Set outlined profile to be synthetic to not bias the inliner. 
+ OutlineFS->SetContextSynthetic(); + } + } else { + auto pair = + notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); + pair.first->second.entryCount += FS->getEntrySamples(); + } + } +} + /// Returns the sorted CallTargetMap \p M by count in descending order. static SmallVector GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) { @@ -1754,7 +1772,7 @@ } DenseSet InlinedGUIDs; - if (ProfileIsCS && CallsitePrioritizedInline) + if (CallsitePrioritizedInline) Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); else Changed |= inlineHotFunctions(F, InlinedGUIDs); @@ -1961,10 +1979,8 @@ } // Apply tweaks if context-sensitive profile is available. - if (Reader->profileIsCS()) { - ProfileIsCS = true; - FunctionSamples::ProfileIsCS = true; - + if (Reader->profileIsCS() || Reader->profileIsPreinlined()) { + ProfileIsCS = Reader->profileIsCS(); // Enable priority-base inliner and size inline by default for CSSPGO. if (!ProfileSizeInline.getNumOccurrences()) ProfileSizeInline = true; @@ -1986,9 +2002,11 @@ if (!SampleProfileUseProfi.getNumOccurrences()) SampleProfileUseProfi = true; - // Tracker for profiles under different context - ContextTracker = std::make_unique( - Reader->getProfiles(), &GUIDToFuncNameMap); + if (FunctionSamples::ProfileIsCS) { + // Tracker for profiles under different context + ContextTracker = std::make_unique( + Reader->getProfiles(), &GUIDToFuncNameMap); + } } // Load pseudo probe descriptors for probe-based function samples. 
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -10,8 +10,12 @@ ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; RUN: llvm-profdata merge --sample --text --gen-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE + ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-prioritized-inline -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW ; ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s 
--check-prefix=INLINE-BASE diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll @@ -3,6 +3,9 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE +; RUN: llvm-profdata merge --sample --text --gen-nested-profile -duplicate-contexts-into-base=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE ; DEFAULT: '_Z5funcAi' inlined into 'main' ; DEFAULT-NOT: inlined into diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext copy from llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext copy to llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample-preinline.proftext @@ -4,7 +4,7 @@ 3: 287884 4: 287864 _Z3fibi:315608 15: 23 - !Attributes: 0 + 
!Attributes: 2 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 @@ -13,32 +13,28 @@ 10: 23324 11: 23327 _Z3fibi:25228 15: 11 - !Attributes: 1 + !Attributes: 2 [external:12 @ main]:154:12 2: 12 3: 10 _Z5funcAi:7 3.1: 10 _Z5funcBi:11 - !Attributes: 0 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 3.1: 18 _Z5funcBi:19 - !Attributes: 0 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 - !Attributes: 0 [externalA:17 @ _Z5funcBi]:120:3 0: 3 1: 3 - !Attributes: 0 [main:3.1 @ _Z5funcBi]:120:19 0: 19 1: 19 _Z8funcLeafi:20 3: 12 - !Attributes: 1 + !Attributes: 2 [main:3 @ _Z5funcAi]:99:11 0: 10 1: 10 _Z8funcLeafi:11 3: 24 - !Attributes: 0 + !Attributes: 2 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext @@ -4,7 +4,6 @@ 3: 287884 4: 287864 _Z3fibi:315608 15: 23 - !Attributes: 0 [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 0: 15 1: 15 @@ -18,20 +17,16 @@ 2: 12 3: 10 _Z5funcAi:7 3.1: 10 _Z5funcBi:11 - !Attributes: 0 [main]:154:0 2: 12 3: 18 _Z5funcAi:11 3.1: 18 _Z5funcBi:19 - !Attributes: 0 [external:10 @ _Z5funcBi]:120:10 0: 10 1: 10 - !Attributes: 0 [externalA:17 @ _Z5funcBi]:120:3 0: 3 1: 3 - !Attributes: 0 [main:3.1 @ _Z5funcBi]:120:19 0: 19 1: 19 _Z8funcLeafi:20 @@ -41,4 +36,3 @@ 0: 10 1: 10 _Z8funcLeafi:11 3: 24 - !Attributes: 0 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test @@ -0,0 +1,113 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-nested-profile=1 -duplicate-contexts-into-base=0 +RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace +RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin 
%S/Inputs/cs-sample-preinline.proftext --gen-nested-profile=1 -duplicate-contexts-into-base=0 +RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin +RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace +RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-nested-profile=1 -duplicate-contexts-into-base=1 +RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT + + +; CHECK:main:1968679:12 +; CHECK-NEXT: 2: 24 +; CHECK-NEXT: 3: 28 _Z5funcAi:18 +; CHECK-NEXT: 3.1: 28 _Z5funcBi:30 +; CHECK-NEXT: 3: _Z5funcAi:1467398 +; CHECK-NEXT: 0: 10 +; CHECK-NEXT: 1: 10 _Z8funcLeafi:11 +; CHECK-NEXT: 3: 24 +; CHECK-NEXT: 1: _Z8funcLeafi:1467299 +; CHECK-NEXT: 0: 6 +; CHECK-NEXT: 1: 6 +; CHECK-NEXT: 3: 287884 +; CHECK-NEXT: 4: 287864 _Z3fibi:315608 +; CHECK-NEXT: 15: 23 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: 3.1: _Z5funcBi:500973 +; CHECK-NEXT: 0: 19 +; CHECK-NEXT: 1: 19 _Z8funcLeafi:20 +; CHECK-NEXT: 3: 12 +; CHECK-NEXT: 1: _Z8funcLeafi:500853 +; CHECK-NEXT: 0: 15 +; CHECK-NEXT: 1: 15 +; CHECK-NEXT: 3: 74946 +; CHECK-NEXT: 4: 74941 _Z3fibi:82359 +; CHECK-NEXT: 10: 23324 +; CHECK-NEXT: 11: 23327 _Z3fibi:25228 +; CHECK-NEXT: 15: 11 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT: !Attributes: 2 +; CHECK-NEXT:_Z5funcBi:240:13 +; CHECK-NEXT: 0: 13 +; CHECK-NEXT: 1: 13 + + + +; RECOUNT:main:1968679:12 +; RECOUNT-NEXT: 2: 24 +; RECOUNT-NEXT: 3: 28 _Z5funcAi:18 +; RECOUNT-NEXT: 3.1: 28 _Z5funcBi:30 +; RECOUNT-NEXT: 3: _Z5funcAi:1467398 +; RECOUNT-NEXT: 0: 10 +; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 +; RECOUNT-NEXT: 3: 24 +; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 +; RECOUNT-NEXT: 0: 6 +; RECOUNT-NEXT: 1: 6 +; RECOUNT-NEXT: 3: 287884 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 15: 23 +; RECOUNT-NEXT: !Attributes: 2 +; 
RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: 3.1: _Z5funcBi:500973 +; RECOUNT-NEXT: 0: 19 +; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20 +; RECOUNT-NEXT: 3: 12 +; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 +; RECOUNT-NEXT: 0: 15 +; RECOUNT-NEXT: 1: 15 +; RECOUNT-NEXT: 3: 74946 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 11 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z8funcLeafi:1968152:31 +; RECOUNT-NEXT: 0: 21 +; RECOUNT-NEXT: 1: 21 +; RECOUNT-NEXT: 3: 362830 +; RECOUNT-NEXT: 4: 362805 _Z3fibi:397967 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 34 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z5funcAi:1467398:11 +; RECOUNT-NEXT: 0: 10 +; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11 +; RECOUNT-NEXT: 3: 24 +; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299 +; RECOUNT-NEXT: 0: 6 +; RECOUNT-NEXT: 1: 6 +; RECOUNT-NEXT: 3: 287884 +; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608 +; RECOUNT-NEXT: 15: 23 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT: !Attributes: 2 +; RECOUNT-NEXT:_Z5funcBi:501213:32 +; RECOUNT-NEXT: 0: 32 +; RECOUNT-NEXT: 1: 32 _Z8funcLeafi:20 +; RECOUNT-NEXT: 3: 12 +; RECOUNT-NEXT: 1: _Z8funcLeafi:500853 +; RECOUNT-NEXT: 0: 15 +; RECOUNT-NEXT: 1: 15 +; RECOUNT-NEXT: 3: 74946 +; RECOUNT-NEXT: 4: 74941 _Z3fibi:82359 +; RECOUNT-NEXT: 10: 23324 +; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228 +; RECOUNT-NEXT: 15: 11 +; RECOUNT-NEXT: !Attributes: 2 + + +; PREINLINE: FunctionMetadata {{.*}} Flags: {attr,preinlined} diff --git a/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test b/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test --- a/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-trimmer.test @@ -14,7 +14,6 @@ CHECK-TRIM-NEXT: 3: 287884 CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608 CHECK-TRIM-NEXT: 15: 23 -CHECK-TRIM-NEXT: !Attributes: 0 CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ 
_Z8funcLeafi]:500853:20 CHECK-TRIM-NEXT: 0: 15 CHECK-TRIM-NEXT: 1: 15 @@ -29,14 +28,11 @@ CHECK-MERGE-NEXT: 0: 32 CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20 CHECK-MERGE-NEXT: 3: 12 -CHECK-MERGE-NEXT: !Attributes: 0 CHECK-MERGE-NEXT:[main]:308:12 CHECK-MERGE-NEXT: 2: 24 CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18 CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30 -CHECK-MERGE-NEXT: !Attributes: 0 CHECK-MERGE-NEXT:[_Z5funcAi]:99:11 CHECK-MERGE-NEXT: 0: 10 CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11 CHECK-MERGE-NEXT: 3: 24 -CHECK-MERGE-NEXT: !Attributes: 0 diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -15,6 +15,10 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM +; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --gen-nested-profile=1 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL-NEST + ; CHECK-DEFAULT: [main:1 @ foo]:44:0 ; CHECK-DEFAULT-NEXT: 2.1: 14 ; CHECK-DEFAULT-NEXT: 3: 15 @@ -49,3 +53,11 @@ ; CHECK-TRIM:[foo:3.1 @ bar]:14:0 ; CHECK-TRIM-NEXT: 1: 14 ; CHECK-TRIM-NEXT: !Attributes: 3 + +; CHECK-PREINL-NEST: foo:58:0 +; CHECK-PREINL-NEST-NEXT: 2.1: 14 +; CHECK-PREINL-NEST-NEXT: 3: 15 +; CHECK-PREINL-NEST-NEXT: 3.1: 14 bar:14 +; CHECK-PREINL-NEST-NEXT: 3.2: 1 +; CHECK-PREINL-NEST-NEXT: 3.1: bar:14 +; CHECK-PREINL-NEST-NEXT: 1: 14 diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -23,7 +23,6 @@ ; CHECK-NEXT: 7: 2 fb:2 ; CHECK-NEXT: 8: 1 fa:1 ; CHECK-NEXT: !CFGChecksum: 563070469352221 -; 
CHECK-NEXT: !Attributes: 0 ; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 ; CHECK-NEXT: 1: 4 ; CHECK-NEXT: 2: 3 @@ -41,7 +40,6 @@ ; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4 ; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3 ; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 563022570642068 -; CHECK-KEEP-COLD-NEXT: !Attributes: 0 ; CHECK-KEEP-COLD-NEXT:[fa]:14:4 ; CHECK-KEEP-COLD-NEXT: 1: 4 ; CHECK-KEEP-COLD-NEXT: 3: 4 @@ -71,7 +69,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3 @@ -81,7 +78,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 0 @@ -90,7 +86,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1 @@ -100,7 +95,6 @@ ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 0 ; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221 -; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test --- 
a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test @@ -12,7 +12,6 @@ ; CHECK-NEXT: 8: 15 bar:15 ; CHECK-NEXT: 9: 0 ; CHECK-NEXT: !CFGChecksum: 563088904013236 -; CHECK-NEXT: !Attributes: 0 ; CHECK: [foo:8 @ bar]:30:15 ; CHECK-NEXT: 1: 15 ; CHECK-NEXT: 4: 15 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -687,7 +687,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, + bool UseMD5, bool GenPartialProfile, bool GenNestedProfile, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode) { using namespace sampleprof; @@ -742,6 +742,12 @@ } } + if (ProfileIsCS && GenNestedProfile) { + CSProfileConverter CSConverter(ProfileMap); + CSConverter.convertProfiles(); + ProfileIsCS = FunctionSamples::ProfileIsCS = false; + } + std::unique_ptr ReaderList = Reader->getProfileSymbolList(); if (ReaderList) @@ -941,7 +947,10 @@ cl::opt InstrProfColdThreshold( "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " - "override the cold threshold got from profile summary.")); + "override the cold threshold got from profile summary. 
")); + cl::opt GenNestedProfile( + "gen-nested-profile", cl::Hidden, cl::init(false), + cl::desc("Generate nested function profiles for CSSPGO")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -987,10 +996,9 @@ else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, SampleMergeColdContext, - SampleTrimColdContext, SampleColdContextFrameDepth, - FailureMode); - + UseMD5, GenPartialProfile, GenNestedProfile, + SampleMergeColdContext, SampleTrimColdContext, + SampleColdContextFrameDepth, FailureMode); return 0; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -83,6 +83,10 @@ extern cl::opt ProfileSummaryCutoffHot; +static cl::opt + GenNestedProfile("gen-nested-profile", cl::Hidden, cl::init(false), + cl::desc("Generate nested function profiles for CSSPGO")); + using namespace llvm; using namespace sampleprof; @@ -746,6 +750,12 @@ } calculateAndShowDensity(ContextLessProfiles); + if (GenNestedProfile) { + CSProfileConverter CSConverter(ProfileMap); + CSConverter.convertProfiles(); + FunctionSamples::ProfileIsCS = false; + FunctionSamples::ProfileIsPreinlined = EnableCSPreInliner; + } } void ProfileGeneratorBase::computeSummaryAndThreshold() {