Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -342,6 +342,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); class FunctionSamples; +class SampleProfileReaderItaniumRemapper; using BodySampleMap = std::map; // NOTE: Using a StringMap here makes parsed profiles consume around 17% more @@ -432,31 +433,9 @@ /// with callee \p CalleeName. If no callsite can be found, relax the /// restriction to return the FunctionSamples at callsite location \p Loc /// with the maximum total sample count. - const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, - StringRef CalleeName) const { - std::string CalleeGUID; - CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); - - auto iter = CallsiteSamples.find(Loc); - if (iter == CallsiteSamples.end()) - return nullptr; - auto FS = iter->second.find(CalleeName); - if (FS != iter->second.end()) - return &FS->second; - // If we cannot find exact match of the callee name, return the FS with - // the max total count. Only do this when CalleeName is not provided, - // i.e., only for indirect calls. - if (!CalleeName.empty()) - return nullptr; - uint64_t MaxTotalSamples = 0; - const FunctionSamples *R = nullptr; - for (const auto &NameFS : iter->second) - if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { - MaxTotalSamples = NameFS.second.getTotalSamples(); - R = &NameFS.second; - } - return R; - } + const FunctionSamples *findFunctionSamplesAt( + const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; bool empty() const { return TotalSamples == 0; } @@ -630,7 +609,9 @@ /// tree nodes in the profile. /// /// \returns the FunctionSamples pointer to the inlined instance. 
- const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; + const FunctionSamples *findFunctionSamples( + const DILocation *DIL, + SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; static SampleProfileFormat Format; Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -275,6 +275,9 @@ return Remappings->lookup(FunctionName); } + /// Clear function names inserted into remapper. + void clear() { Remappings->clear(); } + /// Return the samples collected for function \p F if remapper knows /// it is present in SampleMap. FunctionSamples *getSamplesFor(StringRef FunctionName); @@ -423,6 +426,11 @@ /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + /// Return temporary remapper used for name matching at an individual callsite. + SampleProfileReaderItaniumRemapper *getTempRemapper() { + return TempRemapper.get(); + } + protected: /// Map every function to its associated profile. /// @@ -449,7 +457,16 @@ /// Compute summary for this profile. void computeSummary(); + /// Remapper records the names of all the outlined functions in the profile. + /// It is used to check whether a function has an outlined profile. Remapper + /// is populated after profile reading and is kept unchanged afterwards. std::unique_ptr Remapper; + /// TempRemapper first records the names of inline instances at a location + /// in the profile, and then it will be used to check whether the name of + /// given callsite can be found in the names recorded in TempRemapper, so + /// TempRemapper will be populated and cleared repeatedly for different + /// callsites. + std::unique_ptr TempRemapper; /// \brief The format of sample. 
SampleProfileFormat Format = SPF_None; Index: llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h =================================================================== --- llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h +++ llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h @@ -85,6 +85,9 @@ /// formed. Otherwise returns Key(). Key lookup(StringRef Mangling); + /// Clear canonical keys. The equivalence remapping is kept. + void clearCanonicalKeys(); + private: struct Impl; Impl *P; Index: llvm/include/llvm/Support/SymbolRemappingReader.h =================================================================== --- llvm/include/llvm/Support/SymbolRemappingReader.h +++ llvm/include/llvm/Support/SymbolRemappingReader.h @@ -123,6 +123,9 @@ return Canonicalizer.lookup(FunctionName); } + /// Clear canonical keys. The equivalence remapping is kept. + void clear() { Canonicalizer.clearCanonicalKeys(); } + private: ItaniumManglingCanonicalizer Canonicalizer; }; Index: llvm/lib/ProfileData/SampleProf.cpp =================================================================== --- llvm/lib/ProfileData/SampleProf.cpp +++ llvm/lib/ProfileData/SampleProf.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -174,8 +175,8 @@ 0xffff; } -const FunctionSamples * -FunctionSamples::findFunctionSamples(const DILocation *DIL) const { +const FunctionSamples *FunctionSamples::findFunctionSamples( + const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const { assert(DIL); SmallVector, 10> S; @@ -190,11 +191,57 @@ return this; const FunctionSamples *FS = this; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i].first, S[i].second); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, 
Remapper); } return FS; } +/// Returns a pointer to FunctionSamples at the given callsite location \p Loc +/// with callee \p CalleeName. If no callsite can be found, relax the +/// restriction to return the FunctionSamples at callsite location \p Loc +/// with the maximum total sample count. +const FunctionSamples *FunctionSamples::findFunctionSamplesAt( + const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper) const { + std::string CalleeGUID; + CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); + + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + auto FS = iter->second.find(CalleeName); + if (FS != iter->second.end()) + return &FS->second; + + // If we cannot find FunctionSamples with exact CalleeName and if Remapper + // is given, try whether we can find FunctionSamples with equivalent + // CalleeName using remapping rules. To avoid mixing profile inline instances + // at one callsite with inline instances at other callsites, the Remapper + // has to be cleared every time before it is used; however, the remapping table + // containing the remapping rules in the Remapper is kept when it is cleared. + if (Remapper) { + Remapper->clear(); + Remapper->insert(CalleeName); + for (const auto &NameFS : iter->second) { + if (Remapper->exist(NameFS.first)) + return &NameFS.second; + } + } + // If we cannot find exact match of the callee name, return the FS with + // the max total count. Only do this when CalleeName is not provided, + // i.e., only for indirect calls. 
+ if (!CalleeName.empty()) + return nullptr; + uint64_t MaxTotalSamples = 0; + const FunctionSamples *R = nullptr; + for (const auto &NameFS : iter->second) + if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { + MaxTotalSamples = NameFS.second.getTotalSamples(); + R = &NameFS.second; + } + return R; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -1424,6 +1424,15 @@ return EC; } Reader->Remapper = std::move(ReaderOrErr.get()); + + ReaderOrErr = + SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg = "Could not create remapper: " + EC.message(); + C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); + return EC; + } + Reader->TempRemapper = std::move(ReaderOrErr.get()); } FunctionSamples::Format = Reader->getFormat(); Index: llvm/lib/Support/ItaniumManglingCanonicalizer.cpp =================================================================== --- llvm/lib/Support/ItaniumManglingCanonicalizer.cpp +++ llvm/lib/Support/ItaniumManglingCanonicalizer.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/ItaniumManglingCanonicalizer.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Demangle/ItaniumDemangle.h" @@ -73,6 +74,9 @@ } class FoldingNodeAllocator { + BumpPtrAllocator RawAlloc; + +protected: class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode { public: // 'Node' in this context names the injected-class-name of the base class. 
@@ -81,8 +85,6 @@ } void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); } }; - - BumpPtrAllocator RawAlloc; llvm::FoldingSet Nodes; public: @@ -187,8 +189,33 @@ TrackedNodeIsUsed = false; } bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } + + void clearNodesNotRemapping(); }; +/// Clear all the nodes unrelated to remapping rules. +void CanonicalizerAllocator::clearNodesNotRemapping() { + // Collect Nodes in Remappings. + DenseSet NodesInRemappings; + for (auto &NodePair : Remappings) { + NodesInRemappings.insert(NodePair.first); + NodesInRemappings.insert(NodePair.second); + } + + // Collect all the NodeHeaders in Nodes but not in Remappings + // into NodeHeaderSet. + DenseSet NodeHeaderSet; + for (auto &NH : Nodes) { + if (NodesInRemappings.contains(NH.getNode())) + continue; + NodeHeaderSet.insert(&NH); + } + + // Clear all the NodeHeaders in NodeHeaderSet. + for (auto &NH : NodeHeaderSet) + Nodes.RemoveNode(NH); +} + /// Convert St3foo to NSt3fooE so that equivalences naming one also affect the /// other. 
template<> @@ -320,3 +347,7 @@ ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { return parseMaybeMangledName(P->Demangler, Mangling, false); } + +void ItaniumManglingCanonicalizer::clearCanonicalKeys() { + P->Demangler.ASTAllocator.clearNodesNotRemapping(); +} Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -840,7 +840,7 @@ return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL), DIL->getBaseDiscriminator()), - CalleeName); + CalleeName, Reader->getTempRemapper()); } /// Returns a vector of FunctionSamples that are the indirect call targets @@ -903,7 +903,8 @@ auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); if (it.second) - it.first->second = Samples->findFunctionSamples(DIL); + it.first->second = + Samples->findFunctionSamples(DIL, Reader->getTempRemapper()); return it.first->second; } Index: llvm/unittests/ProfileData/SampleProfTest.cpp =================================================================== --- llvm/unittests/ProfileData/SampleProfTest.cpp +++ llvm/unittests/ProfileData/SampleProfTest.cpp @@ -89,8 +89,8 @@ auto VerifySummary = [IsPartialProfile, PartialProfileRatio]( ProfileSummary &Summary) mutable { ASSERT_EQ(ProfileSummary::PSK_Sample, Summary.getKind()); - ASSERT_EQ(137392u, Summary.getTotalCount()); - ASSERT_EQ(8u, Summary.getNumCounts()); + ASSERT_EQ(138211u, Summary.getTotalCount()); + ASSERT_EQ(10u, Summary.getNumCounts()); ASSERT_EQ(4u, Summary.getNumFunctions()); ASSERT_EQ(1437u, Summary.getMaxFunctionCount()); ASSERT_EQ(60351u, Summary.getMaxCount()); @@ -112,7 +112,7 @@ ASSERT_EQ(60000u, EightyPerc->MinCount); ASSERT_EQ(12557u, NinetyPerc->MinCount); ASSERT_EQ(12557u, NinetyFivePerc->MinCount); - ASSERT_EQ(610u, NinetyNinePerc->MinCount); + ASSERT_EQ(600u, NinetyNinePerc->MinCount); }; VerifySummary(Summary); @@ -155,6 +155,22 @@ 
FooSamples.addBodySamples(8, 0, 60351); FooSamples.addBodySamples(10, 0, 605); + // Add inline instance with name "_Z3gooi". + StringRef GooName("_Z3gooi"); + auto &GooSamples = + FooSamples.functionSamplesAt(LineLocation(7, 0))[GooName.str()]; + GooSamples.setName(GooName); + GooSamples.addTotalSamples(502); + GooSamples.addBodySamples(3, 0, 502); + + // Add inline instance with name "_Z3hooi". + StringRef HooName("_Z3hooi"); + auto &HooSamples = + GooSamples.functionSamplesAt(LineLocation(9, 0))[HooName.str()]; + HooSamples.setName(HooName); + HooSamples.addTotalSamples(317); + HooSamples.addBodySamples(4, 0, 317); + StringRef BarName("_Z3bari"); FunctionSamples BarSamples; BarSamples.setName(BarName); @@ -197,6 +213,8 @@ createRemapFile(RemapPath, RemapFile); FooName = "_Z4fauxi"; BarName = "_Z3barl"; + GooName = "_Z3gool"; + HooName = "_Z3hool"; } M.getOrInsertFunction(FooName, fn_type); @@ -235,6 +253,33 @@ ASSERT_EQ(7711u, ReadFooSamples->getTotalSamples()); ASSERT_EQ(610u, ReadFooSamples->getHeadSamples()); + // Try to find a FunctionSamples with GooName at given callsites containing + // inline instance for GooName. Test the correct FunctionSamples can be + // found with Remapper support. + const FunctionSamples *ReadGooSamples = + ReadFooSamples->findFunctionSamplesAt(LineLocation(7, 0), GooName, + Reader->getTempRemapper()); + ASSERT_TRUE(ReadGooSamples != nullptr); + ASSERT_EQ(502u, ReadGooSamples->getTotalSamples()); + + // Try to find a FunctionSamples with GooName at given callsites containing + // no inline instance for GooName. Test no FunctionSamples will be + // found with Remapper support. + const FunctionSamples *ReadGooSamplesAgain = + ReadFooSamples->findFunctionSamplesAt(LineLocation(9, 0), GooName, + Reader->getTempRemapper()); + ASSERT_TRUE(ReadGooSamplesAgain == nullptr); + + // The inline instance of Hoo is inside of the inline instance of Goo. 
+ // Try to find a FunctionSamples with HooName at given callsites containing + // inline instance for HooName. Test the correct FunctionSamples can be + // found with Remapper support. + const FunctionSamples *ReadHooSamples = + ReadGooSamples->findFunctionSamplesAt(LineLocation(9, 0), HooName, + Reader->getTempRemapper()); + ASSERT_TRUE(ReadHooSamples != nullptr); + ASSERT_EQ(317u, ReadHooSamples->getTotalSamples()); + FunctionSamples *ReadBarSamples = Reader->getSamplesFor(BarName); ASSERT_TRUE(ReadBarSamples != nullptr); if (!UseMD5) {