Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -342,6 +342,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); class FunctionSamples; +class SampleProfileReaderItaniumRemapper; using BodySampleMap = std::map; // NOTE: Using a StringMap here makes parsed profiles consume around 17% more @@ -428,35 +429,15 @@ return &iter->second; } - /// Returns a pointer to FunctionSamples at the given callsite location \p Loc - /// with callee \p CalleeName. If no callsite can be found, relax the - /// restriction to return the FunctionSamples at callsite location \p Loc - /// with the maximum total sample count. - const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, - StringRef CalleeName) const { - std::string CalleeGUID; - CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); - - auto iter = CallsiteSamples.find(Loc); - if (iter == CallsiteSamples.end()) - return nullptr; - auto FS = iter->second.find(CalleeName); - if (FS != iter->second.end()) - return &FS->second; - // If we cannot find exact match of the callee name, return the FS with - // the max total count. Only do this when CalleeName is not provided, - // i.e., only for indirect calls. - if (!CalleeName.empty()) - return nullptr; - uint64_t MaxTotalSamples = 0; - const FunctionSamples *R = nullptr; - for (const auto &NameFS : iter->second) - if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { - MaxTotalSamples = NameFS.second.getTotalSamples(); - R = &NameFS.second; - } - return R; - } + /// Returns a pointer to FunctionSamples at the given callsite location + /// \p Loc with callee \p CalleeName. If no callsite can be found, relax + /// the restriction to return the FunctionSamples at callsite location + /// \p Loc with the maximum total sample count. If \p Remapper is not + /// nullptr, use \p Remapper to find FunctionSamples with equivalent name + /// as \p CalleeName. + const FunctionSamples * + findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper) const; bool empty() const { return TotalSamples == 0; } @@ -630,7 +611,11 @@ /// tree nodes in the profile. /// /// \returns the FunctionSamples pointer to the inlined instance. - const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; + /// If \p Remapper is not nullptr, it will be used to find matching + /// FunctionSamples with not exactly the same but equivalent name. + const FunctionSamples *findFunctionSamples( + const DILocation *DIL, + SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; static SampleProfileFormat Format; @@ -648,6 +633,10 @@ return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); } + // Find all the names in the current FunctionSamples including names in + // all the inline instances and names of call targets. + void findAllNames(DenseSet &NameSet) const; + private: /// Mangled name of the function. StringRef Name; Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -208,6 +208,7 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -275,15 +276,18 @@ return Remappings->lookup(FunctionName); } - /// Return the samples collected for function \p F if remapper knows - /// it is present in SampleMap. - FunctionSamples *getSamplesFor(StringRef FunctionName); + /// Return the equivalent name in the profile for \p FunctionName if + /// it exists. + Optional lookUpNameInProfile(StringRef FunctionName); private: // The buffer holding the content read from remapping file. std::unique_ptr Buffer; std::unique_ptr Remappings; - DenseMap SampleMap; + // Map remapping key to the name in the profile. By looking up the + // key in the remapper, a given new name can be mapped to the + // cannonical name using the NameMap. + DenseMap NameMap; // The Reader the remapper is servicing. SampleProfileReader &Reader; // Indicate whether remapping has been applied to the profile read @@ -370,15 +374,19 @@ /// Return the samples collected for function \p F. virtual FunctionSamples *getSamplesFor(StringRef Fname) { - if (Remapper) { - if (auto FS = Remapper->getSamplesFor(Fname)) - return FS; - } std::string FGUID; Fname = getRepInFormat(Fname, useMD5(), FGUID); auto It = Profiles.find(Fname); if (It != Profiles.end()) return &It->second; + + if (Remapper) { + if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) { + auto It = Profiles.find(*NameInProfile); + if (It != Profiles.end()) + return &It->second; + } + } return nullptr; } @@ -423,6 +431,8 @@ /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } + protected: /// Map every function to its associated profile. /// Index: llvm/lib/ProfileData/SampleProf.cpp =================================================================== --- llvm/lib/ProfileData/SampleProf.cpp +++ llvm/lib/ProfileData/SampleProf.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -174,8 +175,8 @@ 0xffff; } -const FunctionSamples * -FunctionSamples::findFunctionSamples(const DILocation *DIL) const { +const FunctionSamples *FunctionSamples::findFunctionSamples( + const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const { assert(DIL); SmallVector, 10> S; @@ -190,11 +191,59 @@ return this; const FunctionSamples *FS = this; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i].first, S[i].second); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper); } return FS; } +void FunctionSamples::findAllNames(DenseSet &NameSet) const { + NameSet.insert(Name); + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + NameSet.insert(TS.getKey()); + + for (const auto &CS : CallsiteSamples) { + for (const auto &NameFS : CS.second) { + NameSet.insert(NameFS.first); + NameFS.second.findAllNames(NameSet); + } + } +} + +const FunctionSamples *FunctionSamples::findFunctionSamplesAt( + const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper) const { + std::string CalleeGUID; + CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); + + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + auto FS = iter->second.find(CalleeName); + if (FS != iter->second.end()) + return &FS->second; + if (Remapper) { + if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) { + auto FS = iter->second.find(*NameInProfile); + if (FS != iter->second.end()) + return &FS->second; + } + } + // If we cannot find exact match of the callee name, return the FS with + // the max total count. Only do this when CalleeName is not provided, + // i.e., only for indirect calls. + if (!CalleeName.empty()) + return nullptr; + uint64_t MaxTotalSamples = 0; + const FunctionSamples *R = nullptr; + for (const auto &NameFS : iter->second) + if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { + MaxTotalSamples = NameFS.second.getTotalSamples(); + R = &NameFS.second; + } + return R; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -1291,18 +1291,22 @@ } assert(Remappings && "should be initialized while creating remapper"); - for (auto &Sample : Reader.getProfiles()) - if (auto Key = Remappings->insert(Sample.first())) - SampleMap.insert({Key, &Sample.second}); + for (auto &Sample : Reader.getProfiles()) { + DenseSet NamesInSample; + Sample.second.findAllNames(NamesInSample); + for (auto &Name : NamesInSample) + if (auto Key = Remappings->insert(Name)) + NameMap.insert({Key, Name}); + } RemappingApplied = true; } -FunctionSamples * -SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) { +Optional +SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { if (auto Key = Remappings->lookup(Fname)) - return SampleMap.lookup(Key); - return nullptr; + return NameMap.lookup(Key); + return None; } /// Prepare a memory buffer for the contents of \p Filename. Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -840,7 +840,7 @@ return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL), DIL->getBaseDiscriminator()), - CalleeName); + CalleeName, Reader->getRemapper()); } /// Returns a vector of FunctionSamples that are the indirect call targets @@ -903,7 +903,7 @@ auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); if (it.second) - it.first->second = Samples->findFunctionSamples(DIL); + it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper()); return it.first->second; } @@ -1050,24 +1050,23 @@ PSI->getOrCompHotCountThreshold()); continue; } - auto CalleeFunctionName = FS->getFuncName(); + if (!callsiteIsHot(FS, PSI)) + continue; + + const char *Reason = "Callee function not available"; + // R->getValue() != &F is to prevent promoting a recursive call. // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is // recursive. As llvm does not inline recursive calls, we will // simply ignore it instead of handling it explicitly. - if (CalleeFunctionName == F.getName()) - continue; - - if (!callsiteIsHot(FS, PSI)) - continue; - - const char *Reason = "Callee function not available"; + auto CalleeFunctionName = FS->getFuncName(); auto R = SymbolMap.find(CalleeFunctionName); if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && R->getValue()->hasFnAttribute("use-sample-profile") && + R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); auto &DI = @@ -1854,7 +1853,6 @@ FunctionAnalysisManager *FAM) { auto &Ctx = M.getContext(); - std::unique_ptr RemapReader; auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { @@ -1910,6 +1908,7 @@ for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); + auto Remapper = Reader->getRemapper(); // Populate the symbol map. for (const auto &N_F : M.getValueSymbolTable()) { StringRef OrigName = N_F.getKey(); @@ -1927,6 +1926,15 @@ // to nullptr to avoid confusion. if (!r.second) r.first->second = nullptr; + OrigName = NewName; + } + // Insert the remapped names into SymbolMap. + if (Remapper) { + if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) { + if (*MapName == OrigName) + continue; + SymbolMap.insert(std::make_pair(*MapName, F)); + } } } Index: llvm/test/Transforms/SampleProfile/Inputs/remap-2.prof =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/Inputs/remap-2.prof @@ -0,0 +1,16 @@ +test:15680:2500 + 1: 100 + 4: 100 + 5: 3000 xoo:1000 + 5: _ZN3foo3barERKN1N1XINS_4quuxEEE:2000 + 1: 2000 + 6: _ZN1N1XE:2500 + 1: 2500 + +_ZN1M1X1YE:15680:2500 + 1: 100 + 4: 100 + 5: 3000 xoo:1000 + 5: _ZN1M1X1YE:2000 + 1: 2000 + Index: llvm/test/Transforms/SampleProfile/remap-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/remap-2.ll @@ -0,0 +1,74 @@ +; RUN: opt %s -passes=sample-profile -sample-profile-file=%S/Inputs/remap-2.prof -sample-profile-remapping-file=%S/Inputs/remap.map -S | FileCheck %s +; Check profile remapping works for searching inline instance, searching +; indirect call promotion candidate and prevent recursive inline. + +@x.addr = common global i32 zeroinitializer, align 16 +@y.addr = common global i32 zeroinitializer, align 16 + +define i32 @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() #0 !dbg !9 { +entry: + %t0 = load i32, i32* @x.addr, align 4 + %t1 = load i32, i32* @y.addr, align 4 + %add = add nsw i32 %t0, %t1 + ret i32 %add +} + +define i32 @_ZN1M1XE() #0 !dbg !10 { +entry: + %t0 = load i32, i32* @x.addr, align 4 + %t1 = load i32, i32* @y.addr, align 4 + %sub = sub nsw i32 %t0, %t1 + ret i32 %sub +} + +define void @test(i32 ()*) #0 !dbg !4 { + %t2 = alloca i32 ()* + store i32 ()* %0, i32 ()** %t2 + %t3 = load i32 ()*, i32 ()** %t2 +; Check call i32 %t3 has been indirect call promoted and call i32 @_ZN1M1XE +; has been inlined. +; CHECK-LABEL: @test( +; CHECK: icmp eq i32 ()* %t3, @_ZN3foo3barERKN1M1XINS_6detail3quxEEE +; CHECK-NOT: call i32 @_ZN1M1XE + %t4 = call i32 %t3(), !dbg !7 + %t5 = call i32 @_ZN1M1XE(), !dbg !8 + ret void +} + +define void @_ZN1M1X1YE(i32 ()*) #0 !dbg !11 { + %t2 = alloca i32 ()* + store i32 ()* %0, i32 ()** %t2 + %t3 = load i32 ()*, i32 ()** %t2 +; Check call i32 %t3 has got its profile but is not indirect call promoted +; because the promotion candidate is a recursive call to the current function. +; CHECK-LABEL: @_ZN1M1X1YE( +; CHECK: call i32 %t3(), {{.*}} !prof ![[PROFID:[0-9]+]] +; CHECK-NOT: icmp eq i32 ()* %t3, @_ZN1M1X1YE + %t4 = call i32 %t3(), !dbg !12 + ret void +} + +; CHECK: ![[PROFID]] = !{!"VP", i32 0, i64 3000 + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "test", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = !DILocation(line: 8, scope: !4) +!8 = !DILocation(line: 9, scope: !4) +!9 = distinct !DISubprogram(name: "_ZN3foo3barERKN1M1XINS_6detail3quxEEE", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!10 = distinct !DISubprogram(name: "_ZN1M1XE", line: 20, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!11 = distinct !DISubprogram(name: "_ZN1M1X1YE", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!12 = !DILocation(line: 30, scope: !11) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 1, !"Debug Info Version", i32 3} +!15 = !{!"clang version 3.5 "} + Index: llvm/unittests/ProfileData/SampleProfTest.cpp =================================================================== --- llvm/unittests/ProfileData/SampleProfTest.cpp +++ llvm/unittests/ProfileData/SampleProfTest.cpp @@ -89,8 +89,8 @@ auto VerifySummary = [IsPartialProfile, PartialProfileRatio]( ProfileSummary &Summary) mutable { ASSERT_EQ(ProfileSummary::PSK_Sample, Summary.getKind()); - ASSERT_EQ(137392u, Summary.getTotalCount()); - ASSERT_EQ(8u, Summary.getNumCounts()); + ASSERT_EQ(138211u, Summary.getTotalCount()); + ASSERT_EQ(10u, Summary.getNumCounts()); ASSERT_EQ(4u, Summary.getNumFunctions()); ASSERT_EQ(1437u, Summary.getMaxFunctionCount()); ASSERT_EQ(60351u, Summary.getMaxCount()); @@ -112,7 +112,7 @@ ASSERT_EQ(60000u, EightyPerc->MinCount); ASSERT_EQ(12557u, NinetyPerc->MinCount); ASSERT_EQ(12557u, NinetyFivePerc->MinCount); - ASSERT_EQ(610u, NinetyNinePerc->MinCount); + ASSERT_EQ(600u, NinetyNinePerc->MinCount); }; VerifySummary(Summary); @@ -155,6 +155,22 @@ FooSamples.addBodySamples(8, 0, 60351); FooSamples.addBodySamples(10, 0, 605); + // Add inline instance with name "_Z3gooi". + StringRef GooName("_Z3gooi"); + auto &GooSamples = + FooSamples.functionSamplesAt(LineLocation(7, 0))[GooName.str()]; + GooSamples.setName(GooName); + GooSamples.addTotalSamples(502); + GooSamples.addBodySamples(3, 0, 502); + + // Add inline instance with name "_Z3hooi". + StringRef HooName("_Z3hooi"); + auto &HooSamples = + GooSamples.functionSamplesAt(LineLocation(9, 0))[HooName.str()]; + HooSamples.setName(HooName); + HooSamples.addTotalSamples(317); + HooSamples.addBodySamples(4, 0, 317); + StringRef BarName("_Z3bari"); FunctionSamples BarSamples; BarSamples.setName(BarName); @@ -197,6 +213,8 @@ createRemapFile(RemapPath, RemapFile); FooName = "_Z4fauxi"; BarName = "_Z3barl"; + GooName = "_Z3gool"; + HooName = "_Z3hool"; } M.getOrInsertFunction(FooName, fn_type); @@ -235,6 +253,33 @@ ASSERT_EQ(7711u, ReadFooSamples->getTotalSamples()); ASSERT_EQ(610u, ReadFooSamples->getHeadSamples()); + // Try to find a FunctionSamples with GooName at given callsites containing + // inline instance for GooName. Test the correct FunctionSamples can be + // found with Remapper support. + const FunctionSamples *ReadGooSamples = + ReadFooSamples->findFunctionSamplesAt(LineLocation(7, 0), GooName, + Reader->getRemapper()); + ASSERT_TRUE(ReadGooSamples != nullptr); + ASSERT_EQ(502u, ReadGooSamples->getTotalSamples()); + + // Try to find a FunctionSamples with GooName at given callsites containing + // no inline instance for GooName. Test no FunctionSamples will be + // found with Remapper support. + const FunctionSamples *ReadGooSamplesAgain = + ReadFooSamples->findFunctionSamplesAt(LineLocation(9, 0), GooName, + Reader->getRemapper()); + ASSERT_TRUE(ReadGooSamplesAgain == nullptr); + + // The inline instance of Hoo is inside of the inline instance of Goo. + // Try to find a FunctionSamples with HooName at given callsites containing + // inline instance for HooName. Test the correct FunctionSamples can be + // found with Remapper support. + const FunctionSamples *ReadHooSamples = + ReadGooSamples->findFunctionSamplesAt(LineLocation(9, 0), HooName, + Reader->getRemapper()); + ASSERT_TRUE(ReadHooSamples != nullptr); + ASSERT_EQ(317u, ReadHooSamples->getTotalSamples()); + FunctionSamples *ReadBarSamples = Reader->getSamplesFor(BarName); ASSERT_TRUE(ReadBarSamples != nullptr); if (!UseMD5) {