Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -184,7 +184,8 @@ typedef std::map BodySampleMap; class FunctionSamples; -typedef std::map CallsiteSampleMap; +typedef StringMap FunctionSamplesMap; +typedef std::map CallsiteSampleMap; /// Representation of the samples collected for a function. /// @@ -252,18 +253,37 @@ } /// Return the function samples at the given callsite location. - FunctionSamples &functionSamplesAt(const LineLocation &Loc) { + FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { return CallsiteSamples[Loc]; } - /// Return a pointer to function samples at the given callsite location. - const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const { + /// Returns the FunctionSamplesMap at the given \p Loc. + const FunctionSamplesMap * + findFunctionSamplesMapAt(const LineLocation &Loc) const { auto iter = CallsiteSamples.find(Loc); - if (iter == CallsiteSamples.end()) { + if (iter == CallsiteSamples.end()) return nullptr; - } else { - return &iter->second; - } + return &iter->second; + } + + /// Returns a pointer to FunctionSamples at the given callsite location \p Loc + /// with callee \p CalleeName. + const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, + StringRef CalleeName) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + const auto &FS = iter->second.find(CalleeName); + if (FS != iter->second.end()) + return &FS->getValue(); + uint64_t MaxTotalSamples = 0; + const FunctionSamples *R = nullptr; + for (const auto &NameFS : iter->second) + if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { + MaxTotalSamples = NameFS.second.getTotalSamples(); + R = &NameFS.second; + } + return R; } bool empty() const { return TotalSamples == 0; } @@ -297,8 +317,9 @@ } for (const auto &I : Other.getCallsiteSamples()) { const LineLocation &Loc = I.first; - const FunctionSamples &Rec = I.second; - MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight)); + FunctionSamplesMap &FSMap = functionSamplesAt(Loc); + for (const auto &Rec : I.second) + MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight)); } return Result; } @@ -314,7 +335,8 @@ if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); for (auto CS : CallsiteSamples) - CS.second.findImportedFunctions(S, M, Threshold); + for (const auto &NameFS : CS.second) + NameFS.second.findImportedFunctions(S, M, Threshold); } /// Set the name of the function. Index: lib/ProfileData/SampleProf.cpp =================================================================== --- lib/ProfileData/SampleProf.cpp +++ lib/ProfileData/SampleProf.cpp @@ -129,12 +129,14 @@ OS.indent(Indent); if (!CallsiteSamples.empty()) { OS << "Samples collected in inlined callsites {\n"; - SampleSorter SortedCallsiteSamples( + SampleSorter SortedCallsiteSamples( CallsiteSamples); for (const auto &CS : SortedCallsiteSamples.get()) { - OS.indent(Indent + 2); - OS << CS->first << ": inlined callee: " << CS->second.getName() << ": "; - CS->second.print(OS, Indent + 4); + for (const auto &FS : CS->second) { + OS.indent(Indent + 2); + OS << CS->first << ": inlined callee: " << FS.second.getName() << ": "; + FS.second.print(OS, Indent + 4); + } } OS << "}\n"; } else { Index: lib/ProfileData/SampleProfReader.cpp =================================================================== --- lib/ProfileData/SampleProfReader.cpp +++ lib/ProfileData/SampleProfReader.cpp @@ -211,7 +211,7 @@ InlineStack.pop_back(); } FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( - LineLocation(LineOffset, Discriminator)); + LineLocation(LineOffset, Discriminator))[FName]; FSamples.setName(FName); MergeResult(Result, FSamples.addTotalSamples(NumSamples)); InlineStack.push_back(&FSamples); @@ -363,8 +363,8 @@ if (std::error_code EC = FName.getError()) return EC; - FunctionSamples &CalleeProfile = - FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator)); + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + LineLocation(*LineOffset, *Discriminator))[*FName]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; @@ -636,7 +636,7 @@ uint32_t LineOffset = Offset >> 16; uint32_t Discriminator = Offset & 0xffff; FProfile = &CallerProfile->functionSamplesAt( - LineLocation(LineOffset, Discriminator)); + LineLocation(LineOffset, Discriminator))[Name]; } FProfile->setName(Name); Index: lib/ProfileData/SampleProfWriter.cpp =================================================================== --- lib/ProfileData/SampleProfWriter.cpp +++ lib/ProfileData/SampleProfWriter.cpp @@ -68,20 +68,21 @@ OS << "\n"; } - SampleSorter SortedCallsiteSamples( + SampleSorter SortedCallsiteSamples( S.getCallsiteSamples()); Indent += 1; - for (const auto &I : SortedCallsiteSamples.get()) { - LineLocation Loc = I->first; - const FunctionSamples &CalleeSamples = I->second; - OS.indent(Indent); - if (Loc.Discriminator == 0) - OS << Loc.LineOffset << ": "; - else - OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; - if (std::error_code EC = write(CalleeSamples)) - return EC; - } + for (const auto &I : SortedCallsiteSamples.get()) + for (const auto &FS : I->second) { + LineLocation Loc = I->first; + const FunctionSamples &CalleeSamples = FS.second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + if (std::error_code EC = write(CalleeSamples)) + return EC; + } Indent -= 1; return sampleprof_error::success; @@ -109,11 +110,12 @@ } // Recursively add all the names for inlined callsites. - for (const auto &J : S.getCallsiteSamples()) { - const FunctionSamples &CalleeSamples = J.second; - addName(CalleeSamples.getName()); - addNames(CalleeSamples); - } + for (const auto &J : S.getCallsiteSamples()) + for (const auto &FS : J.second) { + const FunctionSamples &CalleeSamples = FS.second; + addName(CalleeSamples.getName()); + addNames(CalleeSamples); + } } std::error_code SampleProfileWriterBinary::writeHeader( @@ -187,14 +189,15 @@ // Recursively emit all the callsite samples. encodeULEB128(S.getCallsiteSamples().size(), OS); - for (const auto &J : S.getCallsiteSamples()) { - LineLocation Loc = J.first; - const FunctionSamples &CalleeSamples = J.second; - encodeULEB128(Loc.LineOffset, OS); - encodeULEB128(Loc.Discriminator, OS); - if (std::error_code EC = writeBody(CalleeSamples)) - return EC; - } + for (const auto &J : S.getCallsiteSamples()) + for (const auto &FS : J.second) { + LineLocation Loc = J.first; + const FunctionSamples &CalleeSamples = FS.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(CalleeSamples)) + return EC; + } return sampleprof_error::success; } Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -162,6 +162,8 @@ ErrorOr getInstWeight(const Instruction &I); ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + std::vector + findIndirectCallFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineHotFunctions(Function &F, DenseSet &ImportGUIDs); @@ -330,11 +332,12 @@ // If there are inlined callsites in this function, count the samples found // in the respective bodies. However, do not bother counting callees with 0 // total samples, these are callees that were never invoked at runtime. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countUsedRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countUsedRecords(CalleeSamples); + } return Count; } @@ -347,11 +350,12 @@ unsigned Count = FS->getBodySamples().size(); // Only count records in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countBodyRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countBodyRecords(CalleeSamples); + } return Count; } @@ -366,11 +370,12 @@ Total += I.second.getSamples(); // Only count samples in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Total += countBodySamples(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Total += countBodySamples(CalleeSamples); + } return Total; } @@ -559,12 +564,47 @@ if (!DIL) { return nullptr; } + + StringRef CalleeName; + if (const CallInst *CI = dyn_cast(&Inst)) + if (Function *Callee = CI->getCalledFunction()) + CalleeName = Callee->getName(); + const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return nullptr; return FS->findFunctionSamplesAt( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator())); + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName); +} + +/// Returns a vector of FunctionSamples that are the indirect call targets +/// of \p Inst. The vector is sorted by the total number of samples. +std::vector +SampleProfileLoader::findIndirectCallFunctionSamples( + const Instruction &Inst) const { + const DILocation *DIL = Inst.getDebugLoc(); + std::vector R; + + if (!DIL) { + return R; + } + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (FS == nullptr) + return R; + + if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { + for (const auto &NameFS : *M) { + R.push_back(&NameFS.second); + } + std::sort(R.begin(), R.end(), + [](const FunctionSamples *L, const FunctionSamples *R) { + return L->getTotalSamples() > R->getTotalSamples(); + }); + } + return R; } /// \brief Get the FunctionSamples for an instruction. @@ -578,18 +618,23 @@ /// \returns the FunctionSamples pointer to the inlined instance. const FunctionSamples * SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { - SmallVector S; + SmallVector, 10> S; const DILocation *DIL = Inst.getDebugLoc(); - if (!DIL) { + if (!DIL) return Samples; + + const DILocation *PrevDIL = DIL; + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + S.push_back(std::make_pair( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); + PrevDIL = DIL; } - for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) - S.push_back(LineLocation(getOffset(DIL), DIL->getBaseDiscriminator())); if (S.size() == 0) return Samples; const FunctionSamples *FS = Samples; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i]); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second); } return FS; } @@ -638,25 +683,27 @@ Function *CalledFunction = CallSite(I).getCalledFunction(); Instruction *DI = I; if (!CalledFunction && !PromotedInsns.count(I) && - CallSite(I).isIndirectCall()) { - auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName(); - const char *Reason = "Callee function not available"; - CalledFunction = F.getParent()->getFunction(CalleeFunctionName); - if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { - // The indirect target was promoted and inlined in the profile, as a - // result, we do not have profile info for the branch probability. - // We set the probability to 80% taken to indicate that the static - // call is likely taken. - DI = dyn_cast( - promoteIndirectCall(I, CalledFunction, 80, 100, false) - ->stripPointerCasts()); - PromotedInsns.insert(I); - } else { - DEBUG(dbgs() << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); - continue; + CallSite(I).isIndirectCall()) + for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + auto CalleeFunctionName = FS->getName(); + const char *Reason = "Callee function not available"; + CalledFunction = F.getParent()->getFunction(CalleeFunctionName); + if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { + // The indirect target was promoted and inlined in the profile, as a + // result, we do not have profile info for the branch probability. + // We set the probability to 80% taken to indicate that the static + // call is likely taken. + DI = dyn_cast( + promoteIndirectCall(I, CalledFunction, 80, 100, false) + ->stripPointerCasts()); + PromotedInsns.insert(I); + } else { + DEBUG(dbgs() << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason + << "\n"); + continue; + } } - } if (!CalledFunction || !CalledFunction->getSubprogram()) { findCalleeFunctionSamples(*I)->findImportedFunctions( ImportGUIDs, F.getParent(), Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/indirect-call.prof +++ test/Transforms/SampleProfile/Inputs/indirect-call.prof @@ -1,8 +1,10 @@ test:63067:0 4: 3345 _Z3barv:1398 _Z3foov:2059 test_inline:3000:0 - 5: foo_inline:3000 + 5: foo_inline1:3000 1: 3000 + 5: foo_inline2:4000 + 1: 4000 test_noinline:3000:0 5: foo_noinline:3000 1: 3000 Index: test/Transforms/SampleProfile/indirect-call.ll =================================================================== --- test/Transforms/SampleProfile/indirect-call.ll +++ test/Transforms/SampleProfile/indirect-call.ll @@ -16,10 +16,14 @@ %2 = alloca i64* (i32*)* store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 -; CHECK: icmp {{.*}} @foo_inline +; CHECK: icmp {{.*}} @foo_inline2 ; CHECK: if.true.direct_targ: ; CHECK-NOT: call ; CHECK: if.false.orig_indirect: +; CHECK: icmp {{.*}} @foo_inline1 +; CHECK: if.true.direct_targ1: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect2: ; CHECK: call call i64* %3(i32* %x), !dbg !5 ret void @@ -39,7 +43,11 @@ @x = global i32 0, align 4 -define i32* @foo_inline(i32* %x) !dbg !3 { +define i32* @foo_inline1(i32* %x) !dbg !3 { + ret i32* %x +} + +define i32* @foo_inline2(i32* %x) !dbg !3 { ret i32* %x }