Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -112,7 +112,7 @@ bool emitAnnotations(Function &F); ErrorOr getInstWeight(const Instruction &I) const; ErrorOr getBlockWeight(const BasicBlock *BB) const; - const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineHotFunctions(Function &F); void printEdgeWeight(raw_ostream &OS, Edge E); @@ -210,6 +210,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + private: SampleProfileLoader SampleLoader; }; @@ -466,12 +467,12 @@ if (isa(Inst) || isa(Inst)) return std::error_code(); - // If a call instruction is inlined in profile, but not inlined here, + // If a call/invoke instruction is inlined in profile, but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - const CallInst *CI = dyn_cast(&Inst); - if (CI && findCalleeFunctionSamples(*CI)) - return 0; + bool IsCall = isa(Inst) || isa(Inst); + if (IsCall && findCalleeFunctionSamples(Inst)) + return 0; const DILocation *DIL = DLoc; unsigned Lineno = DLoc.getLine(); @@ -513,9 +514,11 @@ DenseMap CM; for (auto &I : BB->getInstList()) { const ErrorOr &R = getInstWeight(I); - if (R) CM[R.get()]++; + if (R) + CM[R.get()]++; } - if (CM.size() == 0) return std::error_code(); + if (CM.size() == 0) + return std::error_code(); uint64_t W = 0, C = 0; for (const auto &C_W : CM) { if (C_W.second == W) { @@ -552,18 +555,18 @@ /// \brief Get the FunctionSamples for a call instruction. /// -/// The FunctionSamples of a call instruction \p Inst is the inlined +/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined /// instance in which that call instruction is calling to. It contains /// all samples that resides in the inlined instance. We first find the /// inlined instance in which the call instruction is from, then we /// traverse its children to find the callsite with the matching -/// location and callee function name. +/// location. /// -/// \param Inst Call instruction to query. +/// \param Inst Call/Invoke instruction to query. /// /// \returns The FunctionSamples pointer to the inlined instance. const FunctionSamples * -SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const { +SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) { return nullptr; @@ -612,7 +615,6 @@ return FS; } - /// \brief Iteratively inline hot callsites of a function. /// /// Iteratively traverse all callsites of the function \p F, and find if @@ -632,20 +634,27 @@ Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; while (true) { bool LocalChanged = false; - SmallVector CIS; + SmallVector CIS; for (auto &BB : F) { for (auto &I : BB.getInstList()) { - CallInst *CI = dyn_cast(&I); - if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI))) - CIS.push_back(CI); + const FunctionSamples *FS = nullptr; + if ((isa(I) || isa(I)) && + (FS = findCalleeFunctionSamples(I))) { + + if (callsiteIsHot(Samples, FS)) + CIS.push_back(&I); + } } } - for (auto CI : CIS) { + for (auto I : CIS) { InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr); - Function *CalledFunction = CI->getCalledFunction(); - DebugLoc DLoc = CI->getDebugLoc(); - uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples(); - if (InlineFunction(CI, IFI)) { + CallInst *CI = dyn_cast(I); + InvokeInst *II = dyn_cast(I); + Function *CalledFunction = + (CI == nullptr ? II->getCalledFunction() : CI->getCalledFunction()); + DebugLoc DLoc = I->getDebugLoc(); + uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); + if ((CI && InlineFunction(CI, IFI)) || (II && InlineFunction(II, IFI))) { LocalChanged = true; emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc, Twine("inlined hot callee '") + @@ -1067,7 +1076,7 @@ if (!dyn_cast(&I)) { SmallVector Weights; Weights.push_back(BlockWeights[BB]); - CI->setMetadata(LLVMContext::MD_prof, + CI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); } } @@ -1308,7 +1317,7 @@ } bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { - // FIXME: pass in AssumptionCache correctly for the new pass manager. + // FIXME: pass in AssumptionCache correctly for the new pass manager. SampleLoader.setACT(&getAnalysis()); return SampleLoader.runOnModule(M); } Index: test/Transforms/SampleProfile/Inputs/einline.prof =================================================================== --- /dev/null +++ test/Transforms/SampleProfile/Inputs/einline.prof @@ -0,0 +1,3 @@ +_Z3foov:200:100 + 1: _Z3barv:100 + 3: _Z3barv:100 Index: test/Transforms/SampleProfile/early-inline.ll =================================================================== --- /dev/null +++ test/Transforms/SampleProfile/early-inline.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/einline.prof | FileCheck %s + +; Checks if both call and invoke can be inlined early if their inlined +; instances are hot in profile. + +target triple = "x86_64-unknown-linux-gnu" + +@_ZTIi = external constant i8* + +; Function Attrs: uwtable +define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !6 { + %1 = alloca i8* + %2 = alloca i32 + %3 = alloca i32, align 4 +; CHECK-NOT: call + call void @_ZL3barv(), !dbg !9 +; CHECK-NOT: invoke + invoke void @_ZL3barv() + to label %4 unwind label %5, !dbg !10 + +;