Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -51,7 +51,7 @@
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, 
+    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
@@ -63,7 +63,7 @@
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, 
+    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
@@ -149,6 +149,9 @@
   bool HasUninlineableIntrinsic = false;
   bool InitsVargArgs = false;
 
+  /// Attempt to evaluate indirect calls to boost their inline cost.
+  bool BoostIndirectCalls;
+
   /// Number of bytes allocated statically by the callee.
   uint64_t AllocatedSize = 0;
   unsigned NumInstructions = 0;
@@ -295,13 +298,14 @@
                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
                ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
-               Function &Callee, CallBase &Call, const InlineParams &Params)
+               Function &Callee, CallBase &Call, const InlineParams &Params,
+               bool BoostIndirect = true)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
         CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
         ComputeFullInlineCost(OptComputeFullInlineCost ||
                               Params.ComputeFullInlineCost || ORE),
-        EnableLoadElimination(true) {}
+        BoostIndirectCalls(BoostIndirect), EnableLoadElimination(true) {}
 
   InlineResult analyzeCall(CallBase &Call);
 
@@ -423,9 +427,9 @@
   Operands.push_back(GEP.getOperand(0));
   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
-       Operands.push_back(SimpleOp);
-     else
-       Operands.push_back(*I);
+      Operands.push_back(SimpleOp);
+    else
+      Operands.push_back(*I);
   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
 }
 
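Note on the two hunks above: the new BoostIndirectCalls member records whether this CallAnalyzer may "boost" an indirect call by speculatively analyzing its resolved target. Defaulting the constructor argument to true keeps every existing construction site unchanged; only the nested analyzer built for an indirect-call target (in the visitCallBase hunk below) passes false. A minimal standalone sketch of that guard pattern follows; Analyzer and analyzeTarget are hypothetical stand-ins for CallAnalyzer and analyzeCall, not LLVM API:

    // Sketch only: the nested analysis is constructed with the flag off, so
    // speculative evaluation of an indirect-call target cannot re-enter itself.
    class Analyzer {
      bool BoostIndirectCalls;

    public:
      explicit Analyzer(bool BoostIndirect = true)
          : BoostIndirectCalls(BoostIndirect) {}

      void analyzeTarget() {
        if (!BoostIndirectCalls)
          return; // a nested analysis never spawns another nested analysis
        Analyzer Nested(/*BoostIndirect=*/false);
        Nested.analyzeTarget(); // recursion is bounded at one level
      }
    };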
@@ -1239,97 +1243,100 @@
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  if (Function *F = Call.getCalledFunction()) {
-    // When we have a concrete function, first try to simplify it directly.
-    if (simplifyCallSite(F, Call))
-      return true;
-
-    // Next check if it is an intrinsic we know about.
-    // FIXME: Lift this into part of the InstVisitor.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
-      switch (II->getIntrinsicID()) {
-      default:
-        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
-          disableLoadElimination();
-        return Base::visitCallBase(Call);
-
-      case Intrinsic::load_relative:
-        // This is normally lowered to 4 LLVM instructions.
-        addCost(3 * InlineConstants::InstrCost);
-        return false;
-
-      case Intrinsic::memset:
-      case Intrinsic::memcpy:
-      case Intrinsic::memmove:
-        disableLoadElimination();
-        // SROA can usually chew through these intrinsics, but they aren't free.
-        return false;
-      case Intrinsic::icall_branch_funnel:
-      case Intrinsic::localescape:
-        HasUninlineableIntrinsic = true;
-        return false;
-      case Intrinsic::vastart:
-        InitsVargArgs = true;
-        return false;
-      }
-    }
-
-    if (F == Call.getFunction()) {
-      // This flag will fully abort the analysis, so don't bother with anything
-      // else.
-      IsRecursiveCall = true;
-      return false;
-    }
-
-    if (TTI.isLoweredToCall(F)) {
-      // We account for the average 1 instruction per call argument setup
-      // here.
+  Value *Callee = Call.getCalledOperand();
+  Function *F = dyn_cast_or_null<Function>(Callee);
+  bool IsIndirectCall = !F;
+  if (IsIndirectCall) {
+    // Check if this happens to be an indirect function call to a known
+    // function in this inline context. If not, we've done all we can.
+    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+    if (!F) {
+      // Pay the price of the argument setup. We account for the average 1
+      // instruction per call argument setup here.
       addCost(Call.arg_size() * InlineConstants::InstrCost);
 
       // Everything other than inline ASM will also have a significant cost
       // merely from making the call.
-      if (!isa<InlineAsm>(Call.getCalledValue()))
+      if (!isa<InlineAsm>(Callee))
         addCost(InlineConstants::CallPenalty);
-    }
 
-    if (!Call.onlyReadsMemory())
-      disableLoadElimination();
-    return Base::visitCallBase(Call);
+      if (!Call.onlyReadsMemory())
+        disableLoadElimination();
+      return Base::visitCallBase(Call);
+    }
   }
 
-  // Otherwise we're in a very special case -- an indirect function call. See
-  // if we can be particularly clever about this.
-  Value *Callee = Call.getCalledValue();
+  assert(F && "Expected a call to a known function");
 
-  // First, pay the price of the argument setup. We account for the average
-  // 1 instruction per call argument setup here.
-  addCost(Call.arg_size() * InlineConstants::InstrCost);
+  // When we have a concrete function, first try to simplify it directly.
+  if (simplifyCallSite(F, Call))
+    return true;
 
-  // Next, check if this happens to be an indirect function call to a known
-  // function in this inline context. If not, we've done all we can.
-  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-  if (!F) {
-    if (!Call.onlyReadsMemory())
+  // Next check if it is an intrinsic we know about.
+  // FIXME: Lift this into part of the InstVisitor.
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+        disableLoadElimination();
+      return Base::visitCallBase(Call);
+
+    case Intrinsic::load_relative:
+      // This is normally lowered to 4 LLVM instructions.
+      addCost(3 * InlineConstants::InstrCost);
+      return false;
+
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
       disableLoadElimination();
-    return Base::visitCallBase(Call);
+      // SROA can usually chew through these intrinsics, but they aren't free.
+      return false;
+    case Intrinsic::icall_branch_funnel:
+    case Intrinsic::localescape:
+      HasUninlineableIntrinsic = true;
+      return false;
+    case Intrinsic::vastart:
+      InitsVargArgs = true;
+      return false;
+    }
+  }
+
+  if (F == Call.getFunction()) {
+    // This flag will fully abort the analysis, so don't bother with anything
+    // else.
+    IsRecursiveCall = true;
+    return false;
   }
 
-  // If we have a constant that we are calling as a function, we can peer
-  // through it and see the function target. This happens not infrequently
-  // during devirtualization and so we want to give it a hefty bonus for
-  // inlining, but cap that bonus in the event that inlining wouldn't pan
-  // out. Pretend to inline the function, with a custom threshold.
-  auto IndirectCallParams = Params;
-  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
-  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
-                  IndirectCallParams);
-  if (CA.analyzeCall(Call)) {
-    // We were able to inline the indirect call! Subtract the cost from the
-    // threshold to get the bonus we want to apply, but don't go below zero.
-    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+  if (TTI.isLoweredToCall(F)) {
+    // We account for the average 1 instruction per call argument setup here.
+    addCost(Call.arg_size() * InlineConstants::InstrCost);
+
+    // If we have a constant that we are calling as a function, we can peer
+    // through it and see the function target. This happens not infrequently
+    // during devirtualization and so we want to give it a hefty bonus for
+    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
+    // Pretend to inline the function, with a custom threshold.
+    if (IsIndirectCall && BoostIndirectCalls) {
+      auto IndirectCallParams = Params;
+      IndirectCallParams.DefaultThreshold =
+          InlineConstants::IndirectCallThreshold;
+      CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
+                      IndirectCallParams, false);
+      if (CA.analyzeCall(Call)) {
+        // We were able to inline the indirect call! Subtract the cost from the
+        // threshold to get the bonus we want to apply, but don't go below zero.
+        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+      } else
+        // Otherwise simply add the cost for merely making the call.
+        addCost(InlineConstants::CallPenalty);
+    } else
+      // Otherwise simply add the cost for merely making the call.
      addCost(InlineConstants::CallPenalty);
   }
 
-  if (!F->onlyReadsMemory())
+  if (!Call.onlyReadsMemory() || (IsIndirectCall && !F->onlyReadsMemory()))
     disableLoadElimination();
   return Base::visitCallBase(Call);
 }
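Note on the bonus computation in the hunk above: when the nested analysis of the devirtualized target succeeds, the caller is credited with the unused headroom of the nested threshold, and std::max clamps the credit at zero so an unprofitable target never becomes a penalty. A worked example with made-up numbers; applyIndirectCallBonus is a hypothetical helper for illustration, not part of this patch:

    #include <algorithm>
    #include <cassert>

    // Mirrors the patch's "Cost -= std::max(0, CA.getThreshold() - CA.getCost());".
    int applyIndirectCallBonus(int RunningCost, int NestedThreshold,
                               int NestedCost) {
      return RunningCost - std::max(0, NestedThreshold - NestedCost);
    }

    int main() {
      // 60 points of headroom are credited back to the caller's cost.
      assert(applyIndirectCallBonus(225, 100, 40) == 165);
      // A target costing more than its threshold earns no bonus at all.
      assert(applyIndirectCallBonus(225, 100, 140) == 225);
      return 0;
    }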
@@ -1494,7 +1501,7 @@
 
   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
   int64_t SwitchCost =
-      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; 
+      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
 
   addCost(SwitchCost, (int64_t)CostUpperBound);
   return false;
Index: llvm/test/Transforms/Inline/inline-indirect-chain.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Inline/inline-indirect-chain.ll
@@ -0,0 +1,55 @@
+; RUN: opt -inline -early-cse < %s
+; This test used to crash (PR35469).
+
+define void @func1() {
+  %t = bitcast void ()* @func2 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func2() {
+  %t = bitcast void ()* @func3 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func3() {
+  %t = bitcast void ()* @func4 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func4() {
+  br i1 undef, label %left, label %right
+
+left:
+  %t = bitcast void ()* @func5 to void ()*
+  tail call void %t()
+  ret void
+
+right:
+  ret void
+}
+
+define void @func5() {
+  %t = bitcast void ()* @func6 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func6() {
+  %t = bitcast void ()* @func2 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func7() {
+  %t = bitcast void ()* @func3 to void ()*
+  tail call void @func8(void()* %t)
+  ret void
+}
+
+define void @func8(void()* %f) {
+  tail call void %f()
+  ret void
+}
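Note on the regression test: each callee address is hidden behind a bitcast, so getCalledFunction() reports an indirect call while SimplifiedValues can still resolve the target, and func2 -> func3 -> func4 -> func5 -> func6 -> func2 closes a cycle reachable only through such calls. Before this patch every resolved target was itself analyzed with boosting enabled, so the analyzer chased that cycle without bound (PR35469); with BoostIndirectCalls cleared in the nested analyzer, the chase stops after one level. A standalone sketch of that bound, reusing the test's call graph; countNestedAnalyses is a hypothetical illustration, not LLVM API:

    #include <cassert>
    #include <map>
    #include <string>

    // The indirect-call cycle from the test above.
    static const std::map<std::string, std::string> Target = {
        {"func2", "func3"}, {"func3", "func4"}, {"func4", "func5"},
        {"func5", "func6"}, {"func6", "func2"}};

    // Number of nested "boosted" analyses performed starting at Fn. If the
    // nested call passed Boost = true (the pre-patch behavior), this walk
    // would never terminate on the cycle; passing false bounds it.
    int countNestedAnalyses(const std::string &Fn, bool Boost) {
      auto It = Target.find(Fn);
      if (It == Target.end() || !Boost)
        return 0;
      return 1 + countNestedAnalyses(It->second, /*Boost=*/false);
    }

    int main() {
      assert(countNestedAnalyses("func2", /*Boost=*/true) == 1);
      return 0;
    }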