Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -174,6 +174,19 @@ /// individual classes of instructions would be better. unsigned getInliningThresholdMultiplier() const; + /// \brief Return a penalty heuristic to be applied to call or invoke + /// instructions. + /// + /// This allows a target to indicate that call instructions + /// are particularly costly. It can be viewed as a heuristic to account + /// for the increased register pressure across a call due to clobbering + /// of caller-save registers. + /// + /// FIXME: This is another blunt instrument. This and + /// getInliningThresholdMultiplier should probably be folded into getCallCost + /// and InlineCost changed to delegate to getCallCost. + int getInliningCallPenalty() const; + /// \brief Estimate the cost of an intrinsic when lowered. /// /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 
@@ -680,6 +693,7 @@ virtual int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; + virtual int getInliningCallPenalty() = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -831,6 +845,9 @@ unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); } + int getInliningCallPenalty() override { + return Impl.getInliningCallPenalty(); + } int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) override { return Impl.getIntrinsicCost(IID, RetTy, ParamTys); Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -129,6 +129,7 @@ } unsigned getInliningThresholdMultiplier() { return 1; } + int getInliningCallPenalty() { return 5 * TTI::TCC_Basic; } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) { Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -44,6 +44,10 @@ "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); +static cl::opt<int> InliningCallPenalty( + "inline-call-penalty", cl::Hidden, cl::init(-1), cl::ZeroOrMore, + cl::desc("Override the target's penalty heuristic for calls")); + static cl::opt<int> HintThreshold( "inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint")); @@ -103,6 +107,7 @@ unsigned NumInstructions, NumVectorInstructions; int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; + int CallPenalty; /// While we walk the potentially-inlined instructions, we build up and ///
maintain a mapping of simplified values specific to this callsite. The @@ -935,7 +940,7 @@ // Everything other than inline ASM will also have a significant cost // merely from making the call. if (!isa<InlineAsm>(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; + Cost += CallPenalty; } return Base::visitCallSite(CS); @@ -1197,6 +1202,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; + // Cache the call penalty, adjusting based on command line flags if needed. + if (InliningCallPenalty != -1) + CallPenalty = InliningCallPenalty; + else + CallPenalty = (TTI.getInliningCallPenalty() * InlineConstants::InstrCost) / + TargetTransformInfo::TCC_Basic; + // Perform some tweaks to the cost and threshold based on the direct // callsite information. @@ -1255,7 +1267,10 @@ Cost -= InlineConstants::InstrCost; } } - + // The call instruction also disappears after inlining. + Cost -= InlineConstants::InstrCost; + Cost -= CallPenalty; + // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -71,6 +71,10 @@ return TTIImpl->getInliningThresholdMultiplier(); } +int TargetTransformInfo::getInliningCallPenalty() const { + return TTIImpl->getInliningCallPenalty(); +} + int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) const { return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -285,7 +285,7 @@ // treating them as truly abstract units etc. TotalSecondaryCost = 0; // The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); + int CandidateCost = IC.getCost() - 1; // This bool tracks what happens if we do NOT inline C into B. bool callerWillBeRemoved = Caller->hasLocalLinkage(); // This bool tracks what happens if we DO inline C into B. Index: test/Transforms/Inline/alloca-bonus.ll =================================================================== --- test/Transforms/Inline/alloca-bonus.ll +++ test/Transforms/Inline/alloca-bonus.ll @@ -1,4 +1,4 @@ -; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s +; RUN: opt -inline < %s -S -o - -inline-threshold=8 -inline-call-penalty=0 | FileCheck %s target datalayout = "p:32:32" @@ -17,6 +17,7 @@ define void @inner1(i32 *%ptr) { %A = load i32, i32* %ptr store i32 0, i32* %ptr + store i32 0, i32* %ptr %C = getelementptr inbounds i32, i32* %ptr, i32 0 %D = getelementptr inbounds i32, i32* %ptr, i32 1 %E = bitcast i32* %ptr to i8* @@ -37,6 +38,7 @@ define void @inner2(i32 *%ptr) { %A = load i32, i32* %ptr store i32 0, i32* %ptr + store i32 0, i32* %ptr %C = getelementptr inbounds i32, i32* %ptr, i32 0 %D = getelementptr inbounds i32, i32* %ptr, i32 %A %E = bitcast i32* %ptr to i8* Index: test/Transforms/Inline/inline-cold-callee.ll =================================================================== --- test/Transforms/Inline/inline-cold-callee.ll +++ test/Transforms/Inline/inline-cold-callee.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s +; RUN: opt < %s -inline -inlinecold-threshold=0 -inline-call-penalty=0 -S | FileCheck %s ; This tests that a cold callee gets the (lower) inlinecold-threshold even without ; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold. 
Index: test/Transforms/Inline/inline-cold.ll =================================================================== --- test/Transforms/Inline/inline-cold.ll +++ test/Transforms/Inline/inline-cold.ll @@ -1,13 +1,13 @@ -; RUN: opt < %s -inline -S -inlinecold-threshold=75 | FileCheck %s +; RUN: opt < %s -inline -S -inlinecold-threshold=75 -inline-call-penalty=0 | FileCheck %s ; Test that functions with attribute Cold are not inlined while the ; same function without attribute Cold will be inlined. -; RUN: opt < %s -inline -S -inline-threshold=600 | FileCheck %s -check-prefix=OVERRIDE +; RUN: opt < %s -inline -S -inline-threshold=600 -inline-call-penalty=0 | FileCheck %s -check-prefix=OVERRIDE ; The command line argument for inline-threshold should override ; the default cold threshold, so a cold function with size bigger ; than the default cold threshold (225) will be inlined. -; RUN: opt < %s -inline -S | FileCheck %s -check-prefix=DEFAULT +; RUN: opt < %s -inline -S -inline-call-penalty=0 | FileCheck %s -check-prefix=DEFAULT ; The same cold function will not be inlined with the default behavior. @a = global i32 4 Index: test/Transforms/Inline/inline-hot-callee.ll =================================================================== --- test/Transforms/Inline/inline-hot-callee.ll +++ test/Transforms/Inline/inline-hot-callee.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s +; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -inline-call-penalty=0 -S | FileCheck %s ; This tests that a hot callee gets the (higher) inlinehint-threshold even without ; inline hints and gets inlined because the cost is less than inlinehint-threshold. 
Index: test/Transforms/Inline/inline-hot-callsite.ll =================================================================== --- test/Transforms/Inline/inline-hot-callsite.ll +++ test/Transforms/Inline/inline-hot-callsite.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s +; RUN: opt < %s -inline -inline-threshold=0 -hot-callsite-threshold=100 -inline-call-penalty=0 -S | FileCheck %s ; This tests that a hot callsite gets the (higher) inlinehint-threshold even without ; without inline hints and gets inlined because the cost is less than Index: test/Transforms/Inline/inline-optsize.ll =================================================================== --- test/Transforms/Inline/inline-optsize.ll +++ test/Transforms/Inline/inline-optsize.ll @@ -22,6 +22,7 @@ %x4 = add i32 %x3, %a4 %a5 = load volatile i32, i32* @a %x5 = add i32 %x3, %a5 + call void @extern() ret i32 %x5 } @@ -42,3 +43,5 @@ %r = call i32 @inner() ret i32 %r } + +declare void @extern() Index: test/Transforms/Inline/inline_unreachable-2.ll =================================================================== --- test/Transforms/Inline/inline_unreachable-2.ll +++ test/Transforms/Inline/inline_unreachable-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -inline -inline-call-penalty=0 -S | FileCheck %s ; CHECK-LABEL: caller ; CHECK: call void @callee Index: test/Transforms/Inline/optimization-remarks-passed-yaml.ll =================================================================== --- test/Transforms/Inline/optimization-remarks-passed-yaml.ll +++ test/Transforms/Inline/optimization-remarks-passed-yaml.ll @@ -12,7 +12,7 @@ ; 4 return foo(); ; 5 } -; CHECK: remark: /tmp/s.c:4:10: foo can be inlined into bar with cost={{[0-9]+}} (threshold={{[0-9]+}}) (hotness: 30) +; CHECK: remark: /tmp/s.c:4:10: foo can be inlined into bar with cost={{-?[0-9]+}} (threshold={{[0-9]+}}) (hotness: 30) ; CHECK-NEXT:
remark: /tmp/s.c:4:10: foo inlined into bar (hotness: 30) ; YAML: --- !Analysis @@ -26,7 +26,7 @@ ; YAML-NEXT: - String: ' can be inlined into ' ; YAML-NEXT: - Caller: bar ; YAML-NEXT: - String: ' with cost=' -; YAML-NEXT: - Cost: '{{[0-9]+}}' +; YAML-NEXT: - Cost: '{{-?[0-9]+}}' ; YAML-NEXT: - String: ' (threshold=' ; YAML-NEXT: - Threshold: '{{[0-9]+}}' ; YAML-NEXT: - String: ')' Index: test/Transforms/Inline/ptr-diff.ll =================================================================== --- test/Transforms/Inline/ptr-diff.ll +++ test/Transforms/Inline/ptr-diff.ll @@ -1,4 +1,4 @@ -; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s +; RUN: opt -inline < %s -S -o - -inline-threshold=10 -inline-call-penalty=0 | FileCheck %s target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64"