Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -121,6 +121,7 @@ unsigned NumInstructions, NumVectorInstructions; int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; + bool DoNotApplyBonuses; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The @@ -236,10 +237,11 @@ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + TenPercentVectorBonus(0), VectorBonus(0), DoNotApplyBonuses(false), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -707,6 +709,7 @@ Threshold = Params.HotCallSiteThreshold.getValue(); } else if (isColdCallSite(CS, CallerBFI)) { DEBUG(dbgs() << "Cold callsite.\n"); + DoNotApplyBonuses = true; Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } } else { @@ -1296,14 +1299,18 @@ // Update the threshold based on callsite properties updateThreshold(CS, F); - FiftyPercentVectorBonus = 3 * Threshold / 2; - TenPercentVectorBonus = 3 * Threshold / 4; + if (!DoNotApplyBonuses) { + FiftyPercentVectorBonus = 3 * Threshold / 2; + TenPercentVectorBonus = 3 * Threshold / 4; + } // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. 
Balloon the // threshold by 50% until we pass the single-BB phase. bool SingleBB = true; - int SingleBBBonus = Threshold / 2; + int SingleBBBonus = 0; + if (!DoNotApplyBonuses) + SingleBBBonus = Threshold / 2; // Speculatively apply all possible bonuses to Threshold. If cost exceeds // this Threshold any time, and cost cannot decrease, we can stop processing @@ -1318,7 +1325,7 @@ // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); - if (OnlyOneCallAndLocalLinkage) + if (OnlyOneCallAndLocalLinkage && !DoNotApplyBonuses) Cost -= InlineConstants::LastCallToStaticBonus; // If this function uses the coldcc calling convention, prefer not to inline Index: test/Transforms/Inline/last-call-bonus.ll =================================================================== --- test/Transforms/Inline/last-call-bonus.ll +++ test/Transforms/Inline/last-call-bonus.ll @@ -10,6 +10,7 @@ ; preprocess the test. ; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; RUN: opt < %s -passes='function(require<opt-remark-emit>,loop(unroll)),require<profile-summary>,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s ; CHECK-LABEL: define internal i32 @bar() define internal i32 @baz() { Index: test/Transforms/Inline/last-call-no-bonus.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/last-call-no-bonus.ll @@ -0,0 +1,58 @@ +; This code is virtually identical to last-call-bonus.ll, but the callsites +; to the internal functions are cold, thereby preventing the last call to +; static bonus from being applied. 
+ +; RUN: opt < %s -passes='function(require<opt-remark-emit>,loop(unroll)),require<profile-summary>,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s + +; CHECK-LABEL: define internal i32 @baz +define internal i32 @baz() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + ret i32 0 +} + +; CHECK-LABEL: define internal i32 @bar +define internal i32 @bar(i1 %b) { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %for.exit, label %bb1 + +for.exit: + br i1 %b, label %bb2, label %ret, !prof !0 +bb2: +; CHECK: call i32 @baz + call i32 @baz() + br label %ret +ret: + ret i32 0 +} +; CHECK-LABEL: define i32 @foo +define i32 @foo(i1 %b) { +entry: + br i1 %b, label %bb1, label %ret, !prof !0 +bb1: +; CHECK: call i32 @bar + call i32 @bar(i1 %b) + br label %ret +ret: + ret i32 0 +} + +declare void @extern() + +!0 = !{!"branch_weights", i32 1, i32 2500} Index: test/Transforms/Inline/vector-no-bonus.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/vector-no-bonus.ll @@ -0,0 +1,47 @@ +; The code in this test is very similar to vector-bonus.ll except for +; the fact that the call to bar is cold thereby preventing the application of +; the vector bonus. 
+; RUN: opt < %s -inline -inline-threshold=35 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=35 -S | FileCheck %s + +define i32 @bar(<4 x i32> %v, i32 %i) #0 { +entry: + %cmp = icmp sgt i32 %i, 4 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %i, %i + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %i, %i + %add2 = add nsw i32 %i, %i + %add3 = add nsw i32 %i, %i + %add4 = add nsw i32 %i, %i + %add5 = add nsw i32 %i, %i + %add6 = add nsw i32 %i, %i + %vecext = extractelement <4 x i32> %v, i32 0 + %vecext7 = extractelement <4 x i32> %v, i32 1 + %add7 = add nsw i32 %vecext, %vecext7 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %mul1, %if.then ], [ %add7, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo(<4 x i32> %v, i32 %a) #1 { +; CHECK-LABEL: @foo( +; CHECK-NOT: call i32 @bar +; CHECK: ret +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %callbb, label %ret +callbb: + %call = call i32 @bar(<4 x i32> %v, i32 %a) + br label %ret +ret: + %call1 = phi i32 [%call, %callbb], [0, %entry] + ret i32 %call1 +} +