Index: lib/Analysis/IPA/InlineCost.cpp =================================================================== --- lib/Analysis/IPA/InlineCost.cpp +++ lib/Analysis/IPA/InlineCost.cpp @@ -951,16 +951,9 @@ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; - if (NumVectorInstructions > NumInstructions/2) - VectorBonus = FiftyPercentVectorBonus; - else if (NumVectorInstructions > NumInstructions/10) - VectorBonus = TenPercentVectorBonus; - else - VectorBonus = 0; - - // Check if we've past the threshold so we don't spin in huge basic - // blocks that will never inline. - if (Cost > (Threshold + VectorBonus)) + // Check if we've passed the maximum possible threshold so we don't spin in + // huge basic blocks that will never inline. + if (Cost > Threshold) return false; } @@ -1016,13 +1009,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - Threshold += SingleBBBonus; - // Perform some tweaks to the cost and threshold based on the direct // callsite information. @@ -1031,10 +1017,21 @@ // low. assert(NumInstructions == 0); assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; + FiftyPercentVectorBonus = 3 * Threshold / 2; + TenPercentVectorBonus = 3 * Threshold / 4; const DataLayout &DL = F.getParent()->getDataLayout(); + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. + bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + + // Speculatively apply all possible bonuses to Threshold. 
If cost exceeds + // this Threshold any time, and cost cannot decrease, we can stop processing + // the rest of the function body. + Threshold += (SingleBBBonus + FiftyPercentVectorBonus); + // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { @@ -1077,9 +1074,9 @@ Instruction *Instr = CS.getInstruction(); if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { if (isa<UnreachableInst>(II->getNormalDest()->begin())) - Threshold = 1; + Threshold = 0; } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) - Threshold = 1; + Threshold = 0; // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1151,7 +1148,7 @@ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > (Threshold + VectorBonus)) + if (Cost > Threshold) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1229,7 +1226,13 @@ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return false; - Threshold += VectorBonus; + // We applied the maximum possible vector bonus at the beginning. Now, + // subtract the excess bonus, if any, from the Threshold before + // comparing against Cost. 
+ if (NumVectorInstructions <= NumInstructions / 10) + Threshold -= FiftyPercentVectorBonus; + else if (NumVectorInstructions <= NumInstructions / 2) + Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); return Cost < Threshold; } @@ -1244,12 +1247,12 @@ DEBUG_PRINT_STAT(NumConstantPtrCmps); DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); - DEBUG_PRINT_STAT(VectorBonus); #undef DEBUG_PRINT_STAT } #endif Index: test/Transforms/Inline/vector-bonus.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/vector-bonus.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -inline -inline-threshold=35 -S | FileCheck %s + +define i32 @bar(<4 x i32> %v, i32 %i) #0 { +entry: + %cmp = icmp sgt i32 %i, 4 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %i, %i + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %i, %i + %add2 = add nsw i32 %i, %i + %add3 = add nsw i32 %i, %i + %add4 = add nsw i32 %i, %i + %add5 = add nsw i32 %i, %i + %add6 = add nsw i32 %i, %i + %vecext = extractelement <4 x i32> %v, i32 0 + %vecext7 = extractelement <4 x i32> %v, i32 1 + %add7 = add nsw i32 %vecext, %vecext7 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %mul1, %if.then ], [ %add7, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo(<4 x i32> %v, i32 %a) #1 { +; CHECK-LABEL: @foo( +; CHECK-NOT: call i32 @bar +; CHECK: ret +entry: + %call = call i32 @bar(<4 x i32> %v, i32 %a) + ret i32 %call +} +