Index: lib/Analysis/IPA/InlineCost.cpp =================================================================== --- lib/Analysis/IPA/InlineCost.cpp +++ lib/Analysis/IPA/InlineCost.cpp @@ -55,6 +55,8 @@ Function &F; int Threshold; + // Maximum value Threshold can grow to. + int MaxThreshold; int Cost; bool IsCallerRecursive; @@ -951,16 +953,9 @@ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; - if (NumVectorInstructions > NumInstructions/2) - VectorBonus = FiftyPercentVectorBonus; - else if (NumVectorInstructions > NumInstructions/10) - VectorBonus = TenPercentVectorBonus; - else - VectorBonus = 0; - - // Check if we've past the threshold so we don't spin in huge basic - // blocks that will never inline. - if (Cost > (Threshold + VectorBonus)) + // Check if we've passed the maximum possible threshold so we don't spin in + // huge basic blocks that will never inline. + if (Cost > MaxThreshold) return false; } @@ -1016,13 +1011,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - Threshold += SingleBBBonus; - // Perform some tweaks to the cost and threshold based on the direct // callsite information. @@ -1031,10 +1019,21 @@ // low. assert(NumInstructions == 0); assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; + FiftyPercentVectorBonus = 3 * Threshold / 2; + TenPercentVectorBonus = 3 * Threshold / 4; const DataLayout &DL = F.getParent()->getDataLayout(); + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. 
+ bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; + // We could end up with enough vector instructions for Threshold to be bumped + // up by FiftyPercentVectorBonus. Increase MaxThreshold by that bonus so + // that we do not bail out before we reach that threshold. + MaxThreshold = Threshold + FiftyPercentVectorBonus; + // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { @@ -1077,9 +1076,9 @@ Instruction *Instr = CS.getInstruction(); if (InvokeInst *II = dyn_cast(Instr)) { if (isa(II->getNormalDest()->begin())) - Threshold = 1; + MaxThreshold = Threshold = 0; } else if (isa(++BasicBlock::iterator(Instr))) - Threshold = 1; + MaxThreshold = Threshold = 0; // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1087,7 +1086,7 @@ Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) + if (Cost > MaxThreshold) return false; if (F.empty()) @@ -1151,7 +1150,7 @@ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > (Threshold + VectorBonus)) + if (Cost > MaxThreshold) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1219,6 +1218,7 @@ if (SingleBB && TI->getNumSuccessors() > 1) { // Take off the bonus we applied to the threshold. 
Threshold -= SingleBBBonus; + MaxThreshold -= SingleBBBonus; SingleBB = false; } } @@ -1229,6 +1229,13 @@ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return false; + if (NumVectorInstructions > NumInstructions / 2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions / 10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + Threshold += VectorBonus; return Cost < Threshold; @@ -1244,11 +1251,13 @@ DEBUG_PRINT_STAT(NumConstantPtrCmps); DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); + DEBUG_PRINT_STAT(MaxThreshold); DEBUG_PRINT_STAT(VectorBonus); #undef DEBUG_PRINT_STAT } Index: test/Transforms/Inline/vector-bonus.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/vector-bonus.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -inline -inline-threshold=40 -S | FileCheck %s +; The call to @bar, whose body uses vector operations, must be inlined into +; @foo; FileCheck verifies that no call to @bar remains in @foo. + +define i32 @bar(<4 x i32> %v, i32 %i) #0 { +entry: + %cmp = icmp sgt i32 %i, 4 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %i, %i + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %i, %i + %add2 = add nsw i32 %i, %i + %add3 = add nsw i32 %i, %i + %add4 = add nsw i32 %i, %i + %add5 = add nsw i32 %i, %i + %add6 = add nsw i32 %i, %i + %vecext = extractelement <4 x i32> %v, i32 0 + %vecext7 = extractelement <4 x i32> %v, i32 1 + %add7 = add nsw i32 %vecext, %vecext7 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %mul1, %if.then ], [ %add7, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo(<4 x i32> %v, i32 %a) #1 { +entry: + %call = call i32 @bar(<4 x i32> %v, i32 %a) + ret i32 %call +} + +; CHECK-LABEL: @foo( +; CHECK-NOT: call i32 @bar +; 
CHECK: ret