diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6483,9 +6483,10 @@ // if-converted. This means that the block's instructions (aside from // stores and instructions that may divide by zero) will now be // unconditionally executed. For the scalar case, we may not always execute - // the predicated block. Thus, scale the block's cost by the probability of - // executing it. - if (VF.isScalar() && blockNeedsPredication(BB)) + // the predicated block, if it is an if-else block. Thus, scale the block's + // cost by the probability of executing it. blockNeedsPredication from + // Legal is used so as to not include all blocks in tail folded loops. + if (VF.isScalar() && Legal->blockNeedsPredication(BB)) BlockCost.first /= getReciprocalPredBlockProb(); Cost.first += BlockCost.first; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll @@ -15,7 +15,7 @@ ; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add1, i32* %arrayidx2, align 4 ; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %add, %n ; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %exit.loopexit, label %for.body -; CHECK-COST-NEXT: LV: Scalar loop costs: 2. +; CHECK-COST-NEXT: LV: Scalar loop costs: 5. entry: %cmp8 = icmp sgt i32 %n, 0