Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1301,6 +1301,11 @@ return foldTailByMasking() || Legal->blockNeedsPredication(BB); } + /// Returns true if there are any outside-loop reductions. + bool hasOutOfLoopReductions() const { + return !Legal->getReductionVars().empty(); + } + /// Estimate cost of an intrinsic call instruction CI if it were vectorized /// with factor VF. Return the cost of the instruction, including /// scalarization overhead if it's needed. @@ -7346,7 +7351,7 @@ // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. - if (CM.foldTailByMasking()) { + if (CM.foldTailByMasking() && CM.hasOutOfLoopReductions()) { Builder.setInsertPoint(VPBB); auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan); for (auto &Reduction : Legal->getReductionVars()) { Index: llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -39,8 +39,7 @@ ; CHECK-NEXT: "loop:\n" + ; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" + ; CHECK-NEXT: "WIDEN\l"" %cond0 = icmp %iv, 13\l" + -; CHECK-NEXT: "WIDEN-SELECT%s = select %cond0, 10, 20\l" + -; CHECK-NEXT: "EMIT vp<%1> = icmp ule ir<%iv> vp<%0>\l" +; CHECK-NEXT: "WIDEN-SELECT%s = select %cond0, 10, 20\l" ; CHECK-NEXT: ] define void @test() { entry: Index: llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -11,7 +11,7 @@ ; CHECK-REMARKS-NOT: remark: {{.*}} vectorized loop define void @VF1-VPlanExe() { -; CHECK-LABEL: @VF1-VPlanExe +; CHECK-LABEL: @VF1-VPlanExe( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -22,13 +22,9 @@ ; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[INDUCTION]], 14 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[INDUCTION1]], 14 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[INDUCTION2]], 14 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[INDUCTION3]], 14 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -56,7 +52,7 @@ } define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) { -; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe +; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds double, double* [[PTR1:%.*]], i64 15 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -73,17 +69,9 @@ ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP3]] -; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[VEC_IV]], 14 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[VEC_IV4]], 14 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[VEC_IV5]], 14 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[VEC_IV6]], 14 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: