diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -589,8 +589,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { switch (getVPDefID()) { - case VPBranchOnMaskSC: - return false; case VPWidenIntOrFpInductionSC: case VPWidenPointerInductionSC: case VPWidenCanonicalIVSC: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -50,8 +50,7 @@ /// recipe, if it exists. static void removeRedundantCanonicalIVs(VPlan &Plan); - /// Try to remove dead recipes. At the moment, only dead header recipes are - /// removed. + /// Try to remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); /// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -362,16 +362,19 @@ } void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { - VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - // Remove dead recipes in header block. The recipes in the block are processed - // in reverse order, to catch chains of dead recipes. - // TODO: Remove dead recipes across whole plan. - for (VPRecipeBase &R : make_early_inc_range(reverse(*Header))) { - if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { - return V->getNumUsers() > 0; - })) - continue; - R.eraseFromParent(); + ReversePostOrderTraversal> + RPOT(Plan.getEntry()); + + for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly(RPOT))) { + // The recipes in the block are processed in reverse order, to catch chains + // of dead recipes. + for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { + if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { + return V->getNumUsers() > 0; + })) + continue; + R.eraseFromParent(); + } } } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -65,7 +65,6 @@ ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.1.split: -; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<[[PRED2]]> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -310,9 +309,6 @@ ; CHECK-NEXT: } ; CHECK: loop.2: -; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<[[PRED1]]> -; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<[[PRED2]]> -; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -769,18 +769,15 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -29,32 +29,26 @@ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] -; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX3]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8 -; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT10]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 -; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.if3: ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/pr55100-expand-scev-predicate-used.ll b/llvm/test/Transforms/LoopVectorize/pr55100-expand-scev-predicate-used.ll --- a/llvm/test/Transforms/LoopVectorize/pr55100-expand-scev-predicate-used.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55100-expand-scev-predicate-used.ll @@ -29,21 +29,16 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[N]], [[IV_1]] -; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[UMIN]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[UMIN]] to i16 ; CHECK-NEXT: unreachable ; CHECK: loop.2.header: ; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[IV_2_NEXT:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -49,8 +49,6 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -130,8 +128,6 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -211,8 +207,6 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -399,8 +393,6 @@ ; CHECK-NEXT: Successor(s): next.0.0 ; CHECK-EMPTY: ; CHECK-NEXT: next.0.0: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -510,8 +502,6 @@ ; CHECK-NEXT: Successor(s): next.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.1: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -626,8 +616,6 @@ ; CHECK-NEXT: Successor(s): next.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.1: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -743,8 +731,6 @@ ; CHECK-NEXT: Successor(s): latch ; CHECK-EMPTY: ; CHECK-NEXT: latch: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -827,8 +813,6 @@ ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -912,8 +896,6 @@ ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -978,7 +960,6 @@ ; CHECK-NEXT: Successor(s): loop.then.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.1: -; CHECK-NEXT: WIDEN ir<%sext.l1> = sext vp<[[PRED]]> ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: ; CHECK-NEXT: loop.latch: