diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9132,8 +9132,8 @@ Plan->disableValue2VPValue(); VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); - VPlanTransforms::sinkScalarOperands(*Plan); VPlanTransforms::removeDeadRecipes(*Plan); + VPlanTransforms::sinkScalarOperands(*Plan); VPlanTransforms::mergeReplicateRegions(*Plan); VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan); diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -468,7 +468,6 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%.pn> = phi ir<0>, ir<[[L:%.+]]> ; CHECK-NEXT: vp<[[TRANSFORMED_IV:%.+]]> = TRANSFORMED-IV vp<[[CAN_IV]]>, ir<2>, ir<1> -; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[TRANSFORMED_IV]]> ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 @@ -488,6 +487,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[TRANSFORMED_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -2951,9 +2951,9 @@ ; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE5]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[VEC_IV2:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]] -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]] +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.udiv.if: ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0 @@ -2961,13 +2961,13 @@ ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5]] -; UNROLL-NO-VF: pred.udiv.if4: -; UNROLL-NO-VF-NEXT: [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE5]] +; UNROLL-NO-VF: pred.udiv.if3: +; UNROLL-NO-VF-NEXT: [[INDUCTION4:%.*]] = add i32 [[OFFSET_IDX]], -1 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION4]] ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE5]] ; UNROLL-NO-VF: pred.udiv.continue5: -; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF4]] ] +; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF3]] ] ; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]] ; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI1]], [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -36,42 +36,43 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: store i8 95, i8* [[TMP4]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, i8* null, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP2]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP14]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], -1 +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, i8* null, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP2]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP15]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], -1 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* null, i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP3]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP18]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* null, i64 [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP3]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP19]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], -1 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* null, i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP4]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP22]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], -1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* null, i64 [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP4]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP23]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -85,8 +86,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[C_05:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[C_05]], i64 -1 -; CHECK-NEXT: [[TMP24:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP24]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP25]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: store i8 95, i8* [[INCDEC_PTR]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -89,7 +89,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -99,6 +98,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue @@ -168,7 +168,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -178,6 +177,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -325,7 +325,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -341,6 +340,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -426,7 +426,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -443,6 +442,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -542,7 +542,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -558,6 +557,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue @@ -574,7 +574,7 @@ ; CHECK-NEXT: next.0: ; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]> vp<[[NOT]]> ir -; CHECK-NEXT: BLEND %p = ir<0>/vp<%14> vp<%12>/vp<%9> +; CHECK-NEXT: BLEND %p = ir<0>/vp<[[MASK3]]> vp<[[PRED]]>/vp<[[MASK2]]> ; CHECK-NEXT: Successor(s): then.1 ; CHECK-EMPTY: ; CHECK-NEXT: then.1: