diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9736,6 +9736,13 @@
       // Calculate the pointer for the specific unroll-part.
       GetElementPtrInst *PartPtr = nullptr;
 
+      // Use i32 for the gep index type when the value is constant,
+      // or query DataLayout for a more suitable index type otherwise.
+      const DataLayout &DL =
+          Builder.GetInsertBlock()->getModule()->getDataLayout();
+      Type *IndexTy = State.VF.isScalable() && (Reverse || Part > 0)
+                          ? DL.getIndexType(ScalarDataTy->getPointerTo())
+                          : Builder.getInt32Ty();
       bool InBounds = false;
       if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
         InBounds = gep->isInBounds();
@@ -9744,11 +9751,13 @@
         // wide store needs to start at the last vector element.
         // RunTimeVF = VScale * VF.getKnownMinValue()
        // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-        Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+        Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
         // NumElt = -Part * RunTimeVF
-        Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+        Value *NumElt =
+            Builder.CreateMul(ConstantInt::get(IndexTy, -Part), RunTimeVF);
         // LastLane = 1 - RunTimeVF
-        Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+        Value *LastLane =
+            Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
         PartPtr = cast<GetElementPtrInst>(
             Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
         PartPtr->setIsInBounds(InBounds);
@@ -9759,8 +9768,7 @@
           BlockInMaskParts[Part] =
               Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
       } else {
-        Value *Increment =
-            createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
+        Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
         PartPtr = cast<GetElementPtrInst>(
             Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
         PartPtr->setIsInBounds(InBounds);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -34,9 +34,9 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP12]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], 2
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 2
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP17]] = and i64 [[TMP16]], [[VEC_PHI]]
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    [[TMP27]] = and i64 [[TMP26]], [[VEC_PHI8]]
 ; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
 ; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
-; CHECK-NEXT:    br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP28]], label 
[[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -34,9 +34,9 @@ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP17]] = add [[WIDE_LOAD3]], [[VEC_PHI2]] @@ -71,7 +71,7 @@ ; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]] ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP26]]) ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -33,9 +33,9 @@ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP16]], [[WIDE_LOAD2]]) @@ -67,7 +67,7 @@ ; CHECK-NEXT: [[TMP24]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]]) ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; 
CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -48,9 +48,9 @@ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 [[TMP16]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 @@ -74,16 +74,16 @@ ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP27]], align 1 ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 8 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[OFFSET_IDX]], [[TMP29]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP29]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -126,9 +126,9 @@ ; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 ; CHECK-VF8-NEXT: store shufflevector ( 
insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 16 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 ; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 @@ -146,14 +146,14 @@ ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 ; CHECK-VF8-NEXT: store <8 x i8> , ptr [[TMP21]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 -; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 ; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -227,9 +227,9 @@ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 @@ -247,12 +247,12 @@ ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 
[[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 ; CHECK-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -297,9 +297,9 @@ ; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 ; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 @@ -317,12 +317,12 @@ ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 ; CHECK-VF8-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: @@ -388,9 +388,9 @@ ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 16 -; CHECK-NEXT: [[TMP14:%.*]] = 
getelementptr i8, ptr [[NEXT_GEP]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 32 @@ -471,9 +471,9 @@ ; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP9]], align 1 -; CHECK-VF8-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF8-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 16 -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP11]] ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP12]], align 1 ; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -39,18 +39,18 @@ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i64 [[TMP12]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 2 ; CHECK-NEXT: [[TMP14:%.*]] = fneg [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP15:%.*]] = fneg [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 0 ; CHECK-NEXT: store [[TMP14]], ptr [[TMP17]], align 2 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i64 [[TMP19]] ; CHECK-NEXT: store [[TMP15]], ptr [[TMP20]], align 2 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 @@ -76,7 +76,7 @@ ; CHECK-NEXT: store half [[FNEG]], ptr [[ARRAYIDX2]], align 2 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop 
[[LOOP3:![0-9]+]] ; entry: %cmp6 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -19,15 +19,15 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP5]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -37,14 +37,14 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store float [[TMP11]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop 
[[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -87,16 +87,16 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP8]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = sext [[WIDE_LOAD1]] to +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP6]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP7]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -106,12 +106,12 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]] -; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store float [[TMP11]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -157,13 +157,13 @@ ; CHECK-NEXT: [[INDEX:%.*]] 
= phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -173,8 +173,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP9]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: store i32 3, ptr [[INV]], align 4 @@ -228,14 +228,14 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP5]], poison) +; CHECK-NEXT: call void 
@llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -245,12 +245,12 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP11]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP9]], 3 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[INV]], align 4 -; CHECK-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[INV]], align 4 +; CHECK-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -315,17 +315,16 @@ ; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP15]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP13]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -336,9 +335,9 @@ ; CHECK-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP21]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: store float [[TMP18]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -56,9 +56,9 @@ ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]] +; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP31]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 4 ; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 @@ -68,8 +68,8 @@ ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8 +; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CMO]], 8 ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -83,7 +83,7 @@ ; CHECK-NEXT: [[IV_2_NEXT]] = getelementptr inbounds ptr, ptr [[IV_2]], i64 1 ; CHECK-NEXT: [[IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[IV_1]], i64 1 ; CHECK-NEXT: [[CMP_I_I_NOT_I:%.*]] = icmp eq ptr [[IV_2_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[CMP_I_I_NOT_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_I_I_NOT_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi ptr [ [[IV_1]], [[LOOP]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret ptr [[RES_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -62,15 +62,15 @@ ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP22]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[TMP23]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP23]], i32 [[TMP29]] +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 2 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP23]], i64 [[TMP29]] ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP30]], align 4 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP25]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP31]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP33:%.*]] = mul i32 [[TMP32]], 2 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP25]], i32 [[TMP33]] +; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP25]], i64 [[TMP33]] ; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP34]], align 4 ; CHECK-NEXT: [[TMP35:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]] ; CHECK-NEXT: [[TMP36:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]] @@ -80,15 +80,15 @@ ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP22]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[TMP37]], i32 0 ; CHECK-NEXT: store [[TMP35]], ptr [[TMP41]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP43:%.*]] = mul i32 [[TMP42]], 2 -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP37]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 2 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP37]], i64 [[TMP43]] ; CHECK-NEXT: store [[TMP36]], ptr [[TMP44]], align 4 ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[TMP39]], i32 0 ; CHECK-NEXT: store [[TMP35]], ptr [[TMP45]], align 4 -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], 2 -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP39]], i32 [[TMP47]] +; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 2 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP47]] ; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 4 ; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP49]], 4 @@ -114,7 +114,7 @@ ; CHECK-NEXT: store i64 [[ADD]], ptr [[GEP_DST_2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP10]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -72,19 +72,19 @@ ; 
CHECK-NEXT: [[TMP35:%.*]] = getelementptr i32, i32* [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = bitcast i32* [[TMP35]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT]], * [[TMP36]], i32 4, [[ACTIVE_LANE_MASK7]]) -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], 4 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 4 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = bitcast i32* [[TMP39]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT12]], * [[TMP40]], i32 4, [[ACTIVE_LANE_MASK8]]) -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], 8 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP42]] ; CHECK-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT14]], * [[TMP44]], i32 4, [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP46:%.*]] = mul i32 [[TMP45]], 12 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP46]] +; CHECK-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 12 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = bitcast i32* [[TMP47]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT16]], * [[TMP48]], i32 4, [[ACTIVE_LANE_MASK10]]) ; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() @@ -196,19 +196,19 @@ ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i32, i32* [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = bitcast i32* [[TMP35]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP36]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], 4 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 4 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = bitcast i32* [[TMP39]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP40]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], 8 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP42]] ; CHECK-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP44]], i32 4, [[ACTIVE_LANE_MASK9]], poison) -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP46:%.*]] = mul i32 
[[TMP45]], 12 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[TMP31]], i32 [[TMP46]] +; CHECK-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 12 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[TMP31]], i64 [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = bitcast i32* [[TMP47]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP48]], i32 4, [[ACTIVE_LANE_MASK10]], poison) ; CHECK-NEXT: [[TMP49:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer @@ -226,19 +226,19 @@ ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr i32, i32* [[TMP53]], i32 0 ; CHECK-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT]], * [[TMP62]], i32 4, [[TMP57]]) -; CHECK-NEXT: [[TMP63:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP64:%.*]] = mul i32 [[TMP63]], 4 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, i32* [[TMP53]], i32 [[TMP64]] +; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP63]], 4 +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, i32* [[TMP53]], i64 [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = bitcast i32* [[TMP65]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT15]], * [[TMP66]], i32 4, [[TMP58]]) -; CHECK-NEXT: [[TMP67:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP68:%.*]] = mul i32 [[TMP67]], 8 -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, i32* [[TMP53]], i32 [[TMP68]] +; CHECK-NEXT: [[TMP67:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP67]], 8 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, i32* [[TMP53]], i64 [[TMP68]] ; CHECK-NEXT: [[TMP70:%.*]] = bitcast i32* [[TMP69]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT17]], * [[TMP70]], i32 4, [[TMP59]]) -; CHECK-NEXT: [[TMP71:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP72:%.*]] = mul i32 [[TMP71]], 12 -; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i32, i32* [[TMP53]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 12 +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i32, i32* [[TMP53]], i64 [[TMP72]] ; CHECK-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to * ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT19]], * [[TMP74]], i32 4, [[TMP60]]) ; CHECK-NEXT: [[TMP75:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -29,25 +29,23 @@ ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[N]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, 
zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 3 -; CHECK-NEXT: [[TMP16:%.*]] = sub i32 1, [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP17]] -; CHECK-NEXT: store [[TMP12]], ptr [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP9]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP15]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP16]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -62,12 +60,12 @@ ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00 +; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP20]], 1.000000e+00 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: %cmp7 = icmp sgt i64 %N, 0 @@ -121,25 +119,23 @@ ; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[N]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = shl i32 [[TMP13]], 3 
-; CHECK-NEXT: [[TMP15:%.*]] = sub i32 1, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = sub i32 1, [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP23]] -; CHECK-NEXT: store [[TMP18]], ptr [[TMP24]], align 8 -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[TMP25]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[TMP23]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -154,8 +150,8 @@ ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP28]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -55,29 +55,27 @@ ; 
CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to * ; CHECK-NEXT: store [[TMP15]], * [[TMP18]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP20:%.*]] = shl nuw nsw i32 [[TMP19]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to * -; CHECK-NEXT: store [[TMP16]], * [[TMP23]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = add nsw [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP25:%.*]] = add nsw [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to * -; CHECK-NEXT: store [[TMP24]], * [[TMP27]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP29:%.*]] = shl nuw nsw i32 [[TMP28]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to * -; CHECK-NEXT: store [[TMP25]], * [[TMP32]], align 4 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = shl nuw nsw i64 [[TMP33]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = shl nuw nsw i64 [[TMP19]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to * +; CHECK-NEXT: store [[TMP16]], * [[TMP22]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = add nsw [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = add nsw [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to * +; CHECK-NEXT: store [[TMP23]], * [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to * +; CHECK-NEXT: store [[TMP24]], * [[TMP30]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP32:%.*]] = shl nuw nsw i64 [[TMP31]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP32]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -89,18 +87,18 
@@ ; CHECK-NEXT: [[PTR_014:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR_014]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[PTR_014]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[PTR_014]], align 4 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[PTR_014]], i64 2 -; CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_013]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP37]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP35]], 1 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_013]] ; CHECK-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -161,27 +159,25 @@ ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to * ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , * [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to * -; CHECK-NEXT: store [[TMP10]], * [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to * -; CHECK-NEXT: store [[TMP11]], * [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to * +; CHECK-NEXT: 
[[WIDE_LOAD7:%.*]] = load , * [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[NEXT_GEP5]] to * +; CHECK-NEXT: store [[TMP9]], * [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to * +; CHECK-NEXT: store [[TMP10]], * [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -194,8 +190,8 @@ ; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[S_010]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP19]], 1 ; CHECK-NEXT: store i32 [[MUL]], i32* [[D_09]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -46,7 +46,7 @@ ; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -90,7 +90,7 @@ ; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -966,9 +966,9 @@ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: 
[[TMP14:%.*]] = mul i32 [[TMP13]], 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = icmp ne [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) @@ -981,9 +981,9 @@ ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], [[TMP20]], [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select [[TMP17]], [[TMP21]], [[WIDE_LOAD1]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP25]] +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP25]] ; CHECK-NEXT: store [[PREDPHI2]], ptr [[TMP26]], align 1 ; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -36,9 +36,9 @@ ; OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[TMP9]] ; OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 2 -; OUTLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; OUTLOOP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP14]] +; OUTLOOP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; OUTLOOP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; OUTLOOP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP14]] ; OUTLOOP-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 2 ; OUTLOOP-NEXT: [[TMP16:%.*]] = sext [[WIDE_LOAD]] to ; OUTLOOP-NEXT: [[TMP17:%.*]] = sext [[WIDE_LOAD2]] to @@ -67,7 +67,7 @@ ; OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] ; OUTLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; OUTLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; OUTLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; OUTLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OUTLOOP: for.cond.cleanup.loopexit: ; OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] ; OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP]] @@ -104,9 +104,9 @@ ; INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[TMP9]] ; INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 2 -; INLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 -; 
INLOOP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP14]] +; INLOOP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; INLOOP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; INLOOP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP14]] ; INLOOP-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 2 ; INLOOP-NEXT: [[TMP16:%.*]] = sext [[WIDE_LOAD]] to ; INLOOP-NEXT: [[TMP17:%.*]] = sext [[WIDE_LOAD2]] to @@ -136,7 +136,7 @@ ; INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] ; INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; INLOOP: for.cond.cleanup.loopexit: ; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -50,9 +50,9 @@ ; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP15]] ; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0 ; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP20]], i32 4, [[TMP16]], poison) -; VLENUNK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() -; VLENUNK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2 -; VLENUNK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i32 [[TMP22]] +; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 +; VLENUNK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i64 [[TMP22]] ; VLENUNK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP23]], i32 4, [[TMP17]], poison) ; VLENUNK-NEXT: [[TMP24:%.*]] = xor [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; VLENUNK-NEXT: [[TMP25:%.*]] = xor [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -64,9 +64,9 @@ ; VLENUNK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]] ; VLENUNK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0 ; VLENUNK-NEXT: store [[TMP26]], ptr [[TMP30]], align 4 -; VLENUNK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vscale.i32() -; VLENUNK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 2 -; VLENUNK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 [[TMP32]] +; VLENUNK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 2 +; VLENUNK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP32]] ; VLENUNK-NEXT: store [[TMP27]], ptr [[TMP33]], align 4 ; VLENUNK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 @@ -95,7 +95,7 @@ ; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VLENUNK-NEXT: br 
i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -51,7 +51,7 @@ ; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; @@ -93,7 +93,7 @@ ; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VLEN128: for.end: ; VLEN128-NEXT: ret void ; @@ -146,16 +146,16 @@ ; VLENUNK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; VLENUNK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; VLENUNK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]] +; VLENUNK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] ; VLENUNK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 4 ; VLENUNK-NEXT: [[TMP16:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] ; VLENUNK-NEXT: store [[TMP16]], ptr [[TMP12]], align 4 -; VLENUNK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; VLENUNK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2 -; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]] +; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP19]] ; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP20]], align 4 ; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 @@ -208,16 +208,16 @@ ; VLEN128-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; VLEN128-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; VLEN128-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; VLEN128-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2 -; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]] +; VLEN128-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] ; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load 
, ptr [[TMP15]], align 4 ; VLEN128-NEXT: [[TMP16:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLEN128-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] ; VLEN128-NEXT: store [[TMP16]], ptr [[TMP12]], align 4 -; VLEN128-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; VLEN128-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2 -; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]] +; VLEN128-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP19]] ; VLEN128-NEXT: store [[TMP17]], ptr [[TMP20]], align 4 ; VLEN128-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; VLEN128-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -31,26 +31,24 @@ ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = shl i32 [[TMP9]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP11]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP14]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i64 [[TMP20]] -; CHECK-NEXT: store [[TMP15]], ptr [[TMP21]], align 8 -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[TMP23]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP24]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] +; CHECK-NEXT: store [[TMP12]], ptr [[TMP16]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -60,13 +58,13 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP26]], [[I_08]] +; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -114,24 +112,22 @@ ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP17]] -; CHECK-NEXT: store [[TMP13]], ptr [[TMP18]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP7]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP12]] +; CHECK-NEXT: store [[TMP10]], ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -141,8 +137,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[I_08]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP17]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -191,8 +187,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[CAST_CRD]], 1 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP5:%.*]] = shl [[TMP4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() @@ -205,12 +201,12 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -268,8 +264,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[CAST_CRD]], 2.000000e+00 +; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DOTCAST]], 2.000000e+00 ; CHECK-NEXT: [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00 ; CHECK-NEXT: 
[[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP6:%.*]] = uitofp <vscale x 4 x i32> [[TMP5]] to <vscale x 4 x float>
@@ -287,12 +283,12 @@
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x float> [[VEC_IND]], ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd <vscale x 4 x float> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -46,19 +46,17 @@
; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
-; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
-; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
-; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
-; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXB]], i64 %[[VSCALE2_EXT]]
+; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
+; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 2
+; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXB]], i64 %[[VSCALE2]]
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, zeroinitializer)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, zeroinitializer)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
-; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
-; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
-; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
-; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXA]], i64 %[[VSCALE2_EXT]]
+; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
+; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 2
+; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXA]], i64 %[[VSCALE2]]
; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
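
For readers skimming the test churn above, here is a minimal before/after sketch of the addressing pattern the updated CHECK lines reflect. The value names (%vs, %runtimevf, %lastlane, %addr, %base) and the i64 element type are illustrative only, mirroring the reverse-access hunks; they are not taken from any single test.

  ; Before: the per-part offset is built as i32 and then widened for the GEP
  %vs       = call i32 @llvm.vscale.i32()
  %runtimevf = shl i32 %vs, 3                    ; RunTimeVF = vscale * 8
  %lastlane = sub i32 1, %runtimevf              ; 1 - RunTimeVF (reverse access)
  %off      = sext i32 %lastlane to i64
  %addr     = getelementptr inbounds i64, ptr %base, i64 %off

  ; After: the offset is built directly in the pointer's index type (i64 here),
  ; so the sext/zext disappears and the GEP indexes with i64
  %vs       = call i64 @llvm.vscale.i64()
  %runtimevf = shl i64 %vs, 3
  %lastlane = sub i64 1, %runtimevf
  %addr     = getelementptr inbounds i64, ptr %base, i64 %lastlane

Note that accesses whose offset is a constant (the part-0 `getelementptr inbounds ..., i32 0` context lines left untouched in the hunks above) keep the i32 index; only the scalable, runtime-computed offsets move to the wider type, which is why each old `llvm.vscale.i32` + `sext`/`zext` pair collapses to a single `llvm.vscale.i64` in the new CHECK lines.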