diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -480,9 +480,44 @@ class LVFolder final : public ConstantFolder { void anchor() override {} + Value *SimplifyBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const { + using namespace llvm::PatternMatch; + switch (Opc) { + default: + return nullptr; + case Instruction::Add: + if (match(RHS, m_SpecificInt(0))) + return LHS; + if (match(LHS, m_SpecificInt(0))) + return RHS; + return nullptr; + case Instruction::Mul: + if (match(RHS, m_SpecificInt(1))) + return LHS; + if (match(LHS, m_SpecificInt(1))) + return RHS; + return nullptr; + } + } + public: LVFolder() {} + Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, + bool HasNUW, bool HasNSW) const override { + if (auto *V = SimplifyBinOp(Opc, LHS, RHS)) + return V; + return ConstantFolder::FoldNoWrapBinOp(Opc, LHS, RHS, HasNUW, HasNSW); + } + + Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, + Value *RHS) const override { + if (auto *V = SimplifyBinOp(Opc, LHS, RHS)) + return V; + return ConstantFolder::FoldBinOp(Opc, LHS, RHS); + } + Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef IdxList, bool IsInBounds = false) const override { auto *CI = dyn_cast(IdxList[0]); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -18,29 +18,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP12]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP9]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -91,28 +87,26 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 64, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 64, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 64, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP10]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -166,15 +160,14 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP2]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -232,18 +225,16 @@ ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP5]], 1 -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]] -; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; NOSTRIDED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NOSTRIDED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = mul nuw nsw i64 [[INDEX]], 1 +; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; NOSTRIDED-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; NOSTRIDED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -317,17 +308,15 @@ ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NOSTRIDED-NEXT: [[TMP6:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP6]], ptr [[TMP5]], align 4 +; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -449,20 +438,17 @@ ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP9]], 1 -; NOSTRIDED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP10]] -; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; NOSTRIDED-NEXT: [[TMP13:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP10]] -; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 -; NOSTRIDED-NEXT: store [[TMP13]], ptr [[TMP15]], align 4 -; NOSTRIDED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] -; NOSTRIDED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NOSTRIDED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[INDEX]], 1 +; NOSTRIDED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP9]] +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 4 +; NOSTRIDED-NEXT: [[TMP11:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP9]] +; NOSTRIDED-NEXT: store [[TMP11]], ptr [[TMP12]], align 4 +; NOSTRIDED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; NOSTRIDED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -514,32 +500,28 @@ ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]] ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP11:%.*]] = add [[TMP10]], zeroinitializer -; STRIDED-NEXT: [[TMP12:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]] -; STRIDED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]] -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; STRIDED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP16:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] -; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !8 -; STRIDED-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], [[TMP16]] -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !11, !noalias !8 -; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP10]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[TMP13:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], [[TMP13]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !8 +; STRIDED-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[P2]], [[TMP13]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP16]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !11, !noalias !8 +; STRIDED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; STRIDED-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -599,17 +581,15 @@ ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NOSTRIDED-NEXT: [[TMP6:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP6]], ptr [[TMP5]], align 4 +; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NOSTRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -731,38 +711,36 @@ ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[STRIDE]], [[TMP14]] -; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[STRIDE]], [[TMP13]] +; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] +; STRIDED-NEXT: [[TMP16:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP17:%.*]] = add [[DOTSPLAT]], [[TMP16]] ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; STRIDED-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 1 -; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE]], [[TMP22]] -; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP21]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP24]], i64 0 +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP17]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; STRIDED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 +; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[STRIDE]], [[TMP20]] +; STRIDED-NEXT: [[TMP22:%.*]] = mul i64 [[TMP20]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP25:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP26:%.*]] = add [[DOTSPLAT14]], [[TMP25]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP26]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 -; STRIDED-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP28]], [[TMP27]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 -; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] -; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]] -; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; STRIDED-NEXT: [[TMP23:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP24:%.*]] = add [[DOTSPLAT14]], [[TMP23]] +; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP24]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP18]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 +; STRIDED-NEXT: [[TMP26:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP26]], [[TMP25]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 +; STRIDED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4 +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]] +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP14]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP21]] +; STRIDED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; STRIDED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]