diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8727,7 +8727,8 @@ // IV by VF * UF. bool HasNUW = Style == TailFoldingStyle::None; auto *CanonicalIVIncrement = - new VPInstruction(VPInstruction::CanonicalIVIncrement, {CanonicalIVPHI}, + new VPInstruction(VPInstruction::CanonicalIVIncrement, + {CanonicalIVPHI, &Plan.getRuntimeVFxUF()}, {HasNUW, false}, DL, "index.next"); CanonicalIVPHI->addOperand(CanonicalIVIncrement); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2459,6 +2459,9 @@ /// Represents the vector trip count. VPValue VectorTripCount; + /// Represents the runtime VF * UF of the vector loop region. + VPValue RuntimeVFxUF; + /// Holds a mapping between Values and their corresponding VPValue inside /// VPlan. Value2VPValueTy Value2VPValue; @@ -2536,6 +2539,9 @@ /// The vector trip count. VPValue &getVectorTripCount() { return VectorTripCount; } + /// Returns runtime VF * UF for the vector loop region. + VPValue &getRuntimeVFxUF() { return RuntimeVFxUF; } + /// Mark the plan to indicate that using Value2VPValue is not safe any /// longer, because it may be stale. void disableValue2VPValue() { Value2VPValueEnabled = false; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -750,6 +750,12 @@ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) State.set(&VectorTripCount, VectorTripCountV, Part); + IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); + // FIXME: Model runtime VF * UF computation completely in VPlan. + State.set(&RuntimeVFxUF, + createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF), + 0); + // When vectorizing the epilogue loop, the canonical induction start value // needs to be changed from zero to the value after the main vector loop. // FIXME: Improve modeling for canonical IV start values in the epilogue loop. @@ -860,6 +866,12 @@ O << "VPlan '" << getName() << "' {"; + if (RuntimeVFxUF.getNumUsers() > 0) { + O << "\nLive-in "; + RuntimeVFxUF.printAsOperand(O, SlotTracker); + O << " = runtime VFxUF"; + } + if (VectorTripCount.getNumUsers() > 0) { O << "\nLive-in "; VectorTripCount.printAsOperand(O, SlotTracker); @@ -1216,6 +1228,7 @@ } void VPSlotTracker::assignSlots(const VPlan &Plan) { + assignSlot(&Plan.RuntimeVF); assignSlot(&Plan.VectorTripCount); if (Plan.BackedgeTakenCount) assignSlot(Plan.BackedgeTakenCount); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -318,10 +318,7 @@ case VPInstruction::CanonicalIVIncrement: { if (Part == 0) { auto *Phi = State.get(getOperand(0), 0); - // The loop step is equal to the vectorization factor (num of SIMD - // elements) times the unroll factor (num of SIMD instructions). - Value *Step = - createStepForVF(Builder, Phi->getType(), State.VF, State.UF); + Value *Step = State.get(getOperand(1), 0); return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(), false); } return State.get(this, 0); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -18,16 +18,16 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -17,28 +17,28 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP7]], i32 [[TMP15]] -; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP7:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = add [[DOTSPLAT]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP9]], i32 [[TMP17]] +; CHECK-NEXT: store i64 [[TMP18]], ptr [[TMP14]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -24,33 +24,33 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv2i32() +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]] +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[A:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP17]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP19]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,7 +58,7 @@ ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP13]], i32 [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP15]], i32 [[TMP23]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -14,24 +14,24 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = select fast [[TMP7]], [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP10]]) -; CHECK-NEXT: [[TMP12]] = fadd fast float [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP9]], [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP12]]) +; CHECK-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -39,7 +39,7 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -57,9 +57,9 @@ ; CHECK-NEXT: [[RES]] = phi float [ [[FADD]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RES_LCSSA]] ; entry: @@ -101,25 +101,25 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = select fast [[TMP7]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP10]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP11]], float [[VEC_PHI]] -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP9]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP12]]) +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -42,22 +42,22 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP7]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP7]]) +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP9]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: @@ -89,18 +89,18 @@ ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-ORDERED-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -108,7 +108,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -120,7 +120,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict @@ -138,25 +138,25 @@ ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP15]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP11]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -164,7 +164,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -176,7 +176,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] ; @@ -230,60 +230,60 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP34]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP35]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP36]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] -; CHECK-UNORDERED-NEXT: [[TMP37]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] -; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP39]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP36]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[TMP37]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP38]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] +; CHECK-UNORDERED-NEXT: [[TMP39]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP35]], [[TMP34]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP36]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP37]], [[BIN_RDX7]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP37]], [[TMP36]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP38]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP39]], [[BIN_RDX7]] ; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -316,51 +316,51 @@ ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], [[WIDE_LOAD1]]) -; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], [[WIDE_LOAD2]]) -; CHECK-ORDERED-NEXT: [[TMP37]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP36]], [[WIDE_LOAD3]]) -; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP39]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP36]], [[WIDE_LOAD1]]) +; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP37]], [[WIDE_LOAD2]]) +; CHECK-ORDERED-NEXT: [[TMP39]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], [[WIDE_LOAD3]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -368,7 +368,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -380,7 +380,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll @@ -397,34 +397,36 @@ ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = sub i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = icmp ugt i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = sub i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = icmp ugt i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = sub i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = icmp ugt i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i64 [[TMP28]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = sub i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = sub i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = sub i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = sub i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = icmp ugt i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i64 [[TMP30]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -436,65 +438,63 @@ ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP68:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]] -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = add i64 [[TMP38]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]] -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]] -; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP46]] -; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP53]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP56]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP59]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP61]]) -; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP62]], [[TMP63]]) -; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP70:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]] +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] +; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]] +; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP38]] +; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]] +; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP53]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP55]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP56]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP58]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP59]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = mul i64 [[TMP60]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP61]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP62]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP63]]) +; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP64]], [[TMP65]]) -; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP68]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP66]], [[TMP67]]) -; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = add i64 [[INDEX]], [[TMP70]] -; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = mul i64 [[TMP72]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = add i64 [[INDEX]], [[TMP73]] -; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = mul i64 [[TMP75]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = add i64 [[INDEX]], [[TMP76]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP71]], i64 [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP74]], i64 [[TMP25]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP77]], i64 [[TMP30]]) -; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = mul i64 [[TMP78]], 32 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP79]] +; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP68:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP66]], [[TMP67]]) +; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP70]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP68]], [[TMP69]]) +; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = add i64 [[INDEX]], [[TMP72]] +; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], [[TMP75]] +; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = mul i64 [[TMP77]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = add i64 [[INDEX]], [[TMP78]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP73]], i64 [[TMP22]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP76]], i64 [[TMP27]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP79]], i64 [[TMP32]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -505,7 +505,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP68]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP70]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -517,7 +517,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP68]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP70]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] ; @@ -589,31 +589,31 @@ ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A2]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A1]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A2]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A1]], i32 0 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-UNORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8f32( [[WIDE_VEC]]) -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-UNORDERED-NEXT: [[TMP14]] = fadd [[TMP12]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP15]] = fadd [[TMP13]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd [[TMP14]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd [[TMP15]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP15]]) +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP16]]) +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: @@ -661,24 +661,24 @@ ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP11]], align 4 ; CHECK-ORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8f32( [[WIDE_VEC]]) -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) -; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP10]]) -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-ORDERED-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) +; CHECK-ORDERED-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP12]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -686,8 +686,8 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] @@ -704,8 +704,8 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 ; CHECK-ORDERED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-NEXT: ret void @@ -732,33 +732,33 @@ ; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = sub i64 [[TMP2]], [[TMP11]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP2]], [[TMP11]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP15]], i32 4, [[INTERLEAVED_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, [[INTERLEAVED_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8f32( [[WIDE_MASKED_VEC]]) -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP18]]) -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]] +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP19]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP20]]) +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP23]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP22]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -766,8 +766,8 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] @@ -784,8 +784,8 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 ; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-TF-NEXT: ret void @@ -867,26 +867,26 @@ ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-UNORDERED-NEXT: [[TMP11]] = fadd [[VEC_PHI]], [[TMP10]] -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] +; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd [[VEC_PHI]], [[TMP12]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP11]]) +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP13]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: @@ -929,22 +929,22 @@ ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-ORDERED-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP10]]) -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] +; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP12]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -952,7 +952,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -967,7 +967,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-ORDERED: for.end.loopexit: -; CHECK-ORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_END]] ; CHECK-ORDERED: for.end: ; CHECK-ORDERED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ] @@ -993,29 +993,29 @@ ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], [[TMP7]] -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[TMP9]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[N]], [[TMP9]] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP17]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP19]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -1023,7 +1023,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1038,7 +1038,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-ORDERED-TF: for.end.loopexit: -; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END]] ; CHECK-ORDERED-TF: for.end: ; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ] @@ -1110,28 +1110,28 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], zeroinitializer -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] -; CHECK-UNORDERED-NEXT: [[TMP11]] = fadd [[VEC_PHI]], [[PREDPHI]] -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], zeroinitializer +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] +; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd [[VEC_PHI]], [[PREDPHI]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP11]]) +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP13]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: @@ -1171,24 +1171,24 @@ ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], zeroinitializer -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] -; CHECK-ORDERED-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[PREDPHI]]) -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], zeroinitializer +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] +; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[PREDPHI]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -1196,7 +1196,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1216,7 +1216,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[RDX]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_conditional @@ -1234,34 +1234,34 @@ ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = fcmp une [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP14]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, [[TMP15]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP18]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD1]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = or [[TMP15]], [[TMP18]] -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[TMP19]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = fcmp une [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP18]], i32 4, [[TMP17]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP19]], zeroinitializer +; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP20]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD1]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = or [[TMP17]], [[TMP20]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select [[TMP21]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP23]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP22]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1269,7 +1269,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1289,7 +1289,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[RDX]] ; @@ -1358,26 +1358,26 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = fadd [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP10]] = fadd [[TMP7]], [[WIDE_LOAD1]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = fadd [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP12]] = fadd [[TMP9]], [[WIDE_LOAD1]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP10]]) +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP12]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: @@ -1498,78 +1498,78 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP19]] -; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP38]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 -; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP40]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP41]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 16 -; CHECK-UNORDERED-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP43]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP44]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 24 -; CHECK-UNORDERED-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP46]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP47]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP48]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD]], [[WIDE_LOAD7]], [[VEC_PHI]]) -; CHECK-UNORDERED-NEXT: [[TMP49]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) -; CHECK-UNORDERED-NEXT: [[TMP50]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) -; CHECK-UNORDERED-NEXT: [[TMP51]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP53]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]] +; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP40]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-UNORDERED-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP42]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP43]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 16 +; CHECK-UNORDERED-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP45]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP46]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP48:%.*]] = mul i64 [[TMP47]], 24 +; CHECK-UNORDERED-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP48]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP49]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP50]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD]], [[WIDE_LOAD7]], [[VEC_PHI]]) +; CHECK-UNORDERED-NEXT: [[TMP51]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) +; CHECK-UNORDERED-NEXT: [[TMP52]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) +; CHECK-UNORDERED-NEXT: [[TMP53]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP49]], [[TMP48]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd [[TMP50]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd [[TMP51]], [[BIN_RDX11]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP51]], [[TMP50]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd [[TMP52]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd [[TMP53]], [[BIN_RDX11]] ; CHECK-UNORDERED-NEXT: [[TMP55:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX12]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1604,73 +1604,73 @@ ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP55:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP19]] -; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP38]], align 4 -; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 -; CHECK-ORDERED-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP40]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP41]], align 4 -; CHECK-ORDERED-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 16 -; CHECK-ORDERED-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP43]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP44]], align 4 -; CHECK-ORDERED-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 24 -; CHECK-ORDERED-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP46]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP47]], align 4 -; CHECK-ORDERED-NEXT: [[TMP48:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD4]] -; CHECK-ORDERED-NEXT: [[TMP49:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; CHECK-ORDERED-NEXT: [[TMP50:%.*]] = fmul [[WIDE_LOAD2]], [[WIDE_LOAD6]] -; CHECK-ORDERED-NEXT: [[TMP51:%.*]] = fmul [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; CHECK-ORDERED-NEXT: [[TMP52:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP48]]) -; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP52]], [[TMP49]]) -; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP53]], [[TMP50]]) -; CHECK-ORDERED-NEXT: [[TMP55]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) -; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP57]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]] +; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP40]], align 4 +; CHECK-ORDERED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-ORDERED-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP42]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP43]], align 4 +; CHECK-ORDERED-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 16 +; CHECK-ORDERED-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP45]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP46]], align 4 +; CHECK-ORDERED-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP48:%.*]] = mul i64 [[TMP47]], 24 +; CHECK-ORDERED-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP48]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP49]], align 4 +; CHECK-ORDERED-NEXT: [[TMP50:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD4]] +; CHECK-ORDERED-NEXT: [[TMP51:%.*]] = fmul [[WIDE_LOAD1]], [[WIDE_LOAD5]] +; CHECK-ORDERED-NEXT: [[TMP52:%.*]] = fmul [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = fmul [[WIDE_LOAD3]], [[WIDE_LOAD7]] +; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP50]]) +; CHECK-ORDERED-NEXT: [[TMP55:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) +; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP55]], [[TMP52]]) +; CHECK-ORDERED-NEXT: [[TMP57]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP56]], [[TMP53]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -1678,7 +1678,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP55]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP57]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1692,7 +1692,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP55]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP57]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict @@ -1709,34 +1709,36 @@ ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = sub i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = icmp ugt i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = sub i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = icmp ugt i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = sub i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = icmp ugt i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i64 [[TMP28]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = sub i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = sub i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = sub i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = sub i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = icmp ugt i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i64 [[TMP30]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -1748,87 +1750,85 @@ ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP86:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]] -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = add i64 [[TMP38]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]] -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]] -; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP46]] -; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP53]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP56]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP59]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP41]] -; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP46]] -; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP65]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = mul i64 [[TMP66]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP67]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP68]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP70]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP71]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = mul i64 [[TMP72]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP73]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP74]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] -; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = fmul [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] -; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = fmul [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] -; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = fmul [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP75]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP79]]) -; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = select [[ACTIVE_LANE_MASK6]], [[TMP76]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP80]], [[TMP81]]) -; CHECK-ORDERED-TF-NEXT: [[TMP83:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP77]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP88:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]] +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] +; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]] +; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP38]] +; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]] +; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP53]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP55]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP56]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP58]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP59]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = mul i64 [[TMP60]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP61]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP62]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP33]] +; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP38]] +; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP48]] +; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP67]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP68:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = mul i64 [[TMP68]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP69]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP70]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP72]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP73]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP75]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP76]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] +; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = fmul [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] +; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = fmul [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] +; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = fmul [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] +; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP77]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP81]]) +; CHECK-ORDERED-TF-NEXT: [[TMP83:%.*]] = select [[ACTIVE_LANE_MASK6]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP84:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP82]], [[TMP83]]) -; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP86]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) -; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = add i64 [[INDEX]], [[TMP88]] -; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP91:%.*]] = mul i64 [[TMP90]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP92:%.*]] = add i64 [[INDEX]], [[TMP91]] -; CHECK-ORDERED-TF-NEXT: [[TMP93:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP94:%.*]] = mul i64 [[TMP93]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP95:%.*]] = add i64 [[INDEX]], [[TMP94]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP89]], i64 [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP92]], i64 [[TMP25]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP95]], i64 [[TMP30]]) -; CHECK-ORDERED-TF-NEXT: [[TMP96:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP97:%.*]] = mul i64 [[TMP96]], 32 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP97]] +; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP79]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP86:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) +; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP80]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP88]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP86]], [[TMP87]]) +; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP91:%.*]] = add i64 [[INDEX]], [[TMP90]] +; CHECK-ORDERED-TF-NEXT: [[TMP92:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP93:%.*]] = mul i64 [[TMP92]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP94:%.*]] = add i64 [[INDEX]], [[TMP93]] +; CHECK-ORDERED-TF-NEXT: [[TMP95:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP96:%.*]] = mul i64 [[TMP95]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP97:%.*]] = add i64 [[INDEX]], [[TMP96]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP91]], i64 [[TMP22]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP94]], i64 [[TMP27]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP97]], i64 [[TMP32]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP98:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP99:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP100:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -1839,7 +1839,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP88]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1853,7 +1853,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP88]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] ; @@ -1912,78 +1912,78 @@ ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP19]] -; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP38]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 -; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP40]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP41]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 16 -; CHECK-UNORDERED-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP43]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP44]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 24 -; CHECK-UNORDERED-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP46]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP47]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP48]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD]], [[WIDE_LOAD7]], [[VEC_PHI]]) -; CHECK-UNORDERED-NEXT: [[TMP49]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) -; CHECK-UNORDERED-NEXT: [[TMP50]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) -; CHECK-UNORDERED-NEXT: [[TMP51]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP53]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]] +; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i32 0 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP40]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-UNORDERED-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP42]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP43]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 16 +; CHECK-UNORDERED-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP45]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP46]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP48:%.*]] = mul i64 [[TMP47]], 24 +; CHECK-UNORDERED-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP48]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP49]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP50]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD]], [[WIDE_LOAD7]], [[VEC_PHI]]) +; CHECK-UNORDERED-NEXT: [[TMP51]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) +; CHECK-UNORDERED-NEXT: [[TMP52]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) +; CHECK-UNORDERED-NEXT: [[TMP53]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan [[TMP49]], [[TMP48]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan [[TMP50]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan [[TMP51]], [[BIN_RDX11]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan [[TMP51]], [[TMP50]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan [[TMP52]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan [[TMP53]], [[BIN_RDX11]] ; CHECK-UNORDERED-NEXT: [[TMP55:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX12]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2018,73 +2018,73 @@ ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP55:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1 -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[TMP13]] -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0 -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1 -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[TMP18]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] -; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 -; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP26]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 -; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP29]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 24 -; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP32]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP33]], align 4 -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] -; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP19]] -; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP38]], align 4 -; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 -; CHECK-ORDERED-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP40]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP41]], align 4 -; CHECK-ORDERED-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 16 -; CHECK-ORDERED-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP43]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP44]], align 4 -; CHECK-ORDERED-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP46:%.*]] = mul i64 [[TMP45]], 24 -; CHECK-ORDERED-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i64 [[TMP46]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP47]], align 4 -; CHECK-ORDERED-NEXT: [[TMP48:%.*]] = fmul nnan [[WIDE_LOAD]], [[WIDE_LOAD4]] -; CHECK-ORDERED-NEXT: [[TMP49:%.*]] = fmul nnan [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; CHECK-ORDERED-NEXT: [[TMP50:%.*]] = fmul nnan [[WIDE_LOAD2]], [[WIDE_LOAD6]] -; CHECK-ORDERED-NEXT: [[TMP51:%.*]] = fmul nnan [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; CHECK-ORDERED-NEXT: [[TMP52:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP48]]) -; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP52]], [[TMP49]]) -; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP53]], [[TMP50]]) -; CHECK-ORDERED-NEXT: [[TMP55]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) -; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP57]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 0 +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 1 +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 24 +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 0 +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 1 +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], [[TMP20]] +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] +; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP26]], align 4 +; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 16 +; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP31]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 24 +; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP34]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP35]], align 4 +; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] +; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]] +; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i32 0 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP40]], align 4 +; CHECK-ORDERED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 8 +; CHECK-ORDERED-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP42]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP43]], align 4 +; CHECK-ORDERED-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 16 +; CHECK-ORDERED-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP45]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP46]], align 4 +; CHECK-ORDERED-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP48:%.*]] = mul i64 [[TMP47]], 24 +; CHECK-ORDERED-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP36]], i64 [[TMP48]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP49]], align 4 +; CHECK-ORDERED-NEXT: [[TMP50:%.*]] = fmul nnan [[WIDE_LOAD]], [[WIDE_LOAD4]] +; CHECK-ORDERED-NEXT: [[TMP51:%.*]] = fmul nnan [[WIDE_LOAD1]], [[WIDE_LOAD5]] +; CHECK-ORDERED-NEXT: [[TMP52:%.*]] = fmul nnan [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = fmul nnan [[WIDE_LOAD3]], [[WIDE_LOAD7]] +; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP50]]) +; CHECK-ORDERED-NEXT: [[TMP55:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) +; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP55]], [[TMP52]]) +; CHECK-ORDERED-NEXT: [[TMP57]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP56]], [[TMP53]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED: middle.block: @@ -2092,7 +2092,7 @@ ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP55]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP57]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2106,7 +2106,7 @@ ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP55]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP57]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf @@ -2123,34 +2123,36 @@ ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[N]], [[TMP12]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = sub i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = icmp ugt i64 [[N]], [[TMP17]] -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = sub i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = icmp ugt i64 [[N]], [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = sub i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = icmp ugt i64 [[N]], [[TMP27]] -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i64 [[TMP28]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = sub i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[N]], [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = sub i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[N]], [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = sub i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[N]], [[TMP24]] +; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = sub i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = icmp ugt i64 [[N]], [[TMP29]] +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i64 [[TMP30]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -2162,87 +2164,85 @@ ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP86:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]] -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = add i64 [[TMP38]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]] -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]] -; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP46]] -; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP53]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP56]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP47]], i64 [[TMP59]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP41]] -; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP46]] -; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP65]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = mul i64 [[TMP66]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP67]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP68]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP70]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP71]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = mul i64 [[TMP72]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, ptr [[TMP61]], i64 [[TMP73]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP74]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = fmul nnan [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] -; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = fmul nnan [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] -; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = fmul nnan [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] -; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = fmul nnan [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = select nnan [[ACTIVE_LANE_MASK]], [[TMP75]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP79]]) -; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = select nnan [[ACTIVE_LANE_MASK6]], [[TMP76]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP80]], [[TMP81]]) -; CHECK-ORDERED-TF-NEXT: [[TMP83:%.*]] = select nnan [[ACTIVE_LANE_MASK7]], [[TMP77]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP88:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]] +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] +; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]] +; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP38]] +; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]] +; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP53]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP55]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP56]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP58]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP59]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = mul i64 [[TMP60]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP49]], i64 [[TMP61]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP62]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP33]] +; CHECK-ORDERED-TF-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP38]] +; CHECK-ORDERED-TF-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP48]] +; CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP67]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP68:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = mul i64 [[TMP68]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP69]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP70]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP72]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP73]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[TMP63]], i64 [[TMP75]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP76]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP77:%.*]] = fmul nnan [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] +; CHECK-ORDERED-TF-NEXT: [[TMP78:%.*]] = fmul nnan [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] +; CHECK-ORDERED-TF-NEXT: [[TMP79:%.*]] = fmul nnan [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] +; CHECK-ORDERED-TF-NEXT: [[TMP80:%.*]] = fmul nnan [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] +; CHECK-ORDERED-TF-NEXT: [[TMP81:%.*]] = select nnan [[ACTIVE_LANE_MASK]], [[TMP77]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP82:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP81]]) +; CHECK-ORDERED-TF-NEXT: [[TMP83:%.*]] = select nnan [[ACTIVE_LANE_MASK6]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP84:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP82]], [[TMP83]]) -; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select nnan [[ACTIVE_LANE_MASK8]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP86]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) -; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = add i64 [[INDEX]], [[TMP88]] -; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP91:%.*]] = mul i64 [[TMP90]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP92:%.*]] = add i64 [[INDEX]], [[TMP91]] -; CHECK-ORDERED-TF-NEXT: [[TMP93:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP94:%.*]] = mul i64 [[TMP93]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP95:%.*]] = add i64 [[INDEX]], [[TMP94]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP89]], i64 [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP92]], i64 [[TMP25]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP95]], i64 [[TMP30]]) -; CHECK-ORDERED-TF-NEXT: [[TMP96:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP97:%.*]] = mul i64 [[TMP96]], 32 -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP97]] +; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select nnan [[ACTIVE_LANE_MASK7]], [[TMP79]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP86:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) +; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = select nnan [[ACTIVE_LANE_MASK8]], [[TMP80]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP88]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP86]], [[TMP87]]) +; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP91:%.*]] = add i64 [[INDEX]], [[TMP90]] +; CHECK-ORDERED-TF-NEXT: [[TMP92:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP93:%.*]] = mul i64 [[TMP92]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP94:%.*]] = add i64 [[INDEX]], [[TMP93]] +; CHECK-ORDERED-TF-NEXT: [[TMP95:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP96:%.*]] = mul i64 [[TMP95]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP97:%.*]] = add i64 [[INDEX]], [[TMP96]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP91]], i64 [[TMP22]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP94]], i64 [[TMP27]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP97]], i64 [[TMP32]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP98:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP99:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP100:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -2253,7 +2253,7 @@ ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP88]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2267,7 +2267,7 @@ ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP88]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -13,21 +13,21 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP4]], poison) -; CHECK-NEXT: [[TMP5:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP5]], ptr [[TMP6]], i32 4, [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,20 +92,20 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP4]], poison) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 8, [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -168,24 +168,24 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, [[TMP5]], poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP5]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP5]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP9]], ptr [[TMP10]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -19,36 +19,36 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP17]] = and i64 [[TMP16]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) -; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI2]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) +; CHECK-NEXT: [[TMP21]] = and i64 [[TMP20]], [[VEC_PHI2]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP19]], [[TMP17]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP21]], [[TMP19]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -19,34 +19,34 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17]] = add [[WIDE_LOAD3]], [[VEC_PHI2]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18]] = add [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP19]] = add [[WIDE_LOAD3]], [[VEC_PHI2]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP19]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[BIN_RDX]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -19,29 +19,29 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP16]], [[WIDE_LOAD2]]) -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], [[WIDE_LOAD2]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -52,7 +52,7 @@ ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] @@ -73,7 +73,7 @@ ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -85,7 +85,7 @@ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[ADD_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -35,26 +35,26 @@ ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]] +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP19]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -72,16 +72,16 @@ ; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP24]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX5]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP27]], align 1 -; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 8 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP29]] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP29]], align 1 +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -113,26 +113,26 @@ ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 +; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF8: middle.block: @@ -214,26 +214,26 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -284,26 +284,26 @@ ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF8: middle.block: @@ -375,26 +375,26 @@ ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP15]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -415,16 +415,16 @@ ; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 10000, [[TMP21]] ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 10000, [[N_MOD_VF3]] ; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC4]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX10]], 0 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP23]], align 1 -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 8 -; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP25]] +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX10]], 0 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP23]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -458,26 +458,26 @@ ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP9]], align 1 -; CHECK-VF8-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP11]] -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] +; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 +; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF8: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -31,29 +31,29 @@ ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i64 [[TMP12]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 2 -; CHECK-NEXT: [[TMP14:%.*]] = fneg [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP15:%.*]] = fneg [[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store [[TMP14]], ptr [[TMP17]], align 2 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i64 [[TMP19]] -; CHECK-NEXT: store [[TMP15]], ptr [[TMP20]], align 2 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP14]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = fneg [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP17:%.*]] = fneg [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store [[TMP16]], ptr [[TMP19]], align 2 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -13,18 +13,18 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP4]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -80,19 +80,19 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP5]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD1]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP7]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -147,18 +147,18 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -217,19 +217,19 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP4]], poison) -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP3]], i32 4, [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -292,31 +292,31 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP7]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP12]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP8:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP9]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP14]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP15]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -11,7 +11,10 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-LABEL: @induction_i7( ; CHECK: vector.ph: -; CHECK: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() +; CHECK: %ind.end = trunc i64 %n.vec to i7 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() ; CHECK: [[TMP5:%.*]] = trunc [[TMP4]] to ; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i7 1, i64 0), poison, zeroinitializer) @@ -25,8 +28,6 @@ ; CHECK-NEXT: [[EXT:%.+]] = zext [[TMP11]] to ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[EXT]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], ; @@ -57,6 +58,9 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-LABEL: @induction_i3_zext( ; CHECK: vector.ph: +; CHECK: %ind.end = trunc i64 %n.vec to i3 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() ; CHECK: [[TMP5:%.*]] = trunc [[TMP4]] to ; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer @@ -70,8 +74,6 @@ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[TMP10]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -23,26 +23,26 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[VEC_IND]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP10]], i32 4, [[TMP7]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = trunc [[VEC_IND]] to +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP11]], i32 4, [[TMP9]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -56,14 +56,14 @@ ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -26,6 +26,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 @@ -34,21 +36,19 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[TMP1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw [[TMP2]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -1 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP4]], [[TMP5]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw [[TMP4]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP6]], [[TMP7]]) +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -106,11 +106,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl [[TMP0]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -119,24 +121,22 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP4]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP6]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sext [[WIDE_MASKED_GATHER1]] to -; CHECK-NEXT: [[TMP10:%.*]] = mul nsw [[BROADCAST_SPLAT3]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP8]], [[TMP10]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP6]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[TMP7]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP8]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP9:%.*]] = sext [[WIDE_MASKED_GATHER]] to +; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sext [[WIDE_MASKED_GATHER1]] to +; CHECK-NEXT: [[TMP12:%.*]] = mul nsw [[BROADCAST_SPLAT3]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 -1 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP10]], [[TMP12]]) +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -197,11 +197,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl [[TMP0]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -210,25 +212,23 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[TMP5]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc [[TMP8]] to -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP9]], [[TMP10]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP11:%.*]] = mul nsw [[TMP6]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP12:%.*]] = trunc [[TMP11]] to -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[TMP7]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP12]], [[TMP13]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[TMP7]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP11:%.*]] = trunc [[TMP10]] to +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP11]], [[TMP12]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP13:%.*]] = mul nsw [[TMP8]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc [[TMP13]] to +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[TMP9]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], [[TMP15]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -277,42 +277,42 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], ptr [[S:%.*]], [[VEC_IND]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP3]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP4]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP5]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 3 -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 4 -; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP7]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 5 -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP9:%.*]] = add [[WIDE_MASKED_GATHER]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP9]], [[WIDE_MASKED_GATHER2]] -; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER1]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], [[WIDE_MASKED_GATHER4]] -; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP12]], [[WIDE_MASKED_GATHER5]] -; CHECK-NEXT: [[TMP14]] = sub [[TMP10]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP2]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], ptr [[S:%.*]], [[VEC_IND]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP5]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 2 +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP7]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 3 +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 4 +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], [[VEC_IND]], i32 5 +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], [[WIDE_MASKED_GATHER2]] +; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER1]], [[WIDE_MASKED_GATHER3]] +; CHECK-NEXT: [[TMP14:%.*]] = add [[TMP13]], [[WIDE_MASKED_GATHER4]] +; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP14]], [[WIDE_MASKED_GATHER5]] +; CHECK-NEXT: [[TMP16]] = sub [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -381,10 +381,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i32 1023, i64 0), poison, zeroinitializer), [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i32 [[TMP1]], -4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i32 1023, i64 0), poison, zeroinitializer), [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i32 [[TMP3]], -4 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[DOTNEG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -392,33 +394,31 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i32 [[TMP3]], 3 -; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 2, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP6]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw i32 2, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP8]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[REVERSE1:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP9]]) -; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[REVERSE]], [[VEC_IND]] -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw [[REVERSE1]], [[VEC_IND]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 3 -; CHECK-NEXT: [[TMP15:%.*]] = sub nsw i32 1, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: [[REVERSE2:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP10]]) -; CHECK-NEXT: [[REVERSE3:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP11]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP10]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[REVERSE1:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[REVERSE]], [[VEC_IND]] +; CHECK-NEXT: [[TMP13:%.*]] = sub nsw [[REVERSE1]], [[VEC_IND]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 1, [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP18]] +; CHECK-NEXT: [[REVERSE2:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP12]]) +; CHECK-NEXT: [[REVERSE3:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP13]]) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -475,21 +475,21 @@ ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]] ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = shl nsw [[TMP3]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[INDEX]], 9223372036854775804 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: store [[TMP4]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = shl nsw [[TMP5]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: store [[TMP6]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -561,21 +561,21 @@ ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = shl nsw [[TMP11]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[INDEX]], 9223372036854775804 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] -; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = shl nsw [[TMP13]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[INDEX]], 9223372036854775804 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP15]] +; CHECK-NEXT: store [[TMP14]], ptr [[TMP16]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -640,10 +640,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer), [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP1]], -4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer), [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP3]], -4 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 @@ -652,16 +654,14 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add nsw [[BROADCAST_SPLAT]], [[VEC_IND]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], [[VEC_IND]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], [[VEC_IND]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i64.nxv4p0( [[TMP4]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = sub nsw [[WIDE_MASKED_GATHER]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP2]], [[TMP3]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP5]], [[TMP4]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[BROADCAST_SPLAT]], [[VEC_IND]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], [[VEC_IND]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], [[VEC_IND]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i64.nxv4p0( [[TMP6]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw [[WIDE_MASKED_GATHER]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], [[TMP6]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -709,28 +709,28 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add nsw [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP4]], [[TMP7]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP6]], [[TMP9]]) +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -802,27 +802,27 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, float undef, i32 0), [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ insertelement ( zeroinitializer, i32 undef, i32 0), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP0]], align 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, float undef, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ insertelement ( zeroinitializer, i32 undef, i32 0), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -; CHECK-NEXT: [[TMP4]] = add [[TMP1]], [[VEC_PHI1]] -; CHECK-NEXT: [[TMP5]] = fadd fast [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +; CHECK-NEXT: [[TMP6]] = add [[TMP3]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP7]] = fadd fast [[VEC_PHI]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP4]]) -; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP5]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP7]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -892,28 +892,28 @@ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[TMP10]], i64 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP12]], align 4 +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP12]], i64 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -979,35 +979,35 @@ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP12]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP11]], i64 0 -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP13]], i64 0 +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP15]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 -; CHECK-NEXT: [[TMP15]] = add [[TMP14]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 +; CHECK-NEXT: [[TMP17]] = add [[TMP16]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP17]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1074,28 +1074,28 @@ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 -1, i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 -1, i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]], i32 1 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1164,36 +1164,36 @@ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[TMP10]], i32 1 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[TMP12]], i32 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP14]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 -; CHECK-NEXT: [[TMP16]] = add [[TMP15]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[TMP17]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 +; CHECK-NEXT: [[TMP18]] = add [[TMP17]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP18]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1267,11 +1267,13 @@ ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP8]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -1282,18 +1284,16 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 -1 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1363,12 +1363,14 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[TMP6]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP8]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -1380,17 +1382,15 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add [[VEC_IND]], shufflevector ( insertelement ( poison, i64 -1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = add [[VEC_IND]], shufflevector ( insertelement ( poison, i64 -3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP12]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_IND]], shufflevector ( insertelement ( poison, i64 -1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = add [[VEC_IND]], shufflevector ( insertelement ( poison, i64 -3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP14]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP16]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -1473,38 +1473,38 @@ ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_MASKED_GATHER4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP13]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[A]], [[TMP16]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP18]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !34 -; CHECK-NEXT: [[TMP19:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], [[TMP17]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP20]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !34 -; CHECK-NEXT: [[TMP21:%.*]] = call @llvm.experimental.vector.splice.nxv4i16( [[VECTOR_RECUR]], [[WIDE_MASKED_GATHER4]], i32 -1) -; CHECK-NEXT: [[TMP22:%.*]] = sext [[TMP21]] to -; CHECK-NEXT: [[TMP23:%.*]] = sext [[WIDE_MASKED_GATHER4]] to -; CHECK-NEXT: [[TMP24:%.*]] = mul nsw [[TMP22]], [[TMP19]] -; CHECK-NEXT: [[TMP25:%.*]] = mul nsw [[TMP24]], [[TMP23]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP25]], ptr [[TMP26]], align 4, !alias.scope !37, !noalias !34 -; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP15]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], [[TMP18]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP20]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !34 +; CHECK-NEXT: [[TMP21:%.*]] = sext [[WIDE_MASKED_GATHER]] to +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[A]], [[TMP19]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP22]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !34 +; CHECK-NEXT: [[TMP23:%.*]] = call @llvm.experimental.vector.splice.nxv4i16( [[VECTOR_RECUR]], [[WIDE_MASKED_GATHER4]], i32 -1) +; CHECK-NEXT: [[TMP24:%.*]] = sext [[TMP23]] to +; CHECK-NEXT: [[TMP25:%.*]] = sext [[WIDE_MASKED_GATHER4]] to +; CHECK-NEXT: [[TMP26:%.*]] = mul nsw [[TMP24]], [[TMP21]] +; CHECK-NEXT: [[TMP27:%.*]] = mul nsw [[TMP26]], [[TMP25]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope !37, !noalias !34 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -15,21 +15,21 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP9]] -; CHECK-NEXT: store i16 [[TMP10]], ptr [[DST:%.*]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP11]] +; CHECK-NEXT: store i16 [[TMP12]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -45,7 +45,7 @@ ; CHECK-NEXT: store i16 [[LD]], ptr [[DST]], align 2 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.inc24: ; CHECK-NEXT: ret void ; @@ -78,20 +78,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[TMP7]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[TMP9]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -23,47 +23,47 @@ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 8, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP11]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 8, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP16:%.*]] = add [[DOTSPLAT]], [[TMP15]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP16]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP11]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP13]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector [[DOTSPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT6]], [[TMP19]] -; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP20]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP31]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 4 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP21:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP22:%.*]] = add [[DOTSPLAT6]], [[TMP21]] +; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP22]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 8 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP31]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP33]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP34]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -33,34 +33,34 @@ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP21]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP27]] -; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] +; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -131,34 +131,34 @@ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP21]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP27]] -; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] +; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -47,52 +47,52 @@ ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[TMP23]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP23]], i64 [[TMP29]] -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP30]], align 4 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP25]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP31]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 2 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP25]], i64 [[TMP33]] -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP34]], align 4 -; CHECK-NEXT: [[TMP35:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]] -; CHECK-NEXT: [[TMP36:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[TMP37]], i32 0 -; CHECK-NEXT: store [[TMP35]], ptr [[TMP41]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 2 -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP37]], i64 [[TMP43]] -; CHECK-NEXT: store [[TMP36]], ptr [[TMP44]], align 4 -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[TMP39]], i32 0 -; CHECK-NEXT: store [[TMP35]], ptr [[TMP45]], align 4 -; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 2 -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP47]] -; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 4 -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP49]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP50]] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP25]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP25]], i64 [[TMP31]] +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP32]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP27]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP35]] +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP36]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]] +; CHECK-NEXT: [[TMP38:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i64, ptr [[TMP39]], i32 0 +; CHECK-NEXT: store [[TMP37]], ptr [[TMP43]], align 4 +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 2 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP45]] +; CHECK-NEXT: store [[TMP38]], ptr [[TMP46]], align 4 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i64, ptr [[TMP41]], i32 0 +; CHECK-NEXT: store [[TMP37]], ptr [[TMP47]], align 4 +; CHECK-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP48]], 2 +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP49]] +; CHECK-NEXT: store [[TMP38]], ptr [[TMP50]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -16,24 +16,24 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_MASKED_LOAD1]], [[TMP8]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_MASKED_LOAD1]], [[TMP10]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP13]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -15,19 +15,19 @@ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP0]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP1]], ptr [[TMP2]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP2]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP3]], ptr [[TMP4]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] @@ -78,21 +78,21 @@ ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP3]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP4]], ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP6:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP6]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP4]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -19,34 +19,36 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 -; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 12 -; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[UMAX]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[UMAX]], [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 16 -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[UMAX]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i64 [[UMAX]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 0 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[UMAX]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = icmp ugt i64 [[UMAX]], [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 0 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 16 -; CHECK-NEXT: [[TMP28:%.*]] = sub i64 [[UMAX]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp ugt i64 [[UMAX]], [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i64 [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 12 +; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[UMAX]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[UMAX]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[UMAX]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[UMAX]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16 +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[UMAX]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[UMAX]], [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 +; CHECK-NEXT: [[TMP30:%.*]] = sub i64 [[UMAX]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ugt i64 [[UMAX]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i64 [[TMP30]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]]) @@ -60,56 +62,54 @@ ; CHECK-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX6]], 0 -; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 4 -; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 0 -; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[INDEX6]], [[TMP35]] -; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 8 -; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP38]], 0 -; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = add i64 [[INDEX6]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 12 -; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 -; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX6]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP36]] -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP41]] -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP46]] -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP47]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 4 -; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK7]]) -; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 8 -; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK8]]) -; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 12 -; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 4 -; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[INDEX6]], [[TMP62]] -; CHECK-NEXT: [[TMP64:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP64]], 8 -; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[INDEX6]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP67]], 12 -; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX6]], [[TMP68]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP15]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP63]], i64 [[TMP20]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT11]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP66]], i64 [[TMP25]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP69]], i64 [[TMP30]]) -; CHECK-NEXT: [[TMP70:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP71:%.*]] = mul i64 [[TMP70]], 16 -; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP71]] +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 +; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], 0 +; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX6]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 +; CHECK-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[INDEX6]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 12 +; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 1 +; CHECK-NEXT: [[TMP48:%.*]] = add i64 [[INDEX6]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP38]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP48]] +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP49]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP53]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 4 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP55]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP56]], i32 4, [[ACTIVE_LANE_MASK7]]) +; CHECK-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 8 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP58]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP59]], i32 4, [[ACTIVE_LANE_MASK8]]) +; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP60]], 12 +; CHECK-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP61]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP62]], i32 4, [[ACTIVE_LANE_MASK9]]) +; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP63]], 4 +; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX6]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP66]], 8 +; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX6]], [[TMP67]] +; CHECK-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 12 +; CHECK-NEXT: [[TMP71:%.*]] = add i64 [[INDEX6]], [[TMP70]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP17]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP65]], i64 [[TMP22]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT11]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP68]], i64 [[TMP27]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP32]]) +; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: [[TMP72:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP73:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT10]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP74:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -161,34 +161,36 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 -; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 12 -; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[UMAX]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[UMAX]], [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 16 -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[UMAX]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i64 [[UMAX]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 0 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[UMAX]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = icmp ugt i64 [[UMAX]], [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 0 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 16 -; CHECK-NEXT: [[TMP28:%.*]] = sub i64 [[UMAX]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp ugt i64 [[UMAX]], [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i64 [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 12 +; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[UMAX]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[UMAX]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[UMAX]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[UMAX]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16 +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[UMAX]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[UMAX]], [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 16 +; CHECK-NEXT: [[TMP30:%.*]] = sub i64 [[UMAX]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ugt i64 [[UMAX]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i64 [[TMP30]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]]) @@ -202,82 +204,80 @@ ; CHECK-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX6]], 0 -; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 4 -; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 0 -; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[INDEX6]], [[TMP35]] -; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 8 -; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP38]], 0 -; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = add i64 [[INDEX6]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 12 -; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 -; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX6]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP36]] -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP41]] -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP46]] -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP47]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 4 -; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 8 -; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 12 -; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]], poison) -; CHECK-NEXT: [[TMP61:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP62:%.*]] = icmp ne [[WIDE_MASKED_LOAD10]], zeroinitializer -; CHECK-NEXT: [[TMP63:%.*]] = icmp ne [[WIDE_MASKED_LOAD11]], zeroinitializer -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne [[WIDE_MASKED_LOAD12]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP36]] -; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP41]] -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP46]] -; CHECK-NEXT: [[TMP69:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP61]], zeroinitializer -; CHECK-NEXT: [[TMP70:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP62]], zeroinitializer -; CHECK-NEXT: [[TMP71:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP72:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP64]], zeroinitializer -; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr [[TMP65]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP73]], i32 4, [[TMP69]]) -; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 4 -; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP75]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP76]], i32 4, [[TMP70]]) -; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP78:%.*]] = mul i64 [[TMP77]], 8 -; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP78]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP79]], i32 4, [[TMP71]]) -; CHECK-NEXT: [[TMP80:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP80]], 12 -; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP81]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP82]], i32 4, [[TMP72]]) -; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 4 -; CHECK-NEXT: [[TMP85:%.*]] = add i64 [[INDEX6]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP87:%.*]] = mul i64 [[TMP86]], 8 -; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[INDEX6]], [[TMP87]] -; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 12 -; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[INDEX6]], [[TMP90]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP15]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP85]], i64 [[TMP20]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP88]], i64 [[TMP25]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP91]], i64 [[TMP30]]) -; CHECK-NEXT: [[TMP92:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP93:%.*]] = mul i64 [[TMP92]], 16 -; CHECK-NEXT: [[INDEX_NEXT16]] = add i64 [[INDEX6]], [[TMP93]] +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 +; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], 0 +; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX6]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 8 +; CHECK-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[INDEX6]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 12 +; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 1 +; CHECK-NEXT: [[TMP48:%.*]] = add i64 [[INDEX6]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP38]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP48]] +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP49]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP53]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 4 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP55]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP56]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 8 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP58]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP59]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP60]], 12 +; CHECK-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[TMP49]], i64 [[TMP61]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP62]], i32 4, [[ACTIVE_LANE_MASK9]], poison) +; CHECK-NEXT: [[TMP63:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP64:%.*]] = icmp ne [[WIDE_MASKED_LOAD10]], zeroinitializer +; CHECK-NEXT: [[TMP65:%.*]] = icmp ne [[WIDE_MASKED_LOAD11]], zeroinitializer +; CHECK-NEXT: [[TMP66:%.*]] = icmp ne [[WIDE_MASKED_LOAD12]], zeroinitializer +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP38]] +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP48]] +; CHECK-NEXT: [[TMP71:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP63]], zeroinitializer +; CHECK-NEXT: [[TMP72:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP64]], zeroinitializer +; CHECK-NEXT: [[TMP73:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP65]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP66]], zeroinitializer +; CHECK-NEXT: [[TMP75:%.*]] = getelementptr i32, ptr [[TMP67]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP75]], i32 4, [[TMP71]]) +; CHECK-NEXT: [[TMP76:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP76]], 4 +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr [[TMP67]], i64 [[TMP77]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP78]], i32 4, [[TMP72]]) +; CHECK-NEXT: [[TMP79:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP80:%.*]] = mul i64 [[TMP79]], 8 +; CHECK-NEXT: [[TMP81:%.*]] = getelementptr i32, ptr [[TMP67]], i64 [[TMP80]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP81]], i32 4, [[TMP73]]) +; CHECK-NEXT: [[TMP82:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP83:%.*]] = mul i64 [[TMP82]], 12 +; CHECK-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[TMP67]], i64 [[TMP83]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP84]], i32 4, [[TMP74]]) +; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP85]], 4 +; CHECK-NEXT: [[TMP87:%.*]] = add i64 [[INDEX6]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP89:%.*]] = mul i64 [[TMP88]], 8 +; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[INDEX6]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP92:%.*]] = mul i64 [[TMP91]], 12 +; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[INDEX6]], [[TMP92]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP17]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP87]], i64 [[TMP22]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP27]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP32]]) +; CHECK-NEXT: [[INDEX_NEXT16]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: [[TMP94:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP95:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP96:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -20,9 +20,11 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -30,14 +32,12 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -147,25 +147,25 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -224,32 +224,32 @@ ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) -; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP14:%.*]] = add [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP14]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP17]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP16]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 4, [[TMP19]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP20]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP22]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP21]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP22]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP14]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP9]] ; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP23]], i32 0 @@ -305,26 +305,26 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[IND:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[IND:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -384,25 +384,25 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -459,9 +459,11 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -469,24 +471,22 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP15]], poison) -; CHECK-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP18:%.*]] = or [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP19]], i32 4, [[TMP18]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP17]], poison) +; CHECK-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP18]], zeroinitializer, [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP20:%.*]] = or [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP21]], i32 4, [[TMP20]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP22]], i32 0 ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -559,9 +559,11 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -569,15 +571,13 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -631,27 +631,27 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP15:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP17]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP17]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -711,28 +711,28 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP15]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP18]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP17]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP18]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP6]] ; CHECK-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -786,18 +786,18 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -23,42 +23,42 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[N]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP11]], 3 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP22]] -; CHECK-NEXT: store [[TMP17]], ptr [[TMP23]], align 8 -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 3 -; CHECK-NEXT: [[TMP26:%.*]] = sub i64 0, [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP25]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP26]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[TMP27]] -; CHECK-NEXT: store [[TMP18]], ptr [[TMP29]], align 8 -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP31]] +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[N]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]] +; CHECK-NEXT: store [[TMP19]], ptr [[TMP25]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]] +; CHECK-NEXT: store [[TMP20]], ptr [[TMP31]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -128,42 +128,42 @@ ; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[N]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 -; CHECK-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 3 -; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP20]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP22]], align 8 -; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP24:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3 -; CHECK-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP28]] -; CHECK-NEXT: store [[TMP23]], ptr [[TMP29]], align 8 -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 3 -; CHECK-NEXT: [[TMP32:%.*]] = sub i64 0, [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = sub i64 1, [[TMP31]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP32]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[TMP34]], i64 [[TMP33]] -; CHECK-NEXT: store [[TMP24]], ptr [[TMP35]], align 8 -; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP37:%.*]] = shl i64 [[TMP36]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP37]] +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[N]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP17]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP22]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 8 +; CHECK-NEXT: [[TMP25:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP26:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = shl i64 [[TMP28]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = sub i64 1, [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP30]] +; CHECK-NEXT: store [[TMP25]], ptr [[TMP31]], align 8 +; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP33:%.*]] = shl i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = sub i64 0, [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = sub i64 1, [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[TMP36]], i64 [[TMP35]] +; CHECK-NEXT: store [[TMP26]], ptr [[TMP37]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -27,43 +27,43 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 5 -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 5 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP10]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load , ptr [[NEXT_GEP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC3]]) -; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[TMP9]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[TMP11]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP13]], ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP17]] -; CHECK-NEXT: store [[TMP14]], ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = add nsw [[TMP10]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = add nsw [[TMP12]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP19]], ptr [[TMP21]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP23]] -; CHECK-NEXT: store [[TMP20]], ptr [[TMP24]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]] +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[TMP11]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = add nsw [[TMP13]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP19]] +; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add nsw [[TMP12]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP22:%.*]] = add nsw [[TMP14]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP21]], ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP25:%.*]] = shl nuw nsw i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP25]] +; CHECK-NEXT: store [[TMP22]], ptr [[TMP26]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -143,28 +143,28 @@ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP8]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: store [[TMP10]], ptr [[NEXT_GEP5]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP13]] -; CHECK-NEXT: store [[TMP11]], ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP10]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: store [[TMP12]], ptr [[NEXT_GEP5]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP15]] +; CHECK-NEXT: store [[TMP13]], ptr [[TMP16]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -234,30 +234,30 @@ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP7]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[TMP8]], i64 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11]] = add [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: store [[TMP8]], ptr [[NEXT_GEP]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP6]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP9]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[TMP10]], i64 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13]] = add [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: store [[TMP10]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP11]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP13]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -310,22 +310,22 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP2]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[TMP3]], i64 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr [[TMP5]], i32 2, [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = extractelement [[TMP5]], i64 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr [[TMP7]], i32 2, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -22,18 +22,18 @@ ; NONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; NONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NONE-NEXT: br label [[VECTOR_BODY:%.*]] ; NONE: vector.body: ; NONE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; NONE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0 -; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; NONE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] +; NONE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0 +; NONE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; NONE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 4 +; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] ; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NONE: middle.block: @@ -69,19 +69,19 @@ ; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; DATA-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; DATA-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DATA-NEXT: br label [[VECTOR_BODY:%.*]] ; DATA: vector.body: ; DATA-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; DATA-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0 -; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]]) -; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] -; DATA-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] +; DATA-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], 0 +; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP11]], i64 [[UMAX]]) +; DATA-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP11]] +; DATA-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 +; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) +; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP10]] ; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA: middle.block: @@ -119,24 +119,24 @@ ; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer ; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]] ; DATA_NO_LANEMASK: vector.body: ; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ] -; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0 +; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[INDEX1]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add zeroinitializer, [[TMP10]] -; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT3]], [[TMP11]] -; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT]] -; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] -; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 -; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, [[TMP12]]) -; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]] +; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = add zeroinitializer, [[TMP12]] +; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT3]], [[TMP13]] +; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT]] +; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP11]] +; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 +; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT5]], ptr [[TMP16]], i32 4, [[TMP14]]) +; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP10]] ; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] ; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_NO_LANEMASK: middle.block: @@ -171,6 +171,8 @@ ; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA_AND_CONTROL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA_AND_CONTROL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; DATA_AND_CONTROL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -178,13 +180,11 @@ ; DATA_AND_CONTROL: vector.body: ; DATA_AND_CONTROL-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; DATA_AND_CONTROL-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0 -; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] -; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] +; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], 0 +; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP11]] +; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 +; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) +; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP10]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 @@ -219,9 +219,11 @@ ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -229,14 +231,12 @@ ; DATA_AND_CONTROL_NO_RT_CHECK: vector.body: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP12]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -32,18 +32,18 @@ ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -115,18 +115,18 @@ ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = sext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = sext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -22,20 +22,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -85,22 +85,22 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -19,20 +19,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -124,20 +124,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -229,20 +229,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -334,20 +334,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -439,24 +439,24 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -574,24 +574,24 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -709,21 +709,21 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -837,21 +837,21 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -965,22 +965,22 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -21,23 +21,23 @@ ; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; OUTLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; OUTLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; OUTLOOP: vector.body: ; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; OUTLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; OUTLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] -; OUTLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; OUTLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; OUTLOOP-NEXT: [[TMP8]] = add [[VEC_PHI]], [[TMP7]] -; OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 -; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP10]] +; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; OUTLOOP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] +; OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; OUTLOOP-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; OUTLOOP-NEXT: [[TMP10]] = add [[VEC_PHI]], [[TMP9]] +; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OUTLOOP: middle.block: -; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP8]]) +; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) ; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; OUTLOOP: scalar.ph: @@ -75,20 +75,20 @@ ; INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; INLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; INLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 8 ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; INLOOP: vector.body: ; INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; INLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; INLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] -; INLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; INLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; INLOOP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP7]]) -; INLOOP-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]] -; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 8 -; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]] +; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; INLOOP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] +; INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; INLOOP-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP9]]) +; INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] +; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INLOOP: middle.block: @@ -96,7 +96,7 @@ ; INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INLOOP: scalar.ph: ; INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; INLOOP: for.body: ; INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -109,7 +109,7 @@ ; INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; INLOOP: for.cond.cleanup.loopexit: -; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: br label [[FOR_COND_CLEANUP]] ; INLOOP: for.cond.cleanup: ; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -15,17 +15,17 @@ ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: ; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; LMUL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP2]] -; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; LMUL1-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL1-NEXT: store [[TMP5]], ptr [[TMP4]], align 4 -; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; LMUL1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP3]] +; LMUL1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; LMUL1-NEXT: [[TMP6:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL1-NEXT: store [[TMP6]], ptr [[TMP5]], align 4 +; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] ; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL1: middle.block: @@ -57,18 +57,18 @@ ; LMUL2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL2-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL2: vector.body: ; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; LMUL2-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL2-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; LMUL2-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; LMUL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; LMUL2-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL2-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 +; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; LMUL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL2: middle.block: @@ -100,18 +100,18 @@ ; LMUL4-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; LMUL4-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL4-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL4-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; LMUL4-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL4: vector.body: ; LMUL4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL4-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; LMUL4-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL4-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; LMUL4-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; LMUL4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; LMUL4-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL4-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 +; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; LMUL4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL4-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL4: middle.block: @@ -143,18 +143,18 @@ ; LMUL8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; LMUL8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; LMUL8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; LMUL8-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL8: vector.body: ; LMUL8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] -; LMUL8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; LMUL8-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; LMUL8-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; LMUL8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; LMUL8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; LMUL8-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; LMUL8-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; LMUL8-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 +; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; LMUL8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL8: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -16,23 +16,23 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP5]], i64 5) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP7]], i64 5) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP9]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP13]], ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -20,14 +20,16 @@ ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -35,20 +37,18 @@ ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] -; VLENUNK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) -; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] -; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP10]] -; VLENUNK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 -; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; VLENUNK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP13:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP12]] +; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, [[TMP13]], poison) +; VLENUNK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_LOAD]] +; VLENUNK-NEXT: [[TMP17:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP12]] +; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP19]], align 4 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -22,14 +22,14 @@ ; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV32-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV32: vector.memcheck: -; RV32-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 -; RV32-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 39940 -; RV32-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 159752 -; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[UGLYGEP1]] -; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[UGLYGEP]] +; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 +; RV32-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 39940 +; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 159752 +; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] ; RV32-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; RV32-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[UGLYGEP2]] -; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[UGLYGEP]] +; RV32-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] +; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] ; RV32-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] @@ -39,32 +39,32 @@ ; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV32-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; RV32-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; RV32-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; RV32-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; RV32-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV32-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV32-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV32-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer +; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP8]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV32-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP11]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV32-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] -; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 -; RV32-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV32-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] -; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV32-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP15:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP15]] +; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP16]], i32 8, [[TMP14]], poison), !alias.scope !3 +; RV32-NEXT: [[TMP17:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV32-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] +; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope !5, !noalias !7 +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -104,14 +104,14 @@ ; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV64-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV64: vector.memcheck: -; RV64-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 -; RV64-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 39940 -; RV64-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 159752 -; RV64-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[UGLYGEP1]] -; RV64-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[UGLYGEP]] +; RV64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 +; RV64-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 39940 +; RV64-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 159752 +; RV64-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; RV64-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] ; RV64-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; RV64-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[UGLYGEP2]] -; RV64-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[UGLYGEP]] +; RV64-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] +; RV64-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] ; RV64-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] ; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] @@ -121,32 +121,32 @@ ; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV64-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; RV64-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; RV64-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; RV64-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; RV64-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV64-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV64-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV64-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer +; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP8]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV64-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP11]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV64-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] -; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 -; RV64-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV64-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] -; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV64-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP15:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP15]] +; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP16]], i32 8, [[TMP14]], poison), !alias.scope !3 +; RV64-NEXT: [[TMP17:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV64-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] +; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope !5, !noalias !7 +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -19,20 +19,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 200 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 200 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -143,20 +143,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 8192 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 8192 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -209,20 +209,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 1024 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 1024 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -19,21 +19,21 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -85,21 +85,21 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -149,26 +149,26 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP12]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -221,19 +221,19 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -279,20 +279,20 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -373,21 +373,21 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1024) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -38,6 +38,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -52,8 +54,6 @@ ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -113,6 +113,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -127,8 +129,6 @@ ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -184,6 +184,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -194,8 +196,6 @@ ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -257,6 +257,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement poison, i32 [[A:%.*]], i64 0 ; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector [[MINMAX_IDENT_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[B:%.*]], i64 0 @@ -271,8 +273,6 @@ ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -330,6 +330,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -340,8 +342,6 @@ ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -428,6 +428,8 @@ ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -444,8 +446,6 @@ ; SCALABLE-NEXT: [[TMP13:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[VEC_PHI]] ; SCALABLE-NEXT: [[TMP14:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[PREDPHI]] = select [[TMP8]], [[TMP13]], [[VEC_PHI]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -17,27 +17,27 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP12]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP14]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -90,26 +90,26 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64 -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 64, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 64, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -229,19 +229,19 @@ ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP5]], 1 -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]] -; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP9]], ptr [[TMP8]], align 4 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[TMP7]], 1 +; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP8]] +; NOSTRIDED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 4 +; NOSTRIDED-NEXT: [[TMP11:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP11]], ptr [[TMP10]], align 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NOSTRIDED: middle.block: @@ -314,18 +314,18 @@ ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NOSTRIDED: middle.block: @@ -446,21 +446,21 @@ ; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP9]], 1 -; NOSTRIDED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP10]] -; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; NOSTRIDED-NEXT: [[TMP13:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP10]] -; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 -; NOSTRIDED-NEXT: store [[TMP13]], ptr [[TMP15]], align 4 -; NOSTRIDED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; NOSTRIDED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; NOSTRIDED-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP11]], 1 +; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP12]] +; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; NOSTRIDED-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP12]] +; NOSTRIDED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 +; NOSTRIDED-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NOSTRIDED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NOSTRIDED: middle.block: @@ -513,14 +513,16 @@ ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]] ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; STRIDED-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP11:%.*]] = add [[TMP10]], zeroinitializer -; STRIDED-NEXT: [[TMP12:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]] -; STRIDED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]] -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP13:%.*]] = add [[TMP12]], zeroinitializer +; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] +; STRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP16]] +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -528,15 +530,13 @@ ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP16:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] -; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !8 -; STRIDED-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], [[TMP16]] -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !11, !noalias !8 -; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], [[TMP18]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !8 +; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !11, !noalias !8 +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -596,18 +596,18 @@ ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] +; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NOSTRIDED: middle.block: @@ -724,43 +724,43 @@ ; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]] ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]] +; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[STRIDE]], [[TMP14]] -; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 +; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1 +; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] +; STRIDED-NEXT: [[TMP19:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT]], [[TMP19]] ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; STRIDED-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 1 -; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE]], [[TMP22]] -; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP21]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP24]], i64 0 +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP20]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1 +; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]] +; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP25:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP26:%.*]] = add [[DOTSPLAT14]], [[TMP25]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP26]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 -; STRIDED-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP28]], [[TMP27]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 -; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] -; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]] +; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP28:%.*]] = add [[DOTSPLAT14]], [[TMP27]] +; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP28]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 +; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], [[TMP29]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] ; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -1,89 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %cond, i64 %N){ -; CHECK-LABEL: @cond_fadd( +; CHECK-LABEL: define float @cond_fadd +; CHECK-SAME: (ptr noalias nocapture readonly [[A:%.*]], ptr noalias nocapture readonly [[COND:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP10]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP16]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP12]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP22]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP27]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP26]]) +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP26]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP25]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP26]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP29]], 5.000000e+00 +; CHECK-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP28]], 5.000000e+00 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP30]] +; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP29]] ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[FADD]], [[IF_THEN]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RES_LCSSA]] ; entry: @@ -114,90 +116,91 @@ } define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { -; CHECK-LABEL: @cond_cmp_sel( +; CHECK-LABEL: define float @cond_cmp_sel +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[COND:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP10]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP16]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP12]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP22]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> -; CHECK-NEXT: [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]]) -; CHECK-NEXT: [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]]) +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> +; CHECK-NEXT: [[TMP26:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP25]]) +; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = call fast float @llvm.minnum.f32(float [[TMP26]], float [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP30]], 3.000000e+00 +; CHECK-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP28]], 3.000000e+00 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = call fast float @llvm.minnum.f32(float [[RDX]], float [[TMP31]]) +; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[FSEL:%.*]] = call fast float @llvm.minnum.f32(float [[RDX]], float [[TMP29]]) ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP32]], [[IF_THEN]] ] +; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[FSEL]], [[IF_THEN]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RES_LCSSA]] ; entry: @@ -229,84 +232,85 @@ } define i32 @conditional_and(ptr noalias %A, ptr noalias %B, i32 %cond, i64 noundef %N) #0 { -; CHECK-LABEL: @conditional_and( +; CHECK-LABEL: define i32 @conditional_and +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i32 [[COND:%.*]], i64 noundef [[N:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[COND]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP10]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP16]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i64 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP22]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP25]], <4 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP26]]) -; CHECK-NEXT: [[TMP28]] = and i32 [[TMP27]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP25]]) +; CHECK-NEXT: [[TMP27]] = and i32 [[TMP26]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 7, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 7, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP30]], [[COND]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP29]], [[COND]] ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP31]], [[RDX]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP30]], [[RDX]] ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[RES]] = phi i32 [ [[AND]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] @@ -314,7 +318,7 @@ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RES_LCSSA]] ; entry: @@ -345,9 +349,10 @@ } define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond, i64 noundef %N) { -; CHECK-LABEL: @simple_chained_rdx( +; CHECK-LABEL: define i32 @simple_chained_rdx +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[COND:%.*]], i64 noundef [[N:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 @@ -355,16 +360,16 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -491,12 +496,140 @@ ; Reduction not performed in loop as the phi has more than two incoming values ; define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %cond, i64 %N){ -; CHECK-LABEL: @nested_cond_and( +; CHECK-LABEL: define i64 @nested_cond_and +; CHECK-SAME: (ptr noalias nocapture readonly [[A:%.*]], ptr noalias nocapture readonly [[B:%.*]], ptr noalias nocapture readonly [[COND:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.vector.reduce.and +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[COND]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP12]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i64> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP17]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP22]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], +; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP25]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 +; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; CHECK: pred.load.if7: +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i64> poison, i64 [[TMP29]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK: pred.load.continue8: +; CHECK-NEXT: [[TMP31:%.*]] = phi <4 x i64> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1 +; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] +; CHECK: pred.load.if9: +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP34:%.*]] = load i64, ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP31]], i64 [[TMP34]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] +; CHECK: pred.load.continue10: +; CHECK-NEXT: [[TMP36:%.*]] = phi <4 x i64> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP35]], [[PRED_LOAD_IF9]] ] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP26]], i64 2 +; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] +; CHECK: pred.load.if11: +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP38]], align 4 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP36]], i64 [[TMP39]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] +; CHECK: pred.load.continue12: +; CHECK-NEXT: [[TMP41:%.*]] = phi <4 x i64> [ [[TMP36]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP40]], [[PRED_LOAD_IF11]] ] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP26]], i64 3 +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] +; CHECK: pred.load.if13: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr [[TMP43]], align 4 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i64> [[TMP41]], i64 [[TMP44]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] +; CHECK: pred.load.continue14: +; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] +; CHECK-NEXT: [[TMP47:%.*]] = xor <4 x i1> [[TMP25]], +; CHECK-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP47]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP48]], <4 x i64> [[TMP24]], <4 x i64> [[TMP46]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> +; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK: @llvm.vector.reduce.and -; CHECK: scalar.ph +; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> [[PREDPHI15]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 5, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP51]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[AND1:%.*]] = and i64 [[RDX]], [[TMP52]] +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i64 [[TMP52]], 3 +; CHECK-NEXT: br i1 [[TOBOOL2]], label [[IF_THEN_2:%.*]], label [[FOR_INC]] +; CHECK: if.then.2: +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[RDX]], [[TMP53]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[RES]] = phi i64 [ [[AND2]], [[IF_THEN_2]] ], [ [[AND1]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i64 [ [[RES]], [[FOR_INC]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RES_LCSSA]] +; entry: br label %for.body @@ -536,12 +669,99 @@ ; the Phi (%rdx1) as it has more than one use. ; define i32 @cond-uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 noundef %n) #0 { -; CHECK-LABEL: @cond-uncond( +; CHECK-LABEL: define i32 @cond-uncond +; CHECK-SAME: (ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[COND:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP10]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP16]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP22]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NOT: @llvm.vector.reduce.add +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = add <4 x i32> [[VEC_PHI]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[WIDE_LOAD7]], [[PREDPHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK: @llvm.vector.reduce.add -; CHECK: scalar.ph +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[RDX1:%.*]] = phi i32 [ [[ADD2:%.*]], [[IF_END:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP30]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[IV]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[RDX1]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[RDX1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[IV]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD2]] = add nsw i32 [[TMP32]], [[RES]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD2]], [[IF_END]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[ADD2_LCSSA]] +; entry: br label %for.body @@ -577,12 +797,141 @@ ; of the incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx1). ; define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 %n) #0 { -; CHECK-LABEL: @cond_cond( +; CHECK-LABEL: define float @cond_cond +; CHECK-SAME: (ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[COND:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP12]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP17]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP22]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP24]], <4 x float> +; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 +; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; CHECK: pred.load.if7: +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[SRC2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> poison, float [[TMP29]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK: pred.load.continue8: +; CHECK-NEXT: [[TMP31:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1 +; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] +; CHECK: pred.load.if9: +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP31]], float [[TMP34]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] +; CHECK: pred.load.continue10: +; CHECK-NEXT: [[TMP36:%.*]] = phi <4 x float> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP35]], [[PRED_LOAD_IF9]] ] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP26]], i64 2 +; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] +; CHECK: pred.load.if11: +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[SRC2]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP39]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] +; CHECK: pred.load.continue12: +; CHECK-NEXT: [[TMP41:%.*]] = phi <4 x float> [ [[TMP36]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP40]], [[PRED_LOAD_IF11]] ] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP26]], i64 3 +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] +; CHECK: pred.load.if13: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[SRC2]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP43]], align 4 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP44]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; CHECK: pred.load.continue14: -; CHECK-NOT: @llvm.vector.reduce.fadd +; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x float> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] +; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP26]], <4 x float> [[TMP46]], <4 x float> +; CHECK-NEXT: [[PREDPHI15]] = fadd fast <4 x float> [[PREDPHI]], [[TMP47]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK: @llvm.vector.reduce.fadd -; CHECK: scalar.ph +; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI15]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP49]], [[MIDDLE_BLOCK]] ], [ 2.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[RDX1:%.*]] = phi float [ [[RES:%.*]], [[FOR_INC:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP50:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast oeq float [[TMP50]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[IV]] +; CHECK-NEXT: [[TMP51:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP51]], [[RDX1]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RDX2:%.*]] = phi float [ [[ADD]], [[IF_THEN]] ], [ [[RDX1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[CMP5:%.*]] = fcmp fast oeq float [[TMP50]], 7.000000e+00 +; CHECK-NEXT: br i1 [[CMP5]], label [[IF_THEN6:%.*]], label [[FOR_INC]] +; CHECK: if.then6: +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[SRC2]], i64 [[IV]] +; CHECK-NEXT: [[TMP52:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[TMP52]], [[RDX2]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[RES]] = phi float [ [[ADD2]], [[IF_THEN6]] ], [ [[RDX2]], [[IF_END]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[RES_LCSSA]] +; entry: br label %for.body @@ -626,12 +975,99 @@ ; incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx). ; define i32 @uncond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 %N) #0 { -; CHECK-LABEL: @uncond_cond( +; CHECK-LABEL: define i32 @uncond_cond +; CHECK-SAME: (ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[COND:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE7:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE7]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF2:%.*]], label [[PRED_LOAD_CONTINUE3:%.*]] +; CHECK: pred.load.if2: +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE3]] +; CHECK: pred.load.continue3: +; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]] +; CHECK: pred.load.if4: +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]] +; CHECK: pred.load.continue5: +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE3]] ], [ [[TMP19]], [[PRED_LOAD_IF4]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7]] +; CHECK: pred.load.if6: +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]] ; CHECK: pred.load.continue7: -; CHECK-NOT: @llvm.vector.reduce.add +; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP25]], [[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP26]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI]] = add <4 x i32> [[TMP1]], [[TMP27]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK: @llvm.vector.reduce.add -; CHECK: scalar.ph +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PREDPHI]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[RES:%.*]], [[FOR_INC:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[IV]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP30]], [[RDX]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[IV]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP32]], [[ADD1]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[RES]] = phi i32 [ [[ADD2]], [[IF_THEN]] ], [ [[ADD1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; entry: br label %for.body @@ -667,12 +1103,101 @@ ; through the chain and check the number of uses of %add1, we find more than the expected one use. ; define i32 @uncond_cond_uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, i64 noundef %N) { -; CHECK-LABEL: @uncond_cond_uncond( +; CHECK-LABEL: define i32 @uncond_cond_uncond +; CHECK-SAME: (ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[COND:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE7:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE7]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF2:%.*]], label [[PRED_LOAD_CONTINUE3:%.*]] +; CHECK: pred.load.if2: +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i64 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE3]] +; CHECK: pred.load.continue3: +; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]] +; CHECK: pred.load.if4: +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i64 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]] +; CHECK: pred.load.continue5: +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE3]] ], [ [[TMP19]], [[PRED_LOAD_IF4]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7]] +; CHECK: pred.load.if6: +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i64 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]] ; CHECK: pred.load.continue7: -; CHECK-NOT: @llvm.vector.reduce.add +; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP25]], [[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP26]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = add <4 x i32> [[TMP1]], [[TMP27]] +; CHECK-NEXT: [[TMP28]] = add <4 x i32> [[PREDPHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK: @llvm.vector.reduce.add -; CHECK: scalar.ph +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP28]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP30]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[ADD3:%.*]], [[IF_END:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[IV]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP31]], [[RDX]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP32]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[IV]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP33]], [[ADD1]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ADD2]], [[IF_THEN]] ], [ [[ADD1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ADD3]] = add nsw i32 [[RES]], [[TMP31]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[IF_END]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[ADD3_LCSSA]] +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -19,33 +19,33 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] -; CHECK-NEXT: store [[TMP12]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP11]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP13]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i64 [[TMP17]] +; CHECK-NEXT: store [[TMP14]], ptr [[TMP18]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -101,30 +101,30 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP7]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP12]] -; CHECK-NEXT: store [[TMP10]], ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP9]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]] +; CHECK-NEXT: store [[TMP12]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -189,21 +189,21 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP5:%.*]] = shl [[TMP4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -267,25 +267,25 @@ ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DOTCAST]], 2.000000e+00 ; CHECK-NEXT: [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP6:%.*]] = uitofp [[TMP5]] to -; CHECK-NEXT: [[TMP7:%.*]] = fmul [[TMP6]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float -; CHECK-NEXT: [[TMP11:%.*]] = fmul float [[TMP10]], 2.000000e+00 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: [[TMP8:%.*]] = uitofp [[TMP7]] to +; CHECK-NEXT: [[TMP9:%.*]] = fmul [[TMP8]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP11:%.*]] = shl i32 [[TMP10]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = uitofp i32 [[TMP11]] to float +; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -20,18 +20,18 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,18 +92,18 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -13,19 +13,19 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT2]] to -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP4]], ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc [[BROADCAST_SPLAT]] to +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP6]], ptr [[TMP7]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -39,7 +39,7 @@ ; CHECK-NEXT: store i16 [[CONV21]], ptr [[ARRAYIDX23]], align 2 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -73,22 +73,43 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[ARG1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = trunc [[BROADCAST_SPLAT]] to -; CHECK-NEXT: [[TMP7:%.*]] = xor [[WIDE_LOAD]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[TMP8]], ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT]] to +; CHECK-NEXT: [[TMP8:%.*]] = xor [[WIDE_LOAD]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP8]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[LEN]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[CONV]], [[ARG1]] +; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[CONV]] +; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8 +; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.exit: +; CHECK-NEXT: ret void +; entry: br label %for.body