diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2527,27 +2527,6 @@ return ScalarIV; }; - // Create the vector values from the scalar IV, in the absence of creating a - // vector IV. - auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { - Value *Broadcasted = getBroadcastInstrs(ScalarIV); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *StartIdx; - if (Step->getType()->isFloatingPointTy()) - StartIdx = - getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part); - else - StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part); - - Value *EntryPart = - getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(), - State.VF, State.Builder); - State.set(Def, EntryPart, Part); - if (Trunc) - addMetadata(EntryPart, Trunc); - } - }; - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) @@ -2583,35 +2562,18 @@ return; } - // Determine if we want a scalar version of the induction variable. This is - // true if the induction variable itself is not widened, or if it has at - // least one user in the loop that is not widened. - if (Def->needsVectorIV() && !Def->needsScalarIV()) { + // Create a new independent vector induction variable, if one is needed. + if (Def->needsVectorIV()) createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - return; - } - // Try to create a new independent vector induction variable. If we can't - // create the phi node, we will splat the scalar induction variable in each - // loop iteration. - if (Def->needsVectorIV() && Def->needsScalarIV()) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - Value *ScalarIV = CreateScalarIV(Step); + if (Def->needsScalarIV()) { // Create scalar steps that can be used by instructions we will later // scalarize. Note that the addition of the scalar steps will not increase // the number of instructions in the loop in the common case prior to // InstCombine. We will be trading one vector extract for each scalar step. + Value *ScalarIV = CreateScalarIV(Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - return; } - - // All IV users are scalar instructions, so only emit a scalar IV, not a - // vectorised IV. Except when we tail-fold, then the splat IV feeds the - // predicate used by the masked loads/stores. - Value *ScalarIV = CreateScalarIV(Step); - if (!Cost->isScalarEpilogueAllowed()) - CreateSplatIV(ScalarIV, Step); - buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); } void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -342,10 +342,20 @@ for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast(&Phi); - // If the induction recipe is canonical and the types match, use it - // directly. - if (WidenOriginalIV && WidenOriginalIV->isCanonical() && - WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) { + if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || + WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) + continue; + + // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides + // everything WidenNewIV's users need. That is, WidenOriginalIV will + // generate a vector phi or all users of WidenNewIV demand the first lane + // only. + if (WidenOriginalIV->needsVectorIV() || + all_of(WidenNewIV->users(), [](VPUser *U) { + auto *R = dyn_cast(U); + auto *VPI = dyn_cast_or_null(R); + return VPI && VPI->getOpcode() == VPInstruction::ActiveLaneMask; + })) { WidenNewIV->replaceAllUsesWith(WidenOriginalIV); WidenNewIV->eraseFromParent(); return; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -25,12 +25,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -25,12 +25,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]] @@ -78,12 +72,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[TMP8]] @@ -206,12 +194,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[IND:%.*]], i64 [[TMP8]] @@ -268,12 +250,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT2]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n) ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[SRC:%.*]], align 4 @@ -331,12 +307,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP8]] @@ -410,12 +380,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT2]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP8]] @@ -466,12 +430,6 @@ ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[SRC:%.*]], i64 [[TMP8]] @@ -531,12 +489,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %vector.ph ], [ [[TMP14:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]] @@ -589,12 +541,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[TMP14:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[INDEX1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[PTR:%.*]], i64 [[TMP8]] @@ -645,12 +591,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 7, i32 0), %vector.ph ], [ [[PREDPHI:%.*]], %vector.body ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[N]]) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -11,9 +11,6 @@ ; CHECK-LABEL: @uniform_load( ; CHECK: vector.body: ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[IDX]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[TMP2]], ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK-NEXT: [[LOOP_PRED:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP3]], i64 %n) ; CHECK-NEXT: [[LOAD_VAL:%.*]] = load i32, i32* %src, align 4 @@ -55,9 +52,6 @@ ; CHECK-NEXT: [[SRC_SPLAT:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> poison, <4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ] -; CHECK: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[IDX]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[TMP2]], ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK-NEXT: [[LOOP_PRED:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP3]], i64 %n) ; CHECK: [[COND_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{%.*}}, i32 4, <4 x i1> [[LOOP_PRED]], <4 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -14,9 +14,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1 @@ -356,9 +353,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]]) ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], [[STRIDE]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] @@ -107,9 +104,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] @@ -195,9 +189,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -266,9 +257,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -337,9 +325,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -408,9 +393,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -479,9 +461,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -550,9 +529,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]] @@ -621,9 +597,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -17,10 +17,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -32,7 +32,7 @@ ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -58,10 +58,10 @@ ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -73,7 +73,7 @@ ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: @@ -121,10 +121,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -162,10 +162,10 @@ ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -177,7 +177,7 @@ ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -17,9 +17,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -154,7 +154,7 @@ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] -; CHECK: pred.store.if29: +; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] @@ -163,10 +163,10 @@ ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.continue30: +; CHECK: pred.store.continue25: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; CHECK: pred.store.if31: +; CHECK: pred.store.if26: ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 @@ -176,10 +176,10 @@ ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]] ; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]] -; CHECK: pred.store.continue32: +; CHECK: pred.store.continue27: ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; CHECK: pred.store.if33: +; CHECK: pred.store.if28: ; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 @@ -189,10 +189,10 @@ ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]] ; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]] -; CHECK: pred.store.continue34: +; CHECK: pred.store.continue29: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.if35: +; CHECK: pred.store.if30: ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 @@ -202,7 +202,7 @@ ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]] ; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.continue36: +; CHECK: pred.store.continue31: ; CHECK-NEXT: [[INDEX_NEXT37]] = add i64 [[INDEX38]], 4 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT37]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]] @@ -292,7 +292,7 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] -; CHECK: pred.store.if16: +; CHECK: pred.store.if14: ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 @@ -300,10 +300,10 @@ ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 ; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] -; CHECK: pred.store.continue17: +; CHECK: pred.store.continue15: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] -; CHECK: pred.store.if18: +; CHECK: pred.store.if16: ; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 @@ -311,10 +311,10 @@ ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16 ; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.continue19: +; CHECK: pred.store.continue17: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]] -; CHECK: pred.store.if20: +; CHECK: pred.store.if18: ; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3 @@ -322,7 +322,7 @@ ; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 ; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]] -; CHECK: pred.store.continue21: +; CHECK: pred.store.continue19: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -13,10 +13,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 @@ -84,10 +84,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 @@ -173,10 +173,10 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -94,10 +94,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0 @@ -166,9 +166,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -14,11 +14,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -87,9 +84,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -172,7 +172,8 @@ ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> -; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> +; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -6345,25 +6345,27 @@ ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] ; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; UNROLL-NO-VF: pred.udiv.if6: +; UNROLL-NO-VF: pred.udiv.if7: ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]] ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; UNROLL-NO-VF: pred.udiv.continue7: +; UNROLL-NO-VF: pred.udiv.continue8: ; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF6]] ] ; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]] ; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI5]], [[TMP5]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.store.if: -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION3]] +; UNROLL-NO-VF: [[SUNK_IND0:%.+]] = add i32 [[INDEX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[SUNK_IND0]] ; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION]], i32* [[TMP10]], align 4 ; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NO-VF: pred.store.continue: ; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; UNROLL-NO-VF: pred.store.if8: -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION4]] +; UNROLL-NO-VF: pred.store.if9: +; UNROLL-NO-VF-NEXT: [[SUNK_IND1:%.+]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[SUNK_IND1]] ; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION2]], i32* [[TMP11]], align 4 ; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE9]] -; UNROLL-NO-VF: pred.store.continue9: +; UNROLL-NO-VF: pred.store.continue10: ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI5]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 @@ -38,11 +35,11 @@ ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_SREM_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4]] -; CHECK: pred.srem.if3: +; CHECK: pred.srem.if1: ; CHECK-NEXT: [[TMP11:%.*]] = srem i16 5786, [[TMP2]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP9]], i16 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]] -; CHECK: pred.srem.continue4: +; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP9]], [[PRED_SREM_CONTINUE]] ], [ [[TMP12]], [[PRED_SREM_IF3]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> , <2 x i16> [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -150,28 +150,32 @@ ; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 ; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] +; VF1UF4-NEXT: [[SUNK_IND0:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[SUNK_IND0]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: ; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; VF1UF4: pred.store.if4: -; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] +; VF1UF4: pred.store.if7: +; VF1UF4-NEXT: [[SUNK_IND1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND1]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]] -; VF1UF4: pred.store.continue5: +; VF1UF4: pred.store.continue8: ; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; VF1UF4: pred.store.if6: -; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] +; VF1UF4: pred.store.if9: +; VF1UF4-NEXT: [[SUNK_IND2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND2]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE7]] -; VF1UF4: pred.store.continue7: +; VF1UF4: pred.store.continue10: ; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; VF1UF4: pred.store.if8: -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] +; VF1UF4: pred.store.if11: +; VF1UF4-NEXT: [[SUNK_IND3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND3]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] -; VF1UF4: pred.store.continue9: +; VF1UF4: pred.store.continue12: ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -31,23 +31,11 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] ; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX3]] to i8 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT4]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0 -; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT7]] -; CHECK-NEXT: [[INDUCTION8:%.*]] = add <2 x i8> [[BROADCAST_SPLAT5]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 @@ -62,10 +50,10 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.if11: +; CHECK: pred.store.if4: ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: +; CHECK: pred.store.continue5: ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -19,9 +19,6 @@ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], @@ -43,7 +40,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if1: ; CHECK-NEXT: [[TMP14:%.*]] = add i16 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw i16 [[TMP14]], -1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP15]], i16 0 @@ -53,7 +50,7 @@ ; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[TMP19]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i16> [[TMP12]], i16 [[TMP20]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue2: ; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i16> [ [[TMP11]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i16> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP24:%.*]] = add nsw <2 x i16> [[TMP22]], [[TMP23]] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -26,9 +26,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -27,28 +27,32 @@ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[INDUCTION3]], 14 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDUCTION]] +; CHECK-NEXT: [[SUNK_IND0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[SUNK_IND0]] ; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; CHECK: pred.store.if4: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION1]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[SUNK_IND1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[SUNK_IND1]] ; CHECK-NEXT: store i32 0, i32* [[TMP5]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] -; CHECK: pred.store.continue5: +; CHECK: pred.store.continue8: ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; CHECK: pred.store.if6: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION2]] +; CHECK: pred.store.if9: +; CHECK-NEXT: [[SUNK_IND2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[SUNK_IND2]] ; CHECK-NEXT: store i32 0, i32* [[TMP6]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] -; CHECK: pred.store.continue7: +; CHECK: pred.store.continue10: ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; CHECK: pred.store.if8: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION3]] +; CHECK: pred.store.if11: +; CHECK-NEXT: [[SUNK_IND3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[SUNK_IND3]] ; CHECK-NEXT: store i32 0, i32* [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK: pred.store.continue9: +; CHECK: pred.store.continue12: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]