diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9330,6 +9330,7 @@ VPlanTransforms::sinkScalarOperands(*Plan); VPlanTransforms::mergeReplicateRegions(*Plan); + VPlanTransforms::removeDeadRecipes(*Plan, *OrigLoop); std::string PlanName; raw_string_ostream RSO(PlanName); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -23,6 +23,7 @@ class Instruction; class PHINode; class ScalarEvolution; +class Loop; struct VPlanTransforms { /// Replaces the VPInstructions in \p Plan with corresponding @@ -49,6 +50,10 @@ /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV /// recipe, if it exists. static void removeRedundantCanonicalIVs(VPlan &Plan); + + /// Try to remove dead recipes. At the moment, only dead header recipes are + /// removed. + static void removeDeadRecipes(VPlan &Plan, Loop &OrigLoop); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -295,14 +295,19 @@ } void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { - SmallVector<std::pair<VPRecipeBase *, VPValue *>> CastsToRemove; for (auto &Phi : Plan.getEntry()->getEntryBasicBlock()->phis()) { auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); if (!IV || IV->getTruncInst()) continue; - // Visit all casts connected to IV and in Casts. Collect them. - // remember them for removal. + // A sequence of IR Casts has potentially been recorded for IV, which + // *must be bypassed* when the IV is vectorized, because the vectorized IV + // will produce the desired casted value. 
This sequence forms a def-use + // chain and is provided in reverse order, ending with the cast that uses + // the IV phi. Search for the recipe of the last cast in the chain and + // replace it with the original IV. Note that only the final cast is + // expected to have users outside the cast-chain and the dead casts left + // over will be cleaned up later. auto &Casts = IV->getInductionDescriptor().getCastInsts(); VPValue *FindMyCast = IV; for (Instruction *IRCast : reverse(Casts)) { @@ -315,14 +320,9 @@ break; } } - assert(FoundUserCast && "Missing a cast to remove"); - CastsToRemove.emplace_back(FoundUserCast, IV); FindMyCast = FoundUserCast->getVPSingleValue(); } - } - for (auto &E : CastsToRemove) { - E.first->getVPSingleValue()->replaceAllUsesWith(E.second); - E.first->eraseFromParent(); + FindMyCast->replaceAllUsesWith(IV); } } @@ -358,3 +358,23 @@ } } } + +void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) { + VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + // Remove dead recipes in header block. The recipes in the block are processed + // in reverse order, to catch chains of dead recipes. + // TODO: Remove dead recipes across whole plan. + for (VPRecipeBase &R : make_early_inc_range(reverse(*Header))) { + if (R.mayHaveSideEffects() || + any_of(R.definedValues(), + [](VPValue *V) { return V->getNumUsers() > 0; }) || + (R.getUnderlyingInstr() && + any_of(R.getUnderlyingInstr()->users(), [&OrigLoop](User *U) { + // Check for live-out users currently not modeled in VPlan. + // TODO: Remove once live-outs are modeled in VPlan. 
+ return !OrigLoop.contains(cast<Instruction>(U)); + }))) + continue; + R.eraseFromParent(); + } +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 2 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 -2 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <12 x i32>* diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -13,7 +13,6 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1> @@ -49,8 +48,7 @@ ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 
[[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP5]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 @@ -140,8 +138,6 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[START:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8* [[START]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -157,7 +153,6 @@ ; CHECK-NEXT: [[TMP11:%.*]] = add [[DOTSPLAT]], [[TMP10]] ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP14]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to * @@ -165,8 +160,6 @@ ; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP15]] to * ; CHECK-NEXT: store [[TMP17]], * [[TMP18]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, [[TMP12]], i64 1 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq [[TMP19]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: 
[[TMP22:%.*]] = mul i64 [[TMP21]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP22]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll @@ -23,8 +23,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -34,12 +34,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -73,14 +73,8 @@ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[DOTPROMOTED]], [[INDEX]] ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = sub i8 [[CONV3]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX4]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX4]], -4 ; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP20]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP21]], 1 -; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1 -; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP25]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 [[TMP26]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 0 @@ -89,8 +83,6 @@ ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 4 ; CHECK-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP34]], align 4 -; CHECK-NEXT: [[TMP35:%.*]] = zext i8 [[TMP27]] to i32 -; CHECK-NEXT: [[TMP36:%.*]] = zext i8 [[TMP28]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll @@ -37,10 +37,7 @@ ; SSE2-NEXT: [[STRIDED_VEC4:%.*]] = 
shufflevector <8 x i16> [[WIDE_VEC2]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[STRIDED_VEC3]] to <4 x i32> ; SSE2-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP5]] -; SSE2-NEXT: [[TMP11:%.*]] = or i64 [[TMP1]], 1 -; SSE2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP11]] ; SSE2-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[STRIDED_VEC1]] to <4 x i32> -; SSE2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP11]] ; SSE2-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[STRIDED_VEC4]] to <4 x i32> ; SSE2-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]] ; SSE2-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP16]], [[TMP10]] @@ -134,14 +131,8 @@ ; SSE41-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32> ; SSE41-NEXT: [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP18]], [[TMP10]] ; SSE41-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP19]], [[TMP11]] -; SSE41-NEXT: [[TMP22:%.*]] = or i64 [[TMP2]], 1 -; SSE41-NEXT: [[TMP23:%.*]] = or i64 [[TMP3]], 1 -; SSE41-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP22]] -; SSE41-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP23]] ; SSE41-NEXT: [[TMP26:%.*]] = sext <4 x i16> [[STRIDED_VEC3]] to <4 x i32> ; SSE41-NEXT: [[TMP27:%.*]] = sext <4 x i16> [[STRIDED_VEC4]] to <4 x i32> -; SSE41-NEXT: [[TMP28:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP22]] -; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP23]] ; SSE41-NEXT: [[TMP30:%.*]] = sext <4 x i16> [[STRIDED_VEC9]] to <4 x i32> ; SSE41-NEXT: [[TMP31:%.*]] = sext <4 x i16> [[STRIDED_VEC10]] to <4 x i32> ; SSE41-NEXT: [[TMP32:%.*]] = mul nsw <4 x i32> [[TMP30]], [[TMP26]] @@ -276,22 +267,10 @@ ; AVX1-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP37]], [[TMP21]] ; AVX1-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP38]], [[TMP22]] ; AVX1-NEXT: [[TMP43:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP23]] -; AVX1-NEXT: 
[[TMP44:%.*]] = or i64 [[TMP4]], 1 -; AVX1-NEXT: [[TMP45:%.*]] = or i64 [[TMP5]], 1 -; AVX1-NEXT: [[TMP46:%.*]] = or i64 [[TMP6]], 1 -; AVX1-NEXT: [[TMP47:%.*]] = or i64 [[TMP7]], 1 -; AVX1-NEXT: [[TMP48:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP44]] -; AVX1-NEXT: [[TMP49:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP45]] -; AVX1-NEXT: [[TMP50:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP46]] -; AVX1-NEXT: [[TMP51:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP47]] ; AVX1-NEXT: [[TMP52:%.*]] = sext <4 x i16> [[STRIDED_VEC7]] to <4 x i32> ; AVX1-NEXT: [[TMP53:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32> ; AVX1-NEXT: [[TMP54:%.*]] = sext <4 x i16> [[STRIDED_VEC9]] to <4 x i32> ; AVX1-NEXT: [[TMP55:%.*]] = sext <4 x i16> [[STRIDED_VEC10]] to <4 x i32> -; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP44]] -; AVX1-NEXT: [[TMP57:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP45]] -; AVX1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP46]] -; AVX1-NEXT: [[TMP59:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP47]] ; AVX1-NEXT: [[TMP60:%.*]] = sext <4 x i16> [[STRIDED_VEC19]] to <4 x i32> ; AVX1-NEXT: [[TMP61:%.*]] = sext <4 x i16> [[STRIDED_VEC20]] to <4 x i32> ; AVX1-NEXT: [[TMP62:%.*]] = sext <4 x i16> [[STRIDED_VEC21]] to <4 x i32> @@ -389,10 +368,7 @@ ; AVX2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i16> [[WIDE_VEC2]], <16 x i16> poison, <8 x i32> ; AVX2-NEXT: [[TMP9:%.*]] = sext <8 x i16> [[STRIDED_VEC3]] to <8 x i32> ; AVX2-NEXT: [[TMP10:%.*]] = mul nsw <8 x i32> [[TMP9]], [[TMP5]] -; AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP1]], 1 -; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16* [[S1]], i64 [[TMP11]] ; AVX2-NEXT: [[TMP13:%.*]] = sext <8 x i16> [[STRIDED_VEC1]] to <8 x i32> -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[S2]], i64 [[TMP11]] ; AVX2-NEXT: [[TMP15:%.*]] = sext <8 x i16> 
[[STRIDED_VEC4]] to <8 x i32> ; AVX2-NEXT: [[TMP16:%.*]] = mul nsw <8 x i32> [[TMP15]], [[TMP13]] ; AVX2-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP16]], [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -154,7 +154,7 @@ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] -; CHECK: pred.store.if24: +; CHECK: pred.store.if23: ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] @@ -163,10 +163,10 @@ ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.continue25: +; CHECK: pred.store.continue24: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; CHECK: pred.store.if26: +; CHECK: pred.store.if25: ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 @@ -176,10 +176,10 @@ ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]] ; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 ; CHECK-NEXT: br label 
[[PRED_STORE_CONTINUE32]] -; CHECK: pred.store.continue27: +; CHECK: pred.store.continue26: ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; CHECK: pred.store.if28: +; CHECK: pred.store.if27: ; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 @@ -189,10 +189,10 @@ ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]] ; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]] -; CHECK: pred.store.continue29: +; CHECK: pred.store.continue28: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.if30: +; CHECK: pred.store.if29: ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 @@ -202,7 +202,7 @@ ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]] ; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.continue31: +; CHECK: pred.store.continue30: ; CHECK-NEXT: [[INDEX_NEXT37]] = add i64 [[INDEX38]], 4 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT37]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll --- 
a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -16,10 +16,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ADDR:%.*]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ADDR]], align 4 @@ -69,10 +65,6 @@ ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ADDR:%.*]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -139,10 +131,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = 
udiv i32 [[BYTE_OFFSET:%.*]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = udiv i32 [[BYTE_OFFSET]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[BYTE_OFFSET]], 4 @@ -202,10 +190,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: store i32 0, i32* [[ADDR:%.*]], align 4 ; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 @@ -262,15 +246,7 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i32> [[VEC_IND4]], ; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <4 x i32> [[STEP_ADD5]], ; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <4 x i32> [[STEP_ADD6]], @@ -307,7 +283,6 @@ ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i32 3 ; CHECK-NEXT: store i32 [[TMP19]], i32* [[ADDR]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x 
i32> [[STEP_ADD7]], ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -392,10 +367,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12 @@ -594,10 +565,6 @@ ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @GAddr, align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -669,10 +636,6 @@ ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -170,7 +170,6 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -2317,8 +2317,6 @@ ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] 
= add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef @@ -2348,8 +2346,6 @@ ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[TMP1]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -2380,7 +2376,6 @@ ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef @@ -2410,7 +2405,6 @@ ; NO-SINK-AFTER: vector.body: ; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] -; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef @@ -2523,8 +2517,6 @@ ; UNROLL-NO-IC-NEXT: 
[[TMP5:%.*]] = add i32 [[INDEX]], 5 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP11]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT3]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP10]], <4 x i32> @@ -2564,8 +2556,6 @@ ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = zext i32 [[INDUCTION]] to i64 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[INDUCTION1]] to i64 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[INDUCTION]], [[X:%.*]] ; UNROLL-NO-VF-NEXT: [[TMP3]] = add i32 [[INDUCTION1]], [[X]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2605,7 +2595,6 @@ ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> ; SINK-AFTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2648,7 +2637,6 @@ ; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> ; NO-SINK-AFTER-NEXT: [[TMP5]] = add <4 x i32> 
[[VEC_IND]], [[BROADCAST_SPLAT]] ; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> ; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2885,9 +2873,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 7 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 25 ; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]] -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]] @@ -2975,9 +2960,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25 ; UNROLL-NO-VF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] -; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[OFFSET_IDX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load double, double* [[TMP4]], align 8 @@ -3046,8 +3028,6 @@ ; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 ; SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25 ; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; SINK-AFTER-NEXT: [[TMP8:%.*]] = 
add i32 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] ; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] ; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]] @@ -3123,8 +3103,6 @@ ; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 ; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25 ; NO-SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]] -; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 0 ; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] ; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] ; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]] @@ -5013,8 +4991,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP7]] = add <4 x i16> [[TMP1]], ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP6]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sub <4 x i16> [[TMP9]], ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 @@ -5063,8 +5039,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP4]] = zext i16 [[TMP2]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], 5 ; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP2]], 5 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub i16 [[TMP5]], 10 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 
[[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42 ; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] @@ -5104,7 +5078,6 @@ ; SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP0]], ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 @@ -5149,7 +5122,6 @@ ; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> ; NO-SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP0]], ; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> -; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], ; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 @@ -6751,14 +6723,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP13]] = zext <4 x i16> [[TMP11]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP12]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq <4 x i32> [[TMP14]], -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq <4 x i32> [[TMP15]], -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq <4 x i1> [[TMP16]], -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq <4 x i1> [[TMP17]], -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP16]], [[TMP18]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = and <4 x i1> 
[[TMP17]], [[TMP19]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = zext <4 x i1> [[TMP20]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = zext <4 x i1> [[TMP21]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[TMP24]], i32 0 @@ -6814,14 +6778,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP5]] = zext i16 [[TMP3]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[VECTOR_RECUR]], 15 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP4]], 15 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp eq i1 [[TMP6]], true -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i1 [[TMP7]], true -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = and i1 [[TMP6]], [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = and i1 [[TMP7]], [[TMP9]] -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = zext i1 [[TMP10]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = zext i1 [[TMP11]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[INDUCTION]] ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[INDUCTION1]] ; UNROLL-NO-VF-NEXT: store i32 0, i32* [[TMP14]], align 4 @@ -6871,10 +6827,6 @@ ; SINK-AFTER-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]] ; SINK-AFTER-NEXT: [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[TMP7]], -; SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[TMP8]], -; SINK-AFTER-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP8]], [[TMP9]] -; SINK-AFTER-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]] ; 
SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 0 ; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* @@ -6927,10 +6879,6 @@ ; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]] ; NO-SINK-AFTER-NEXT: [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32> ; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> -; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[TMP7]], -; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[TMP8]], -; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP8]], [[TMP9]] -; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32> ; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]] ; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 0 ; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -461,9 +461,6 @@ ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; UNROLL-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 ; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6]] ; UNROLL: pred.store.if: ; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -479,7 +476,7 @@ ; UNROLL-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 ; UNROLL-NEXT: store i8 [[TMP7]], i8* [[TMP4]], align 1 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE6]] -; UNROLL: pred.store.continue6: +; UNROLL: pred.store.continue4: ; 
UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef ; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -512,9 +509,6 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -526,7 +520,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; UNROLL-NOSIMPLIFY: pred.store.if5: +; UNROLL-NOSIMPLIFY: pred.store.if3: ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 @@ -534,7 +528,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], i8* [[TMP4]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE6]] -; UNROLL-NOSIMPLIFY: pred.store.continue6: +; UNROLL-NOSIMPLIFY: pred.store.continue4: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -572,8 
+566,6 @@ ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VEC-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; VEC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 ; VEC-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <2 x i8>* @@ -683,9 +675,6 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 [[INDUCTION]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[PTR]], i64 [[INDUCTION2]] ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, i8* [[TMP0]], align 1 @@ -699,13 +688,13 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; UNROLL-NOSIMPLIFY: pred.store.if5: +; UNROLL-NOSIMPLIFY: pred.store.if3: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP1]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], i8* [[TMP1]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE6]] -; UNROLL-NOSIMPLIFY: pred.store.continue6: +; UNROLL-NOSIMPLIFY: pred.store.continue4: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = 
add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll --- a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll @@ -11,20 +11,10 @@ ; VF1-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; VF1-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF1-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; VF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* null, i64 [[TMP3]] -; VF1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; VF1-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* null, i64 [[TMP4]] -; VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 1 -; VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP4]], i64 1 ; VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[INDUCTION]] ; VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[INDUCTION3]] ; VF1-NEXT: store i8 0, i8* [[TMP7]], align 1 ; VF1-NEXT: store i8 0, i8* [[TMP8]], align 1 -; VF1-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[TMP5]] to i64 -; VF1-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[TMP6]] to i64 -; VF1-NEXT: [[TMP11:%.*]] = sub i64 ptrtoint (i32* @f to i64), [[TMP9]] -; VF1-NEXT: [[TMP12:%.*]] = sub i64 ptrtoint (i32* @f to i64), [[TMP10]] ; VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF1-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], ; VF1-NEXT: br i1 [[TMP13]], label %middle.block, label %vector.body @@ -35,19 +25,10 @@ ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF2-NEXT: [[TMP5:%.*]] = add i64 
[[INDEX]], 0 -; VF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* null, i64 [[TMP5]] -; VF2-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; VF2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i32, i32* null, i64 [[TMP6]] -; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 1 -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP3]], i64 1 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]] ; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP4]] ; VF2-NEXT: store i8 0, i8* [[TMP9]], align 1 ; VF2-NEXT: store i8 0, i8* [[TMP10]], align 1 -; VF2-NEXT: [[TMP11:%.*]] = ptrtoint i32* [[TMP7]] to i64 -; VF2-NEXT: [[TMP12:%.*]] = ptrtoint i32* [[TMP8]] to i64 -; VF2-NEXT: [[TMP13:%.*]] = sub i64 ptrtoint (i32* @f to i64), [[TMP11]] ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], ; VF2-NEXT: br i1 [[TMP14]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2867,9 +2867,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 @@ -2940,10 +2937,6 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , 
[[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8 -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i8 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI]], ; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[VEC_PHI1]], ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3013,9 +3006,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 @@ -3086,10 +3076,6 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI]], ; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[VEC_PHI1]], ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3163,8 +3149,6 @@ ; CHECK: 
vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2]] = and <2 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 @@ -3235,9 +3219,6 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], ; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI1]], ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3328,9 +3309,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP7]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -3465,10 +3443,6 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: 
[[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8 -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[OFFSET_IDX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP8]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP9]] = and <2 x i32> [[BROADCAST_SPLAT3]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3605,13 +3579,10 @@ ; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]] ; CHECK-NEXT: [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 ; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i8> [[VEC_IND]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], @@ -3827,11 +3798,7 @@ ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 0 -; 
UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]] ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP22]] @@ -3841,8 +3808,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 2 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP31]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = add <2 x i8> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = add <2 x i8> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], @@ -4021,14 +3986,10 @@ ; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]] ; CHECK-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP26]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = add <2 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], @@ -4253,12 +4214,8 @@ ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]] ; UNROLL-NO-IC-NEXT: 
[[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 3 ; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]] ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP23]] ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 0 @@ -4267,10 +4224,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 2 ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP32]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = add <2 x i8> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = add <2 x i8> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> -; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = zext <2 x i8> [[TMP34]] to <2 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], @@ -4637,7 +4590,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 @@ -4647,7 +4599,6 @@ ; CHECK-NEXT: [[TMP15:%.*]] 
= bitcast i32* [[TMP14]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND1]], <2 x i32>* [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] @@ -4773,9 +4724,7 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 @@ -4791,7 +4740,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD3]], <2 x i32>* [[TMP20]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] @@ -5129,7 +5077,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] 
], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] @@ -5137,7 +5084,6 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] @@ -5255,11 +5201,9 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] @@ -5270,7 +5214,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD4]], <2 x i32>* [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; 
UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] @@ -5370,7 +5313,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE4:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_UREM_CONTINUE4]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] @@ -5388,18 +5330,17 @@ ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]] -; CHECK: pred.urem.if3: +; CHECK: pred.urem.if1: ; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_UREM_CONTINUE4]] -; CHECK: pred.urem.continue4: +; CHECK: pred.urem.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: 
[[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] @@ -5455,12 +5396,12 @@ ; IND-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ] ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1 ; IND-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]] -; IND: pred.urem.if3: +; IND: pred.urem.if1: ; IND-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], -19 ; IND-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]] ; IND-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i64 1 ; IND-NEXT: br label [[PRED_UREM_CONTINUE4]] -; IND: pred.urem.continue4: +; IND: pred.urem.continue2: ; IND-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ] ; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] ; IND-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> @@ -5511,30 +5452,30 @@ ; UNROLL-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] ; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 ; UNROLL-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] -; UNROLL: pred.urem.if7: +; UNROLL: pred.urem.if3: ; UNROLL-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19 ; UNROLL-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] ; UNROLL-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i64 1 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE8]] -; UNROLL: pred.urem.continue8: +; UNROLL: pred.urem.continue4: ; UNROLL-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i64 0 ; UNROLL-NEXT: br i1 [[TMP16]], 
label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] -; UNROLL: pred.urem.if9: +; UNROLL: pred.urem.if5: ; UNROLL-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18 ; UNROLL-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] ; UNROLL-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i64 0 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE10]] -; UNROLL: pred.urem.continue10: +; UNROLL: pred.urem.continue6: ; UNROLL-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] ; UNROLL-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i64 1 ; UNROLL-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12]] -; UNROLL: pred.urem.if11: +; UNROLL: pred.urem.if7: ; UNROLL-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17 ; UNROLL-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] ; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i64 1 ; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE12]] -; UNROLL: pred.urem.continue12: +; UNROLL: pred.urem.continue8: ; UNROLL-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] ; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] ; UNROLL-NEXT: [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] @@ -5570,11 +5511,9 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE12:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE12]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE12]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], 
[[PRED_UREM_CONTINUE12]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE12]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i16> [[VEC_IND3]], ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 @@ -5593,30 +5532,30 @@ ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] -; UNROLL-NO-IC: pred.urem.if7: +; UNROLL-NO-IC: pred.urem.if3: ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], 1 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE8]] -; UNROLL-NO-IC: pred.urem.continue8: +; UNROLL-NO-IC: pred.urem.continue4: ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] -; UNROLL-NO-IC: pred.urem.if9: +; UNROLL-NO-IC: pred.urem.if5: ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], 2 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE10]] -; UNROLL-NO-IC: pred.urem.continue10: +; UNROLL-NO-IC: pred.urem.continue6: ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] ; UNROLL-NO-IC-NEXT: 
[[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12]] -; UNROLL-NO-IC: pred.urem.if11: +; UNROLL-NO-IC: pred.urem.if7: ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], 3 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE12]] -; UNROLL-NO-IC: pred.urem.continue12: +; UNROLL-NO-IC: pred.urem.continue8: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] ; UNROLL-NO-IC-NEXT: [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] @@ -5625,7 +5564,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI2]], [[TMP27]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i16> [[STEP_ADD4]], ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] @@ -5685,66 +5623,66 @@ ; INTERLEAVE-NEXT: [[TMP10:%.*]] = phi <4 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] ; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 ; INTERLEAVE-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] -; INTERLEAVE: pred.urem.if7: +; INTERLEAVE: pred.urem.if3: ; INTERLEAVE-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] ; INTERLEAVE-NEXT: [[TMP14:%.*]] = 
insertelement <4 x i16> [[TMP10]], i16 [[TMP13]], i64 1 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE8]] -; INTERLEAVE: pred.urem.continue8: +; INTERLEAVE: pred.urem.continue4: ; INTERLEAVE-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] ; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 ; INTERLEAVE-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] -; INTERLEAVE: pred.urem.if9: +; INTERLEAVE: pred.urem.if5: ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] ; INTERLEAVE-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP18]], i64 2 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE10]] -; INTERLEAVE: pred.urem.continue10: +; INTERLEAVE: pred.urem.continue6: ; INTERLEAVE-NEXT: [[TMP20:%.*]] = phi <4 x i16> [ [[TMP15]], [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] ; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 ; INTERLEAVE-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]] -; INTERLEAVE: pred.urem.if11: +; INTERLEAVE: pred.urem.if7: ; INTERLEAVE-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] ; INTERLEAVE-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP23]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE12]] -; INTERLEAVE: pred.urem.continue12: +; INTERLEAVE: pred.urem.continue8: ; INTERLEAVE-NEXT: [[TMP25:%.*]] = phi <4 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] ; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 ; INTERLEAVE-NEXT: br i1 [[TMP26]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]] -; INTERLEAVE: pred.urem.if13: +; INTERLEAVE: pred.urem.if9: ; INTERLEAVE-NEXT: [[TMP27:%.*]] = add i16 [[TMP1]], -16 ; 
INTERLEAVE-NEXT: [[TMP28:%.*]] = urem i16 [[B]], [[TMP27]] ; INTERLEAVE-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i64 0 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE14]] -; INTERLEAVE: pred.urem.continue14: +; INTERLEAVE: pred.urem.continue10: ; INTERLEAVE-NEXT: [[TMP30:%.*]] = phi <4 x i16> [ poison, [[PRED_UREM_CONTINUE12]] ], [ [[TMP29]], [[PRED_UREM_IF13]] ] ; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 ; INTERLEAVE-NEXT: br i1 [[TMP31]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]] -; INTERLEAVE: pred.urem.if15: +; INTERLEAVE: pred.urem.if11: ; INTERLEAVE-NEXT: [[TMP32:%.*]] = add i16 [[TMP1]], -15 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = urem i16 [[B]], [[TMP32]] ; INTERLEAVE-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP33]], i64 1 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE16]] -; INTERLEAVE: pred.urem.continue16: +; INTERLEAVE: pred.urem.continue12: ; INTERLEAVE-NEXT: [[TMP35:%.*]] = phi <4 x i16> [ [[TMP30]], [[PRED_UREM_CONTINUE14]] ], [ [[TMP34]], [[PRED_UREM_IF15]] ] ; INTERLEAVE-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 ; INTERLEAVE-NEXT: br i1 [[TMP36]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]] -; INTERLEAVE: pred.urem.if17: +; INTERLEAVE: pred.urem.if13: ; INTERLEAVE-NEXT: [[TMP37:%.*]] = add i16 [[TMP1]], -14 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = urem i16 [[B]], [[TMP37]] ; INTERLEAVE-NEXT: [[TMP39:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP38]], i64 2 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE18]] -; INTERLEAVE: pred.urem.continue18: +; INTERLEAVE: pred.urem.continue14: ; INTERLEAVE-NEXT: [[TMP40:%.*]] = phi <4 x i16> [ [[TMP35]], [[PRED_UREM_CONTINUE16]] ], [ [[TMP39]], [[PRED_UREM_IF17]] ] ; INTERLEAVE-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 ; INTERLEAVE-NEXT: br i1 [[TMP41]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20]] -; INTERLEAVE: pred.urem.if19: +; 
INTERLEAVE: pred.urem.if15: ; INTERLEAVE-NEXT: [[TMP42:%.*]] = add i16 [[TMP1]], -13 ; INTERLEAVE-NEXT: [[TMP43:%.*]] = urem i16 [[B]], [[TMP42]] ; INTERLEAVE-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP43]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE20]] -; INTERLEAVE: pred.urem.continue20: +; INTERLEAVE: pred.urem.continue16: ; INTERLEAVE-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP40]], [[PRED_UREM_CONTINUE18]] ], [ [[TMP44]], [[PRED_UREM_IF19]] ] ; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> zeroinitializer, <4 x i16> [[TMP25]] ; INTERLEAVE-NEXT: [[PREDPHI21:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP45]] @@ -5825,7 +5763,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] @@ -5842,7 +5779,6 @@ ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], ; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND6]], @@ -6038,12 +5974,10 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; 
UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND12:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND4]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD9]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD13:%.*]] = add <2 x i32> [[VEC_IND12]], @@ -6070,7 +6004,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[STEP_ADD5]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT15]] = add <2 x i32> [[STEP_ADD13]], @@ -6235,11 +6168,8 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ 
[[VEC_IND2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1 @@ -6254,7 +6184,6 @@ ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] @@ -6367,13 +6296,8 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 ; UNROLL-NO-IC-NEXT: [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 @@ -6401,7 +6325,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = 
bitcast i32* [[TMP19]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP16]], <2 x i32>* [[TMP20]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -145,8 +145,7 @@ ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP1]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP2]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -16,13 +16,9 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to 
i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -30,9 +30,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] ; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX3]] to i8 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8 @@ -50,10 +47,10 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.if4: +; CHECK: pred.store.if3: ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue5: +; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP14:%.*]] = add i8 
[[TMP10]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll --- a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll +++ b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll @@ -46,7 +46,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3 ; CHECK-NEXT: store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3 ; CHECK-NEXT: store i8 10, i8* [[TMP0]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -25,8 +25,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll --- 
a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -327,13 +327,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -10,7 +10,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll --- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll +++ 
b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll @@ -26,8 +26,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 @@ -94,8 +92,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -214,15 +214,8 @@ ; CHECK-NEXT: ir<%AB.0> = load from index 0 ; CHECK-NEXT: ir<%AB.1> = load from index 1 ; CHECK-NEXT: ir<%AB.3> = load from index 3 -; CHECK-NEXT: CLONE ir<%iv.plus.1> = add ir<%iv>, ir<1> -; CHECK-NEXT: CLONE ir<%gep.AB.1> = getelementptr ir<@AB>, ir<0>, ir<%iv.plus.1> -; CHECK-NEXT: CLONE ir<%iv.plus.2> = add ir<%iv>, ir<2> ; CHECK-NEXT: CLONE ir<%iv.plus.3> = add ir<%iv>, ir<3> -; CHECK-NEXT: CLONE ir<%gep.AB.3> = getelementptr ir<@AB>, ir<0>, ir<%iv.plus.3> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%AB.0>, ir<%AB.1> -; CHECK-NEXT: CLONE ir<%gep.CD.0> = getelementptr ir<@CD>, ir<0>, ir<%iv> -; CHECK-NEXT: CLONE ir<%gep.CD.1> = getelementptr 
ir<@CD>, ir<0>, ir<%iv.plus.1> -; CHECK-NEXT: CLONE ir<%gep.CD.2> = getelementptr ir<@CD>, ir<0>, ir<%iv.plus.2> ; CHECK-NEXT: CLONE ir<%gep.CD.3> = getelementptr ir<@CD>, ir<0>, ir<%iv.plus.3> ; CHECK-NEXT: INTERLEAVE-GROUP with factor 4 at , ir<%gep.CD.3> ; CHECK-NEXT: store ir<%add> to index 0 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %tmp0 = phi %tmp6, 0 -; CHECK-NEXT: WIDEN-INDUCTION %tmp1 = phi %tmp7, 0 ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, ir<%tmp0> ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2> ; CHECK-NEXT: CLONE store ir<0>, ir<%tmp2> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -925,7 +925,6 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: Successor(s): loop.then ; CHECK-EMPTY: ; CHECK-NEXT: loop.then: