Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -774,6 +774,13 @@ // Vector of original scalar PHIs whose corresponding widened PHIs need to be // fixed up at the end of vector code generation. SmallVector OrigPHIsToFix; + + // When we splat the scalar induction variable into a vector early in the + // pipeline, which expands to a sequence of a insertelement, shufflevector + // and add instruction, we may end up not using this vector IV. We save these + // instructions to this list, so that we can easily clean them up later if it + // isn't used. + SmallVector MaybeDeadVectorIV; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -1861,9 +1868,14 @@ // vector IV. auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); + if (ScalarIV->getNumUses()) { + MaybeDeadVectorIV.push_back(*ScalarIV->user_begin()); + MaybeDeadVectorIV.push_back(Broadcasted); + } for (unsigned Part = 0; Part < UF; ++Part) { Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode()); + MaybeDeadVectorIV.push_back(EntryPart); VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); if (Trunc) addMetadata(EntryPart, Trunc); @@ -1904,7 +1916,6 @@ // If we haven't yet vectorized the induction variable, splat the scalar // induction variable, and build the necessary step vectors. - // TODO: Don't do it unless the vectorized IV is really required. Value *ScalarIV = CreateScalarIV(Step); CreateSplatIV(ScalarIV, Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID); @@ -3459,6 +3470,22 @@ } } + +// The vector IV consist of an insertelement, shufflevector and add +// instruction, which are added in this order to the list respectively. Reverse +// the list so that we first start checking the uses of the add, i.e. the last +// instruction in this chain. +static void FixMaybeDeadIV(SmallVector &MaybeDead) { + for (auto *V : reverse(MaybeDead)) { + if (Instruction *I = dyn_cast(V)) { + if (I->getNumUses()) + continue; + I->eraseFromParent(); + } + } + MaybeDead.clear(); +} + void InnerLoopVectorizer::fixVectorizedLoop() { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. @@ -3472,6 +3499,9 @@ fixNonInductionPHIs(); } + // Remove the vector IV if it is dead. + FixMaybeDeadIV(MaybeDeadVectorIV); + // At this point every instruction in the original loop is widened to a // vector form. Now we need to fix the recurrences in the loop. These PHI // nodes are currently empty because we did not want to introduce cycles. Index: llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -16,9 +16,6 @@ ; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph ; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0 -; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer -; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, ; FORCED-NEXT: %0 = add i32 %index, 0 ; FORCED-NEXT: %1 = extractvalue { i64, i64 } %sv, 0 ; FORCED-NEXT: %2 = extractvalue { i64, i64 } %sv, 0 @@ -68,9 +65,6 @@ ; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph ; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0 -; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer -; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, ; FORCED-NEXT: %0 = add i32 %index, 0 ; FORCED-NEXT: %1 = extractvalue { float, float } %sv, 0 ; FORCED-NEXT: %2 = extractvalue { float, float } %sv, 0 Index: llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -65,15 +65,9 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = add i64 [[TMP0]], [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[OFFSET_IDX4]] to i32 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[TMP18]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION7:%.*]] = add <4 x i32> [[BROADCAST_SPLAT6]], ; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[CONV]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 Index: llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll +++ llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll @@ -44,9 +44,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0 Index: llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]] Index: llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -97,9 +97,6 @@ ; AVX: vector.body: ; AVX-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; AVX-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AVX-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[ARR:%.*]], i32 [[TMP0]] ; AVX-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[TMP1]], i32 0 Index: llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -168,12 +168,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -349,12 +343,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP101:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP102:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP103:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -541,12 +529,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP181:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP182:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP183:%.*]], [[PRED_LOAD_CONTINUE36]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -888,12 +870,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP84:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 @@ -1071,12 +1047,6 @@ ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 @@ -1304,12 +1274,6 @@ ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP150:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP151:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 @@ -1609,12 +1573,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -1786,12 +1744,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -1963,12 +1915,6 @@ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 Index: llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -44,9 +44,6 @@ ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -117,12 +114,6 @@ ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 @@ -238,12 +229,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 @@ -389,9 +374,6 @@ ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP1]], i32 0 @@ -462,12 +444,6 @@ ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 @@ -583,12 +559,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 @@ -743,9 +713,6 @@ ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -818,12 +785,6 @@ ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 @@ -944,12 +905,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION12:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION13:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION14:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 @@ -1110,12 +1065,6 @@ ; AVX-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX: vector.body: ; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX-NEXT: [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX-NEXT: [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX-NEXT: [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; AVX-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -1236,12 +1185,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 @@ -1631,12 +1574,6 @@ ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8 @@ -1781,12 +1718,6 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -8 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -16 @@ -1954,12 +1885,6 @@ ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -2084,12 +2009,6 @@ ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -2214,12 +2133,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION1:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION2:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION3:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 @@ -2389,12 +2302,6 @@ ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; AVX1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX1-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -2519,12 +2426,6 @@ ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; AVX2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; AVX2-NEXT: [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -2649,12 +2550,6 @@ ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION1:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION2:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; AVX512-NEXT: [[INDUCTION3:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 Index: llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1601,9 +1601,6 @@ ; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]] ; O1VEC2: vector.body: ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; O1VEC2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; O1VEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; O1VEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -1646,9 +1643,6 @@ ; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]] ; OzVEC2: vector.body: ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; OzVEC2-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; OzVEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; OzVEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 Index: llvm/test/Transforms/LoopVectorize/X86/pr35432.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -40,16 +40,16 @@ ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMIN1]] ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 ; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 @@ -77,10 +77,6 @@ ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4 ; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], Index: llvm/test/Transforms/LoopVectorize/X86/pr36524.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -14,9 +14,6 @@ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], Index: llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 Index: llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -24,21 +24,18 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !1 @@ -112,13 +109,26 @@ ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP2]], <8 x float> undef), !llvm.access.group !6 ; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP6]] to <8 x float>* -; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP8]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP2]]) +; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP8]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP2]]), !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP11]], [[TMP12]] +; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -156,21 +166,18 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 Index: llvm/test/Transforms/LoopVectorize/fcmp-vectorize.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/fcmp-vectorize.ll +++ llvm/test/Transforms/LoopVectorize/fcmp-vectorize.ll @@ -1,15 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s ; Avoid crashing while trying to vectorize fcmp that can be folded to vector of ; i1 true. define void @test1() { -; CHECK-LABEL: test1( -; CHECK-LABEL: vector.body: -; CHECK-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK-NEXT: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 -; CHECK: %induction = add <4 x i32> %broadcast.splat, -; CHECK: %index.next = add i32 %index, 4 - +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 144 +; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 144, 144 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 144, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[FCMP:%.*]] = fcmp uno float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[IV]], 1 +; CHECK-NEXT: [[CND:%.*]] = icmp sgt i32 [[IV]], 142 +; CHECK-NEXT: br i1 [[CND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2 +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop Index: llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -17,9 +17,6 @@ ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK-NEXT: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ] ; CHECK-NEXT: %offset.idx = add i64 1, %index -; CHECK-NEXT: %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %offset.idx, i32 0 -; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %induction = add <4 x i64> %broadcast.splat, ; CHECK-NEXT: %0 = add i64 %offset.idx, 0 ; CHECK-NEXT: %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0 ; CHECK-NEXT: %2 = getelementptr inbounds i32, i32* %1, i32 0 @@ -74,9 +71,6 @@ ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK-NEXT: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ] ; CHECK-NEXT: %offset.idx = add i64 1, %index -; CHECK-NEXT: %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %offset.idx, i32 0 -; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %induction = add <4 x i64> %broadcast.splat, ; CHECK-NEXT: %0 = add i64 %offset.idx, 0 ; CHECK-NEXT: %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0 ; CHECK-NEXT: %2 = getelementptr inbounds i32, i32* %1, i32 0 Index: llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll +++ llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll @@ -57,9 +57,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ARG]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 Index: llvm/test/Transforms/LoopVectorize/if-pred-stores.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -113,9 +113,6 @@ ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer -; VEC-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -254,8 +251,6 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]] ; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX6:%.*]] = add i64 undef, [[INDEX]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX6]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION7:%.*]] = add i32 [[TMP6]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION8:%.*]] = add i32 [[TMP6]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 @@ -354,8 +349,6 @@ ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; UNROLL-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 ; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6]] ; UNROLL: pred.store.if: ; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -405,8 +398,6 @@ ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -463,14 +454,8 @@ ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer -; VEC-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; VEC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> undef, <2 x i32> zeroinitializer -; VEC-NEXT: [[INDUCTION4:%.*]] = add <2 x i64> [[BROADCAST_SPLAT3]], ; VEC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 Index: llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll +++ llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s ; This is the test case from PR26314. @@ -22,16 +23,95 @@ ; } ; } -; CHECK-LABEL: Test -; CHECK: <4 x i64> -; CHECK: <4 x i32>, <4 x i32> -; CHECK: !{!"llvm.loop.isvectorized", i32 1} - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] } define void @Test(%struct.s* nocapture %obj, i64 %z) #0 { +; CHECK-LABEL: @Test( +; CHECK-NEXT: [[OBJ4:%.*]] = bitcast %struct.s* [[OBJ:%.*]] to i8* +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[Z:%.*]] +; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8* +; CHECK-NEXT: br label [[DOTOUTER_PREHEADER:%.*]] +; CHECK: .outer.preheader: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[I_NEXT:%.*]], [[DOTOUTER:%.*]] ] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 0 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[Z]] +; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast i32* [[SCEVGEP2]] to i8* +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]] +; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8* +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP78]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[Z]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[OBJ4]], [[SCEVGEP23]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BC:%.*]] = bitcast i32* [[TMP1]] to i8* +; CHECK-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[SCEVGEP1]], [[UGLYGEP]] +; CHECK-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[BC]], [[SCEVGEP23]] +; CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] +; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[Z]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[Z]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], !alias.scope !3 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], !alias.scope !3 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP1]], !alias.scope !3 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], !alias.scope !3 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP17]], align 4, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[TMP14]], [[WIDE_LOAD12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP19]], align 4, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[Z]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOTOUTER]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTOUTER_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[DOTINNER:%.*]] +; CHECK: .exit: +; CHECK-NEXT: ret void +; CHECK: .outer: +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_OUTER:%.*]] = icmp eq i64 [[I_NEXT]], 32 +; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label [[DOTEXIT:%.*]], label [[DOTOUTER_PREHEADER]] +; CHECK: .inner: +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[DOTINNER]] ] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[J]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[J]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = add nsw i32 [[TMP24]], [[TMP26]] +; CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP25]] +; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i64 [[J_NEXT]], [[Z]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label [[DOTOUTER]], label [[DOTINNER]], !llvm.loop !10 +; br label %.outer.preheader @@ -42,7 +122,7 @@ .exit: ret void - + .outer: %i.next = add nuw nsw i64 %i, 1 %exitcond.outer = icmp eq i64 %i.next, 32 @@ -57,7 +137,7 @@ %6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j %7 = load i32, i32* %6 %8 = add nsw i32 %5, %7 - store i32 %8, i32* %6 + store i32 %8, i32* %6 %j.next = add nuw nsw i64 %j, 1 %exitcond.inner = icmp eq i64 %j.next, %z br i1 %exitcond.inner, label %.outer, label %.inner Index: llvm/test/Transforms/LoopVectorize/pr35773.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/pr35773.ll +++ llvm/test/Transforms/LoopVectorize/pr35773.ll @@ -31,9 +31,6 @@ ; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0 ; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]] Index: llvm/test/Transforms/LoopVectorize/pr44488-predication.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -13,9 +13,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* @v_38, align 1 Index: llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll +++ llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll @@ -3,9 +3,6 @@ ; CHECK-LABEL: @test_fshl ; CHECK-LABEL: vector.body: ; CHECK-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK-NEXT: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 -; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %induction = add <4 x i32> %broadcast.splat, ; CHECK-NEXT: %0 = add i32 %index, 0 ; CHECK-NEXT: %1 = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> ) ; CHECK-NEXT: %index.next = add i32 %index, 4