diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -87,6 +87,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include @@ -3204,13 +3205,25 @@ } // end namespace llvm BoUpSLP::~BoUpSLP() { - for (auto *I : DeletedInstructions) + SmallVector DeadInsts; + for (auto *I : DeletedInstructions) { + for (Use &U : I->operands()) { + auto *Op = dyn_cast(U.get()); + if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() && + wouldInstructionBeTriviallyDead(Op, TLI)) + DeadInsts.emplace_back(Op); + } I->dropAllReferences(); + } for (auto *I : DeletedInstructions) { assert(I->use_empty() && "trying to erase instruction with users."); I->eraseFromParent(); } + + // Cleanup any dead scalar code feeding the vectorized instructions + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI); + #ifdef EXPENSIVE_CHECKS // If we could guarantee that this call is not extremely slow, we could // remove the ifdef limitation (see PR47712). diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll @@ -8,9 +8,7 @@ define void @f(float* %r, float* %w) { ; CHECK-LABEL: @f( ; CHECK-NEXT: [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0 -; CHECK-NEXT: [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1 ; CHECK-NEXT: [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0 -; CHECK-NEXT: [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[R0]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll @@ -5,9 +5,6 @@ ; CHECK-LABEL: @f1( ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[X]], i32 0 ; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* @@ -37,9 +34,6 @@ ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i16> [[XX]], i32 0 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* @@ -84,9 +78,6 @@ ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i16> [[XX]], i32 1 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll @@ -14,7 +14,6 @@ ; CHECK: for.body3.lr.ph: ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]] @@ -59,7 +58,6 @@ ; CHECK: for.body3.lr.ph: ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll @@ -32,12 +32,6 @@ ; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[P2_024:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[P1_023:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 1 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 2 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 3 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P1_023]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_024]] to <4 x i32>* @@ -158,12 +152,6 @@ ; CHECK-NEXT: [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ] ; CHECK-NEXT: [[P2_018:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ] ; CHECK-NEXT: [[P1_017:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 2 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 2 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 3 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P1_017]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>* @@ -261,20 +249,6 @@ ; CHECK-NEXT: [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[P2_045:%.*]] = phi i8* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[P1_044:%.*]] = phi i8* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 1 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 2 -; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 2 -; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 3 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 3 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 4 -; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 4 -; CHECK-NEXT: [[ARRAYIDX50:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 5 -; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 5 -; CHECK-NEXT: [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 6 -; CHECK-NEXT: [[ARRAYIDX63:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 6 -; CHECK-NEXT: [[ARRAYIDX72:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 7 -; CHECK-NEXT: [[ARRAYIDX74:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P1_044]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -13,13 +13,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[SCALE]], align 16 ; CHECK-NEXT: [[OFFSET:%.*]] = getelementptr inbounds [[STRUCT_WEIGHT_T]], %struct.weight_t* [[W]], i64 0, i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OFFSET]], align 4 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 2 -; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 3 -; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to <4 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC:%.*]] to <4 x i8>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 @@ -33,7 +27,7 @@ ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[DST]] to <4 x i8>* +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[DST:%.*]] to <4 x i8>* ; CHECK-NEXT: store <4 x i8> [[TMP13]], <4 x i8>* [[TMP14]], align 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll @@ -16,17 +16,12 @@ ; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 0, i64 1 ; CHECK-NEXT: [[TEMP2:%.*]] = load double, double* [[ARRAYIDX5_I]], align 8 ; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 0 -; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX18_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 1 ; CHECK-NEXT: [[ARRAYIDX25_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 0, i64 2 ; CHECK-NEXT: [[ARRAYIDX30_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 2 -; CHECK-NEXT: [[ARRAYIDX37_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX42_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 3 ; CHECK-NEXT: [[ARRAYIDX47_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 1, i64 0 ; CHECK-NEXT: [[TEMP10:%.*]] = load double, double* [[ARRAYIDX47_I]], align 8 ; CHECK-NEXT: [[ARRAYIDX52_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 1, i64 1 ; CHECK-NEXT: [[TEMP11:%.*]] = load double, double* [[ARRAYIDX52_I]], align 8 -; CHECK-NEXT: [[RES_I_SROA_4_0_OUT2_I_SROA_IDX2:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[ARRAYIDX3_I]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0 @@ -38,9 +33,8 @@ ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TEMP2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[OUT]] to <2 x double>* +; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>* ; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 2 -; CHECK-NEXT: [[RES_I_SROA_6_0_OUT2_I_SROA_IDX6:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 3 ; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>* ; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> [[TMP4]], [[TMP14]] @@ -52,7 +46,6 @@ ; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP19]], <2 x double>* [[TMP20]], align 8 ; CHECK-NEXT: [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 4 -; CHECK-NEXT: [[RES_I_SROA_8_0_OUT2_I_SROA_IDX10:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 5 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[TMP21]], double [[TEMP10]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[TMP2]], [[TMP22]] @@ -63,7 +56,6 @@ ; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8 ; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6 -; CHECK-NEXT: [[RES_I_SROA_10_0_OUT2_I_SROA_IDX14:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 7 ; CHECK-NEXT: [[TMP29:%.*]] = fmul <2 x double> [[TMP14]], [[TMP22]] ; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[TMP17]], [[TMP25]] ; CHECK-NEXT: [[TMP31:%.*]] = fadd <2 x double> [[TMP29]], [[TMP30]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll @@ -325,12 +325,10 @@ define void @no_version(i32* nocapture %dst, i32* nocapture readonly %src) { ; CHECK-LABEL: @no_version( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <2 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <2 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -610,7 +608,6 @@ ; CHECK-LABEL: @test_bounds_removed_before_runtime_checks( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT]], %struct* [[A]], i64 0, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TMP11]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = load i32*, i32** [[B:%.*]], align 8 @@ -873,52 +870,7 @@ ; CHECK-NEXT: [[T19:%.*]] = bitcast i8* [[ARG1:%.*]] to <16 x i8>* ; CHECK-NEXT: [[A_GEP_0:%.*]] = getelementptr i8, i8* [[A:%.*]], i64 0 ; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr i8, i8* [[B:%.*]], i64 0 -; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr i8, i8* [[B]], i64 1 -; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr i8, i8* [[A]], i64 2 -; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr i8, i8* [[B]], i64 2 -; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr i8, i8* [[A]], i64 3 -; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr i8, i8* [[B]], i64 3 -; CHECK-NEXT: [[A_GEP_4:%.*]] = getelementptr i8, i8* [[A]], i64 4 -; CHECK-NEXT: [[B_GEP_4:%.*]] = getelementptr i8, i8* [[B]], i64 4 -; CHECK-NEXT: [[A_GEP_5:%.*]] = getelementptr i8, i8* [[A]], i64 5 -; CHECK-NEXT: [[B_GEP_5:%.*]] = getelementptr i8, i8* [[B]], i64 5 -; CHECK-NEXT: [[A_GEP_6:%.*]] = getelementptr i8, i8* [[A]], i64 6 -; CHECK-NEXT: [[B_GEP_6:%.*]] = getelementptr i8, i8* [[B]], i64 6 -; CHECK-NEXT: [[A_GEP_7:%.*]] = getelementptr i8, i8* [[A]], i64 7 -; CHECK-NEXT: [[B_GEP_7:%.*]] = getelementptr i8, i8* [[B]], i64 7 -; CHECK-NEXT: [[A_GEP_8:%.*]] = getelementptr i8, i8* [[A]], i64 8 -; CHECK-NEXT: [[B_GEP_8:%.*]] = getelementptr i8, i8* [[B]], i64 8 -; CHECK-NEXT: [[A_GEP_9:%.*]] = getelementptr i8, i8* [[A]], i64 9 -; CHECK-NEXT: [[B_GEP_9:%.*]] = getelementptr i8, i8* [[B]], i64 9 -; CHECK-NEXT: [[A_GEP_10:%.*]] = getelementptr i8, i8* [[A]], i64 10 -; CHECK-NEXT: [[B_GEP_10:%.*]] = getelementptr i8, i8* [[B]], i64 10 -; CHECK-NEXT: [[A_GEP_11:%.*]] = getelementptr i8, i8* [[A]], i64 11 -; CHECK-NEXT: [[B_GEP_11:%.*]] = getelementptr i8, i8* [[B]], i64 11 -; CHECK-NEXT: [[A_GEP_12:%.*]] = getelementptr i8, i8* [[A]], i64 12 -; CHECK-NEXT: [[B_GEP_12:%.*]] = getelementptr i8, i8* [[B]], i64 12 -; CHECK-NEXT: [[A_GEP_13:%.*]] = getelementptr i8, i8* [[A]], i64 13 -; CHECK-NEXT: [[B_GEP_13:%.*]] = getelementptr i8, i8* [[B]], i64 13 -; CHECK-NEXT: [[A_GEP_14:%.*]] = getelementptr i8, i8* [[A]], i64 14 -; CHECK-NEXT: [[B_GEP_14:%.*]] = getelementptr i8, i8* [[B]], i64 14 -; CHECK-NEXT: [[A_GEP_15:%.*]] = getelementptr i8, i8* [[A]], i64 15 -; CHECK-NEXT: [[B_GEP_15:%.*]] = getelementptr i8, i8* [[B]], i64 15 ; CHECK-NEXT: [[R_GEP_0:%.*]] = getelementptr i8, i8* [[ARG1]], i64 0 -; CHECK-NEXT: [[R_GEP_1:%.*]] = getelementptr i8, i8* [[ARG1]], i64 1 -; CHECK-NEXT: [[R_GEP_2:%.*]] = getelementptr i8, i8* [[ARG1]], i64 2 -; CHECK-NEXT: [[R_GEP_3:%.*]] = getelementptr i8, i8* [[ARG1]], i64 3 -; CHECK-NEXT: [[R_GEP_4:%.*]] = getelementptr i8, i8* [[ARG1]], i64 4 -; CHECK-NEXT: [[R_GEP_5:%.*]] = getelementptr i8, i8* [[ARG1]], i64 5 -; CHECK-NEXT: [[R_GEP_6:%.*]] = getelementptr i8, i8* [[ARG1]], i64 6 -; CHECK-NEXT: [[R_GEP_7:%.*]] = getelementptr i8, i8* [[ARG1]], i64 7 -; CHECK-NEXT: [[R_GEP_8:%.*]] = getelementptr i8, i8* [[ARG1]], i64 8 -; CHECK-NEXT: [[R_GEP_9:%.*]] = getelementptr i8, i8* [[ARG1]], i64 9 -; CHECK-NEXT: [[R_GEP_10:%.*]] = getelementptr i8, i8* [[ARG1]], i64 10 -; CHECK-NEXT: [[R_GEP_11:%.*]] = getelementptr i8, i8* [[ARG1]], i64 11 -; CHECK-NEXT: [[R_GEP_12:%.*]] = getelementptr i8, i8* [[ARG1]], i64 12 -; CHECK-NEXT: [[R_GEP_13:%.*]] = getelementptr i8, i8* [[ARG1]], i64 13 -; CHECK-NEXT: [[R_GEP_14:%.*]] = getelementptr i8, i8* [[ARG1]], i64 14 -; CHECK-NEXT: [[R_GEP_15:%.*]] = getelementptr i8, i8* [[ARG1]], i64 15 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[A_GEP_0]] to <16 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B_GEP_0]] to <16 x i8>* @@ -1234,7 +1186,6 @@ ; CHECK-LABEL: @crash_instructions_deleted( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[T15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 2 -; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[T15]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[T17:%.*]] = load i32*, i32** [[PTR:%.*]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0 -; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0 -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1 -; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll @@ -6,22 +6,13 @@ define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll @@ -8,20 +8,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 7 -; CHECK-NEXT: [[ARRAYIDX3_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>* diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll @@ -8,20 +8,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 7 -; CHECK-NEXT: [[ARRAYIDX3_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>* diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll @@ -8,20 +8,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[A]], i64 0, i32 0, i64 7 -; CHECK-NEXT: [[ARRAYIDX3_7:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>* diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -14,15 +14,13 @@ ; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] -; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg [[DBG28:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29:![0-9]+]] -; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg [[DBG30:![0-9]+]] -; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg [[DBG31:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg [[DBG29:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg [[DBG26]] -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA32:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg [[DBG36:![0-9]+]] -; CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG36]], !tbaa [[TBAA32]] -; CHECK-NEXT: ret void, !dbg [[DBG37:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg [[DBG34:![0-9]+]] +; CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG34]], !tbaa [[TBAA30]] +; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]] ; entry: call void @llvm.dbg.value(metadata i64 %n, metadata !18, metadata !DIExpression()), !dbg !23 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll @@ -14,9 +14,6 @@ ; CHECK: for.body: ; CHECK-NEXT: [[CALL_I_I:%.*]] = call i32* @get_ptr() ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 2 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 1 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 3 -; CHECK-NEXT: [[RES_1:%.*]] = getelementptr i64, i64* [[RES:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[CALL_I_I]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[GEP_1]] to <2 x i32>* @@ -24,7 +21,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[RES]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[RES:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 8 ; CHECK-NEXT: [[C:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C]], label [[FOR_BODY]], label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll @@ -31,39 +31,33 @@ ; CHECK-NEXT: [[DOT_115:%.*]] = phi float [ 0.000000e+00, [[PREHEADER]] ], [ [[ADD39:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA:%.*]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP0]] -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <2 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX6]] to <2 x float>* -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[DOT_115]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[ADD]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP9]] -; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[ARRAYIDX18]] to <2 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX6]] to <2 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[DOT_115]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[ADD]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP7]] +; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX18]] to <2 x float>* +; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[ARRAYIDX21]] to <2 x float>* ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[ARRAYIDX21]] to <2 x float>* -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x float>, <2 x float>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP11]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0 -; CHECK-NEXT: [[ADD23:%.*]] = fadd float [[ADD15]], [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD23]], [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 -; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX34]], align 4 -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX37]], align 4 -; CHECK-NEXT: [[MUL38:%.*]] = fmul float [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0 +; CHECK-NEXT: [[ADD23:%.*]] = fadd float [[ADD15]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP12]], i32 1 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD23]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 +; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX34]], align 4 +; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX37]], align 4 +; CHECK-NEXT: [[MUL38:%.*]] = fmul float [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[ADD39]] = fadd float [[ADD31]], [[MUL38]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 32000 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll @@ -8,14 +8,7 @@ define void @select_umin_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -76,10 +69,7 @@ define void @select_umin_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -170,8 +160,7 @@ define void @select_umin_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -198,14 +187,7 @@ define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_ule_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -266,10 +248,7 @@ define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_ule_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -309,8 +288,7 @@ define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_ule_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -336,14 +314,7 @@ define void @select_smin_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -404,10 +375,7 @@ define void @select_smin_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -447,8 +415,7 @@ define void @select_smin_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -474,14 +441,7 @@ define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_sle_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -542,10 +502,7 @@ define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_sle_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -585,8 +542,7 @@ define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_sle_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -611,14 +567,7 @@ define void @select_umax_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -679,10 +628,7 @@ define void @select_umax_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -722,8 +668,7 @@ define void @select_umax_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -749,14 +694,7 @@ define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_uge_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -817,10 +755,7 @@ define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_uge_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -860,8 +795,7 @@ define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_uge_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -887,14 +821,7 @@ define void @select_smax_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -955,10 +882,7 @@ define void @select_smax_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -998,8 +922,7 @@ define void @select_smax_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> @@ -1026,14 +949,7 @@ define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_sge_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> @@ -1094,10 +1010,7 @@ define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_sge_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> @@ -1137,8 +1050,7 @@ define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_sge_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -193,14 +193,7 @@ define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_8xi8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 @@ -263,14 +256,7 @@ define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_16xi8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 @@ -425,10 +411,7 @@ define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ugt_4xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <4 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <4 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0 @@ -468,14 +451,7 @@ define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ult_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0 @@ -538,8 +514,7 @@ define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_2xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <2 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 @@ -567,10 +542,7 @@ define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 @@ -609,8 +581,7 @@ define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) { ; CHECK-LABEL: @select_uniform_ne_2xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll @@ -10,36 +10,8 @@ define void @PR50256(i8* %a, i16* %b, i32 %n) { ; CHECK-LABEL: @PR50256( -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 8 -; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 9 -; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 10 -; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 11 -; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 12 -; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 13 -; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 14 -; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 15 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 4 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 5 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 6 -; CHECK-NEXT: [[ARRAYIDX3_7:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 7 -; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 8 -; CHECK-NEXT: [[ARRAYIDX3_9:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 9 -; CHECK-NEXT: [[ARRAYIDX3_10:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 10 -; CHECK-NEXT: [[ARRAYIDX3_11:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 11 -; CHECK-NEXT: [[ARRAYIDX3_12:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 12 -; CHECK-NEXT: [[ARRAYIDX3_13:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 13 -; CHECK-NEXT: [[ARRAYIDX3_14:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 14 -; CHECK-NEXT: [[ARRAYIDX3_15:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 15 +; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 8 +; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A]] to <8 x i8>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll --- a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -8,13 +8,10 @@ ; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]] ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half* ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half* ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll --- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll @@ -12,30 +12,22 @@ define void @foo(i64* nocapture writeonly %da) { ; CHECK-128-LABEL: @foo( ; CHECK-128-NEXT: entry: -; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA:%.*]], i64 1 -; CHECK-128-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA]] to <2 x i64>* +; CHECK-128-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <2 x i64>* ; CHECK-128-NEXT: store <2 x i64> , <2 x i64>* [[TMP0]], align 8 ; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2 -; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3 ; CHECK-128-NEXT: [[TMP1:%.*]] = bitcast i64* [[ARRAYIDX2]] to <2 x i64>* ; CHECK-128-NEXT: store <2 x i64> , <2 x i64>* [[TMP1]], align 8 ; CHECK-128-NEXT: ret void ; ; CHECK-256-LABEL: @foo( ; CHECK-256-NEXT: entry: -; CHECK-256-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA:%.*]], i64 1 -; CHECK-256-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2 -; CHECK-256-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3 -; CHECK-256-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA]] to <4 x i64>* +; CHECK-256-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>* ; CHECK-256-NEXT: store <4 x i64> , <4 x i64>* [[TMP0]], align 8 ; CHECK-256-NEXT: ret void ; ; CHECK-512-LABEL: @foo( ; CHECK-512-NEXT: entry: -; CHECK-512-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA:%.*]], i64 1 -; CHECK-512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2 -; CHECK-512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3 -; CHECK-512-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA]] to <4 x i64>* +; CHECK-512-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>* ; CHECK-512-NEXT: store <4 x i64> , <4 x i64>* [[TMP0]], align 8 ; CHECK-512-NEXT: ret void ; @@ -53,8 +45,7 @@ define void @foo8(i8* nocapture writeonly %da) { ; CHECK-LABEL: @foo8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA:%.*]], i8 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[DA]] to <2 x i8>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[DA:%.*]] to <2 x i8>* ; CHECK-NEXT: store <2 x i8> , <2 x i8>* [[TMP0]], align 8 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -11,10 +11,7 @@ ; CHECK-NEXT: store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4 ; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0 -; CHECK-NEXT: [[ARRAYIDX372_1:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1 -; CHECK-NEXT: [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2) to <2 x i32>*), align 4 -; CHECK-NEXT: [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll @@ -3,14 +3,10 @@ define void @i64_simplified(i64* noalias %st, i64* noalias %ld) { ; CHECK-LABEL: @i64_simplified( -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>* ; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8 ; CHECK-NEXT: ret void ; @@ -32,14 +28,10 @@ define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) { ; CHECK-LABEL: @i64_simplifiedi_reversed( -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>* ; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8 ; CHECK-NEXT: ret void ; @@ -61,14 +53,10 @@ define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) { ; CHECK-LABEL: @i64_simplifiedi_extract( -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>* ; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 3 ; CHECK-NEXT: store i64 [[TMP4]], i64* [[LD]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll @@ -9,21 +9,18 @@ ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1 -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]] -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add i32 [[OP_EXTRA1]], [[TMP6]] -; CHECK-NEXT: [[OP_EXTRA3]] = add i32 [[OP_EXTRA2]], [[TMP5]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP7]], 1 +; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], [[TMP4]] +; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add i32 [[OP_EXTRA1]], [[TMP3]] +; CHECK-NEXT: [[OP_EXTRA3]] = add i32 [[OP_EXTRA2]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP]] ; CHECK: bail_out: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -301,22 +301,18 @@ define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) { ; CHECK-LABEL: @reorder_alt_rightsubTree( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[D]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[D:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[A]] to <2 x double>* -; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[B]] to <2 x double>* -; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP11]], [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP11]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[C]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP15]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[C:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 ; CHECK-NEXT: ret void ; %1 = load double, double* %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/align.ll b/llvm/test/Transforms/SLPVectorizer/X86/align.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/align.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/align.ll @@ -10,12 +10,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AGG_TMP_I_I_SROA_0:%.*]] = alloca [3 x double], align 16 ; CHECK-NEXT: [[STORE1:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[AGG_TMP_I_I_SROA_0]], i64 0, i64 1 -; CHECK-NEXT: [[STORE2:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[AGG_TMP_I_I_SROA_0]], i64 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[B]] to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[STORE1]] to <2 x double>* @@ -45,15 +42,9 @@ define void @test2(float * %a, float * %b) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3 -; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll @@ -4,38 +4,26 @@ define void @test(i32* %0, i32* %1, i32* %2) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP2:%.*]], i64 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 3 -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* -; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* -; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* [[TMP21]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = sub <4 x i32> , [[TMP20]] -; CHECK-NEXT: [[TMP24:%.*]] = sub <4 x i32> [[TMP23]], [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = add <4 x i32> [[TMP24]], [[TMP18]] -; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i32> [[TMP25]], -; CHECK-NEXT: [[TMP27:%.*]] = sub <4 x i32> [[TMP25]], -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> [[TMP27]], <4 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP30:%.*]] = sub <4 x i32> [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i32> [[TMP29]], <4 x i32> [[TMP30]], <4 x i32> -; CHECK-NEXT: [[TMP32:%.*]] = add <4 x i32> [[TMP31]], zeroinitializer -; CHECK-NEXT: [[TMP33:%.*]] = sub <4 x i32> [[TMP31]], zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP32]], <4 x i32> [[TMP33]], <4 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP34]], <4 x i32>* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP1:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 ; CHECK-NEXT: ret void ; %4 = load i32, i32* %1, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: [[V1:%.*]] = sub i64 [[A0]], 1 ; CHECK-NEXT: [[V2:%.*]] = sub i64 [[B0]], 1 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 1 -; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 2 -; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0 @@ -68,9 +65,6 @@ ; CHECK-NEXT: [[A0:%.*]] = load i16, i16* [[A:%.*]], align 8 ; CHECK-NEXT: [[V1:%.*]] = sext i16 [[A0]] to i32 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1 -; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2 -; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3 ; CHECK-NEXT: [[B0:%.*]] = load i16, i16* [[B:%.*]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load i16, i16* [[C:%.*]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load i16, i16* [[D:%.*]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -17,13 +17,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A0:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1 -; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2 -; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3 -; CHECK-NEXT: [[IDXS4:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 4 -; CHECK-NEXT: [[IDXS5:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 5 -; CHECK-NEXT: [[IDXS6:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 6 -; CHECK-NEXT: [[IDXS7:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -4,21 +4,20 @@ define void @test(double* %0, double %1) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP0:%.*]], i32 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[TMP0]], i32 7 -; CHECK-NEXT: br label [[TMP5:%.*]] -; CHECK: 5: -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[TMP1:%.*]], i32 1 -; CHECK-NEXT: br label [[TMP7:%.*]] -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = load double, double* null, align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> , double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt <2 x double> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP10]], <2 x double> zeroinitializer, <2 x double> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> zeroinitializer, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8 -; CHECK-NEXT: br label [[TMP7]] +; CHECK-NEXT: br label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1:%.*]], i32 1 +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = load double, double* null, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> , double [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x double> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP3]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 +; CHECK-NEXT: br label [[TMP6]] ; %3 = getelementptr inbounds double, double* %0, i32 6 %4 = getelementptr inbounds double, double* %0, i32 7 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll @@ -8,21 +8,11 @@ ; CHECK-NEXT: [[T02:%.*]] = bitcast i8* [[V0]] to i64* ; CHECK-NEXT: [[T12:%.*]] = bitcast i8* [[V1]] to i64* ; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4 -; CHECK-NEXT: [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 5 -; CHECK-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6 -; CHECK-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 7 ; CHECK-NEXT: [[T142:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 8 -; CHECK-NEXT: [[T182:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 9 ; CHECK-NEXT: [[T222:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 10 -; CHECK-NEXT: [[T262:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 11 ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4 -; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5 -; CHECK-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6 -; CHECK-NEXT: [[T32:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7 ; CHECK-NEXT: [[T212:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 8 -; CHECK-NEXT: [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9 ; CHECK-NEXT: [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10 -; CHECK-NEXT: [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll @@ -18,10 +18,6 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL]], 1 -; CHECK-NEXT: [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>* @@ -82,10 +78,6 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL]], 1 -; CHECK-NEXT: [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>* @@ -128,18 +120,6 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL]], 1 -; CHECK-NEXT: [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64 -; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM25]] -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM25]] -; CHECK-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL]], 3 -; CHECK-NEXT: [[IDXPROM38:%.*]] = sext i32 [[ADD37]] to i64 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM38]] -; CHECK-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM38]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <4 x float>* @@ -210,9 +190,6 @@ ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 1 -; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[ADD]] to i64 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM3]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] @@ -292,10 +269,6 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD6]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[MUL]], 7 -; CHECK-NEXT: [[IDXPROM12:%.*]] = sext i32 [[ADD7]] to i64 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>* @@ -340,10 +313,6 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[ADD6]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[MUL]], 7 -; CHECK-NEXT: [[IDXPROM12:%.*]] = zext i32 [[ADD7]] to i64 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]] -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>* @@ -402,9 +371,6 @@ ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[MUL]], 5 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_5]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[MUL]], 6 -; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[ADD_6]] to i64 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM3]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] @@ -506,9 +472,6 @@ ; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x double> [ [[TMP6]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ADD1:%.*]] = or i32 [[I_018]], 1 -; CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[ADD1]] to i64 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll @@ -8,15 +8,12 @@ define void @test1(double* %a, double* %b, double* %c, double* %d) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[B]] to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll @@ -8,15 +8,12 @@ ; Base case with no interesting control dependencies define void @test_no_control(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test_no_control( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -38,16 +35,14 @@ define void @test1(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 ; CHECK-NEXT: [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -70,14 +65,12 @@ ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -100,15 +93,13 @@ define void @test3(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4 -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -131,14 +122,12 @@ ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -159,16 +148,14 @@ define void @test5(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() ; CHECK-NEXT: [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -190,15 +177,12 @@ define void @test6(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; @@ -228,18 +212,16 @@ define void @test7(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = load i64, i64* [[A]], align 4 ; CHECK-NEXT: store i64 0, i64* [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() ; CHECK-NEXT: [[V2:%.*]] = load i64, i64* [[A2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; @@ -265,18 +247,16 @@ define void @test8(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = load i64, i64* [[A]], align 4 ; CHECK-NEXT: store i64 0, i64* [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: [[V2:%.*]] = load i64, i64* [[A2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; @@ -302,18 +282,16 @@ define void @test9(i64* %a, i64* %b, i64* %c) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = load i64, i64* [[A]], align 4 ; CHECK-NEXT: store i64 0, i64* [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() ; CHECK-NEXT: [[V2:%.*]] = load i64, i64* [[A2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; @@ -341,18 +319,16 @@ ; CHECK-NEXT: [[V1:%.*]] = load i64, i64* [[A:%.*]], align 4 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, i64* [[A]], i32 1 ; CHECK-NEXT: [[V2:%.*]] = load i64, i64* [[A2]], align 4 -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[V1]] ; CHECK-NEXT: store i64 [[U1]], i64* [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[V2]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; @@ -381,13 +357,11 @@ ; FIXME: This is wrong, we're hoisting a faulting udiv above an infinite loop. define void @test11(i64 %x, i64 %y, i64* %b, i64* %c) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[CA2:%.*]] = getelementptr i64, i64* [[C:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, i64* [[B:%.*]], i32 1 ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[X:%.*]] -; CHECK-NEXT: store i64 [[U1]], i64* [[B]], align 4 +; CHECK-NEXT: store i64 [[U1]], i64* [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll @@ -19,42 +19,37 @@ define void @bar() { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1 -; CHECK-NEXT: br label [[TMP7:%.*]] -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ , [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; CHECK-NEXT: [[TMP11]] = load <2 x double>, <2 x double>* [[TMP10]], align 8 +; CHECK-NEXT: br label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x double> [ , [[TMP0]] ], [ [[TMP8:%.*]], [[TMP16:%.*]] ], [ [[TMP8]], [[TMP15:%.*]] ], [ [[TMP8]], [[TMP15]] ] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TMP1]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP2]] to <2 x double>* +; CHECK-NEXT: [[TMP8]] = load <2 x double>, <2 x double>* [[TMP7]], align 8 +; CHECK-NEXT: br i1 undef, label [[TMP9:%.*]], label [[TMP10:%.*]] +; CHECK: 9: +; CHECK-NEXT: ret void +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP3]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP11]], align 8 ; CHECK-NEXT: br i1 undef, label [[TMP12:%.*]], label [[TMP13:%.*]] ; CHECK: 12: -; CHECK-NEXT: ret void +; CHECK-NEXT: br label [[TMP13]] ; CHECK: 13: -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8 -; CHECK-NEXT: br i1 undef, label [[TMP15:%.*]], label [[TMP16:%.*]] +; CHECK-NEXT: br i1 undef, label [[TMP14:%.*]], label [[TMP15]] +; CHECK: 14: +; CHECK-NEXT: unreachable ; CHECK: 15: -; CHECK-NEXT: br label [[TMP16]] +; CHECK-NEXT: switch i32 undef, label [[TMP16]] [ +; CHECK-NEXT: i32 32, label [[TMP4]] +; CHECK-NEXT: i32 103, label [[TMP4]] +; CHECK-NEXT: ] ; CHECK: 16: -; CHECK-NEXT: br i1 undef, label [[TMP17:%.*]], label [[TMP18]] +; CHECK-NEXT: br i1 undef, label [[TMP4]], label [[TMP17:%.*]] ; CHECK: 17: ; CHECK-NEXT: unreachable -; CHECK: 18: -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP11]], i32 1 -; CHECK-NEXT: switch i32 undef, label [[TMP21]] [ -; CHECK-NEXT: i32 32, label [[TMP7]] -; CHECK-NEXT: i32 103, label [[TMP7]] -; CHECK-NEXT: ] -; CHECK: 21: -; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP22:%.*]] -; CHECK: 22: -; CHECK-NEXT: unreachable ; %1 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 0 %2 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -15,8 +15,6 @@ ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[G:%.*]], i64 5 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], @@ -27,7 +25,6 @@ ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP6]], 4.000000e+00 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[MUL11]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], @@ -68,10 +65,7 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 @@ -128,31 +122,29 @@ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[G:%.*]], i64 5 ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP3]], 4.000000e+00 -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP14:%.*]], label [[TMP5:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP13:%.*]], label [[TMP5:%.*]] ; CHECK: 5: ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 ; CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], -; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[G]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 -; CHECK-NEXT: br label [[TMP24:%.*]] -; CHECK: 14: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 -; CHECK-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], 3.000000e+00 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> [[TMP20]], double [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], -; CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP15]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP22]], <2 x double>* [[TMP23]], align 8 -; CHECK-NEXT: br label [[TMP24]] -; CHECK: 24: +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], +; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[G]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 +; CHECK-NEXT: br label [[TMP22:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 +; CHECK-NEXT: [[TMP16:%.*]] = load double, double* [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = fmul double [[TMP16]], 3.000000e+00 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], +; CHECK-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP14]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP20]], <2 x double>* [[TMP21]], align 8 +; CHECK-NEXT: br label [[TMP22]] +; CHECK: 22: ; CHECK-NEXT: ret i32 undef ; %1 = icmp eq i32 %k, 0 @@ -200,10 +192,7 @@ ; CHECK-LABEL: @foo4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 @@ -254,8 +243,7 @@ ; CHECK-LABEL: @partial_mrg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV]], i32 1 @@ -266,7 +254,6 @@ ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4 ; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll @@ -16,18 +16,12 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret i32 0 ; @@ -67,18 +61,12 @@ ; CHECK-LABEL: @extr_user( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 ; CHECK-NEXT: ret i32 [[TMP5]] @@ -111,18 +99,12 @@ ; CHECK-LABEL: @extr_user1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 ; CHECK-NEXT: ret i32 [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll @@ -5,13 +5,10 @@ ; CHECK-LABEL: @diamond_broadcast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll @@ -5,13 +5,10 @@ ; CHECK-LABEL: @diamond_broadcast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 ; @@ -35,13 +32,10 @@ ; CHECK-LABEL: @diamond_broadcast2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 ; @@ -65,13 +59,10 @@ ; CHECK-LABEL: @diamond_broadcast3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll @@ -13,17 +13,11 @@ define void @PR28457(double* noalias nocapture align 32 %q, double* noalias nocapture readonly align 32 %p) { ; SSE-LABEL: @PR28457( ; SSE-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0 -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[P]], i64 1 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 ; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds double, double* [[P]], i64 5 ; SSE-NEXT: [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0 -; SSE-NEXT: [[Q1:%.*]] = getelementptr inbounds double, double* [[Q]], i64 1 ; SSE-NEXT: [[Q2:%.*]] = getelementptr inbounds double, double* [[Q]], i64 2 -; SSE-NEXT: [[Q3:%.*]] = getelementptr inbounds double, double* [[Q]], i64 3 ; SSE-NEXT: [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4 -; SSE-NEXT: [[Q5:%.*]] = getelementptr inbounds double, double* [[Q]], i64 5 ; SSE-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], @@ -43,17 +37,9 @@ ; ; AVX-LABEL: @PR28457( ; AVX-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0 -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[P]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds double, double* [[P]], i64 5 ; AVX-NEXT: [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0 -; AVX-NEXT: [[Q1:%.*]] = getelementptr inbounds double, double* [[Q]], i64 1 -; AVX-NEXT: [[Q2:%.*]] = getelementptr inbounds double, double* [[Q]], i64 2 -; AVX-NEXT: [[Q3:%.*]] = getelementptr inbounds double, double* [[Q]], i64 3 ; AVX-NEXT: [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4 -; AVX-NEXT: [[Q5:%.*]] = getelementptr inbounds double, double* [[Q]], i64 5 ; AVX-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <4 x double>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 8 ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll @@ -10,12 +10,8 @@ define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) { ; CHECK-LABEL: @dot4f64( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2 -; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 3 -; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 3 +; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 2 +; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>* @@ -61,12 +57,8 @@ define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot4f32( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2 -; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 3 -; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 3 +; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 2 +; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>* @@ -112,15 +104,9 @@ define double @dot4f64_fast(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) { ; CHECK-LABEL: @dot4f64_fast( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2 -; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 3 -; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <4 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <4 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]]) @@ -152,15 +138,9 @@ define float @dot4f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot4f32_fast( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2 -; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 3 -; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) @@ -198,8 +178,6 @@ ; CHECK-LABEL: @dot3f64( ; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 ; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2 ; CHECK-NEXT: [[X0:%.*]] = load double, double* [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load double, double* [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]] @@ -236,8 +214,6 @@ ; CHECK-LABEL: @dot3f32( ; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 ; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2 ; CHECK-NEXT: [[X0:%.*]] = load float, float* [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load float, float* [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]] @@ -274,8 +250,6 @@ ; CHECK-LABEL: @dot3f64_fast( ; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 ; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2 ; CHECK-NEXT: [[X0:%.*]] = load double, double* [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load double, double* [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]] @@ -312,8 +286,6 @@ ; CHECK-LABEL: @dot3f32_fast( ; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 ; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2 -; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2 ; CHECK-NEXT: [[X0:%.*]] = load float, float* [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load float, float* [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]] @@ -352,11 +324,9 @@ define double @dot2f64(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f64( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 @@ -378,11 +348,9 @@ define float @dot2f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f32( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 @@ -404,11 +372,9 @@ define double @dot2f64_fast(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f64_fast( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 @@ -430,11 +396,9 @@ define float @dot2f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f32_fast( -; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1 -; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll @@ -7,13 +7,10 @@ ; CHECK-LABEL: @hoge( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 6 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 7 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SINK:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll @@ -7,13 +7,10 @@ ; CHECK-LABEL: @hoge( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 6 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 7 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SINK:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -10,7 +10,6 @@ ; CHECK-LABEL: @fn1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> @@ -36,24 +35,15 @@ define void @fn2(i32* %a, i32* %b, float* %c) { ; CHECK-LABEL: @fn2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP5]], i32 [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[C]] to <4 x float>* +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[C:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4 ; CHECK-NEXT: ret void ; @@ -103,7 +93,6 @@ ; CHECK-LABEL: @externally_used_ptrs( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -340,10 +340,7 @@ define float @reduction_v4f32_fast(float* %p) { ; CHECK-LABEL: @reduction_v4f32_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -363,10 +360,7 @@ define float @reduction_v4f32_nnan(float* %p) { ; CHECK-LABEL: @reduction_v4f32_nnan( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -415,14 +409,7 @@ define float @reduction_v8f32_fast(float* %p) { ; CHECK-LABEL: @reduction_v8f32_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 -; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 -; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 -; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <8 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -469,10 +456,7 @@ define double @reduction_v4f64_fast(double* %p) { ; CHECK-LABEL: @reduction_v4f64_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P]] to <4 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP2]]) ; CHECK-NEXT: ret double [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -340,10 +340,7 @@ define float @reduction_v4f32_fast(float* %p) { ; CHECK-LABEL: @reduction_v4f32_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -363,10 +360,7 @@ define float @reduction_v4f32_nnan(float* %p) { ; CHECK-LABEL: @reduction_v4f32_nnan( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -415,14 +409,7 @@ define float @reduction_v8f32_fast(float* %p) { ; CHECK-LABEL: @reduction_v8f32_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 -; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 -; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 -; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <8 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> [[TMP2]]) ; CHECK-NEXT: ret float [[TMP3]] @@ -469,10 +456,7 @@ define double @reduction_v4f64_fast(double* %p) { ; CHECK-LABEL: @reduction_v4f64_fast( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P]] to <4 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP2]]) ; CHECK-NEXT: ret double [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll @@ -12,16 +12,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller ; CHECK: catch: ; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [i8* null, i32 64, i8* null] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[A]] to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP6]]) [ "funclet"(token [[TMP1]]) ] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[C]] to <2 x double>* +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[C:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 ; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT:%.*]] ; CHECK: try.cont: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll @@ -13,9 +13,6 @@ ; CHECK-NEXT: br label [[FOR_BODY92:%.*]] ; CHECK: for.body92: ; CHECK-NEXT: [[SUM_MVR_I:%.*]] = getelementptr i32, i32* undef, i32 0 -; CHECK-NEXT: [[SUM_MVR_ABS_I:%.*]] = getelementptr i32, i32* undef, i32 2 -; CHECK-NEXT: [[SUM_MVC_I:%.*]] = getelementptr i32, i32* undef, i32 1 -; CHECK-NEXT: [[SUM_MVC_ABS_I:%.*]] = getelementptr i32, i32* undef, i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[SUM_MVR_I]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll @@ -13,13 +13,11 @@ ; CHECK-LABEL: @foo1( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y]], i64 0, i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X]], i64 0, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>* -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, <2 x i32*> [[TMP6]], <2 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>* -; CHECK-NEXT: store <2 x i32*> [[TMP7]], <2 x i32*>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x i32*> [[TMP4]], <2 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>* +; CHECK-NEXT: store <2 x i32*> [[TMP5]], <2 x i32*>* [[TMP6]], align 8 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -343,55 +343,9 @@ define float @f(float* nocapture readonly %x) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 -; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 -; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 -; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 -; CHECK-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33 -; CHECK-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34 -; CHECK-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35 -; CHECK-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36 -; CHECK-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37 -; CHECK-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38 -; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39 -; CHECK-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40 -; CHECK-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41 -; CHECK-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42 -; CHECK-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43 -; CHECK-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44 -; CHECK-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45 -; CHECK-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46 -; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) @@ -401,55 +355,9 @@ ; ; THRESHOLD-LABEL: @f( ; THRESHOLD-NEXT: entry: -; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 -; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 -; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 -; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 -; THRESHOLD-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33 -; THRESHOLD-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34 -; THRESHOLD-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35 -; THRESHOLD-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36 -; THRESHOLD-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37 -; THRESHOLD-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38 -; THRESHOLD-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39 -; THRESHOLD-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40 -; THRESHOLD-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41 -; THRESHOLD-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42 -; THRESHOLD-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43 -; THRESHOLD-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44 -; THRESHOLD-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45 -; THRESHOLD-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46 -; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) @@ -608,38 +516,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 -; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 -; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] @@ -649,38 +526,7 @@ ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float -; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 -; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 -; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] @@ -795,37 +641,12 @@ ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) @@ -844,37 +665,12 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 -; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 -; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 -; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 -; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 -; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 -; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 -; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 -; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 -; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 -; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 -; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 -; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 -; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 -; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 -; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 -; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 -; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 -; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 -; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 ; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) @@ -985,14 +781,7 @@ ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] @@ -1004,14 +793,7 @@ ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 -; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] @@ -1055,14 +837,7 @@ ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] @@ -1076,14 +851,7 @@ ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 -; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] @@ -1133,14 +901,7 @@ ; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float ; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] @@ -1154,14 +915,7 @@ ; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float ; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] -; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 -; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 -; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 -; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1245,40 +1245,19 @@ ; SSE-NEXT: ret i32 [[M]] ; ; AVX-LABEL: @smax_intrinsic_rdx_v8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]]) ; AVX-NEXT: ret i32 [[TMP3]] ; ; AVX2-LABEL: @smax_intrinsic_rdx_v8i32( -; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX2-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4 -; AVX2-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5 -; AVX2-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6 -; AVX2-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7 -; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>* +; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>* ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 ; AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]]) ; AVX2-NEXT: ret i32 [[TMP3]] ; ; THRESH-LABEL: @smax_intrinsic_rdx_v8i32( -; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; THRESH-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4 -; THRESH-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5 -; THRESH-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6 -; THRESH-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7 -; THRESH-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>* +; THRESH-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>* ; THRESH-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 ; THRESH-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]]) ; THRESH-NEXT: ret i32 [[TMP3]] @@ -1310,14 +1289,7 @@ define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) { ; CHECK-LABEL: @smin_intrinsic_rdx_v8i16( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP2]]) ; CHECK-NEXT: ret i16 [[TMP3]] @@ -1362,10 +1334,7 @@ ; DEFAULT-NEXT: ret i64 [[M]] ; ; THRESH-LABEL: @umax_intrinsic_rdx_v4i64( -; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1 -; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2 -; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3 -; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <4 x i64>* +; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0:%.*]] to <4 x i64>* ; THRESH-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 4 ; THRESH-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP2]]) ; THRESH-NEXT: ret i64 [[TMP3]] @@ -1385,22 +1354,7 @@ define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) { ; CHECK-LABEL: @umin_intrinsic_rdx_v16i8( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; CHECK-NEXT: [[PA:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; CHECK-NEXT: [[PB:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; CHECK-NEXT: [[PC:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; CHECK-NEXT: [[PD:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; CHECK-NEXT: [[PE:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; CHECK-NEXT: [[PF:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -28,12 +28,6 @@ ; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ] ; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 ; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1 -; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]] -; ALL-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2 -; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]] -; ALL-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3 -; ALL-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], @@ -110,10 +104,7 @@ ; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; ALL: for.body.lr.ph: -; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* +; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* ; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 ; ALL-NEXT: br label [[FOR_BODY:%.*]] @@ -122,12 +113,6 @@ ; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ] ; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2 ; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1 -; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] -; ALL-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2 -; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]] -; ALL-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3 -; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]] ; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] @@ -216,14 +201,7 @@ ; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ALL-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; ALL: for.body.lr.ph: -; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 -; ALL-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4 -; ALL-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5 -; ALL-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6 -; ALL-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7 -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>* +; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <8 x float>* ; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 ; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4 @@ -234,20 +212,6 @@ ; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ] ; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6 ; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1 -; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]] -; ALL-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2 -; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]] -; ALL-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3 -; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]] -; ALL-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4 -; ALL-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]] -; ALL-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5 -; ALL-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]] -; ALL-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6 -; ALL-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] -; ALL-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7 -; ALL-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]] ; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* ; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] @@ -371,10 +335,7 @@ ; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; ALL: for.body.lr.ph: -; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; ALL-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; ALL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* +; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* ; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 ; ALL-NEXT: br label [[FOR_BODY:%.*]] @@ -383,12 +344,6 @@ ; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] ; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 ; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1 -; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]] -; ALL-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2 -; ALL-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]] -; ALL-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3 -; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]] ; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] @@ -650,8 +605,7 @@ ; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; STORE: for.body.lr.ph: -; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 -; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B]] to <2 x double>* +; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* ; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 ; STORE-NEXT: br label [[FOR_BODY:%.*]] @@ -659,8 +613,6 @@ ; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] -; STORE-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 -; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] ; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>* ; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]] @@ -768,9 +720,6 @@ ; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; STORE: for.body.lr.ph: -; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 ; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 ; STORE-NEXT: br label [[FOR_BODY:%.*]] ; STORE: for.body: @@ -778,13 +727,7 @@ ; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; STORE-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 -; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] -; STORE-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 -; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] -; STORE-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 -; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] -; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B]] to <4 x float>* +; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 @@ -1386,10 +1329,7 @@ ; ALL-NEXT: entry: ; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] ; ALL: bb: -; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 -; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 -; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 -; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* +; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* ; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) ; ALL-NEXT: br label [[EXIT]] @@ -1423,10 +1363,7 @@ ; ALL-NEXT: entry: ; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] ; ALL: bb: -; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 -; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 -; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 -; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* +; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* ; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) ; ALL-NEXT: br label [[EXIT]] @@ -1484,10 +1421,7 @@ define float @fadd_v4f32_fmf(float* %p) { ; ALL-LABEL: @fadd_v4f32_fmf( -; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; ALL-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) ; ALL-NEXT: ret float [[TMP3]] @@ -1511,10 +1445,7 @@ define float @fadd_v4f32_fmf_intersect(float* %p) { ; ALL-LABEL: @fadd_v4f32_fmf_intersect( -; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; ALL-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) ; ALL-NEXT: ret float [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -40,21 +40,6 @@ ; SSE-NEXT: [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ] ; SSE-NEXT: [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ] ; SSE-NEXT: [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ] -; SSE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1 -; SSE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1 -; SSE-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1 -; SSE-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1 -; SSE-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1 -; SSE-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2 -; SSE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2 -; SSE-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2 -; SSE-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2 -; SSE-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2 -; SSE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3 -; SSE-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3 -; SSE-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3 -; SSE-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3 -; SSE-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3 ; SSE-NEXT: [[TMP4:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>* ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 ; SSE-NEXT: [[TMP6:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>* @@ -75,21 +60,6 @@ ; SSE-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4 ; SSE-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4 ; SSE-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4 -; SSE-NEXT: [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5 -; SSE-NEXT: [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5 -; SSE-NEXT: [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5 -; SSE-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5 -; SSE-NEXT: [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5 -; SSE-NEXT: [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6 -; SSE-NEXT: [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6 -; SSE-NEXT: [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6 -; SSE-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6 -; SSE-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6 -; SSE-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7 -; SSE-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7 -; SSE-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7 -; SSE-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7 -; SSE-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7 ; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>* ; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 ; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>* @@ -110,21 +80,6 @@ ; SSE-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8 ; SSE-NEXT: [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8 ; SSE-NEXT: [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8 -; SSE-NEXT: [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9 -; SSE-NEXT: [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9 -; SSE-NEXT: [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9 -; SSE-NEXT: [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9 -; SSE-NEXT: [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9 -; SSE-NEXT: [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10 -; SSE-NEXT: [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10 -; SSE-NEXT: [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10 -; SSE-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10 -; SSE-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10 -; SSE-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11 -; SSE-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11 -; SSE-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11 -; SSE-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11 -; SSE-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11 ; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>* ; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1 ; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>* @@ -145,21 +100,6 @@ ; SSE-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12 ; SSE-NEXT: [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12 ; SSE-NEXT: [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12 -; SSE-NEXT: [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13 -; SSE-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13 -; SSE-NEXT: [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13 -; SSE-NEXT: [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13 -; SSE-NEXT: [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13 -; SSE-NEXT: [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14 -; SSE-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14 -; SSE-NEXT: [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14 -; SSE-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14 -; SSE-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14 -; SSE-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15 -; SSE-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15 -; SSE-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15 -; SSE-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15 -; SSE-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 ; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>* ; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1 ; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>* @@ -198,81 +138,6 @@ ; AVX512-NEXT: [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ] ; AVX512-NEXT: [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ] ; AVX512-NEXT: [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ] -; AVX512-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1 -; AVX512-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1 -; AVX512-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1 -; AVX512-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1 -; AVX512-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1 -; AVX512-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2 -; AVX512-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2 -; AVX512-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2 -; AVX512-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2 -; AVX512-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2 -; AVX512-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3 -; AVX512-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3 -; AVX512-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3 -; AVX512-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3 -; AVX512-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3 -; AVX512-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4 -; AVX512-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4 -; AVX512-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4 -; AVX512-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4 -; AVX512-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4 -; AVX512-NEXT: [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5 -; AVX512-NEXT: [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5 -; AVX512-NEXT: [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5 -; AVX512-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5 -; AVX512-NEXT: [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5 -; AVX512-NEXT: [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6 -; AVX512-NEXT: [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6 -; AVX512-NEXT: [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6 -; AVX512-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6 -; AVX512-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6 -; AVX512-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7 -; AVX512-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7 -; AVX512-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7 -; AVX512-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7 -; AVX512-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7 -; AVX512-NEXT: [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8 -; AVX512-NEXT: [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8 -; AVX512-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8 -; AVX512-NEXT: [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8 -; AVX512-NEXT: [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8 -; AVX512-NEXT: [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9 -; AVX512-NEXT: [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9 -; AVX512-NEXT: [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9 -; AVX512-NEXT: [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9 -; AVX512-NEXT: [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9 -; AVX512-NEXT: [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10 -; AVX512-NEXT: [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10 -; AVX512-NEXT: [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10 -; AVX512-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10 -; AVX512-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10 -; AVX512-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11 -; AVX512-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11 -; AVX512-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11 -; AVX512-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11 -; AVX512-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11 -; AVX512-NEXT: [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12 -; AVX512-NEXT: [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12 -; AVX512-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12 -; AVX512-NEXT: [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12 -; AVX512-NEXT: [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12 -; AVX512-NEXT: [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13 -; AVX512-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13 -; AVX512-NEXT: [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13 -; AVX512-NEXT: [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13 -; AVX512-NEXT: [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13 -; AVX512-NEXT: [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14 -; AVX512-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14 -; AVX512-NEXT: [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14 -; AVX512-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14 -; AVX512-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14 -; AVX512-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15 -; AVX512-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15 -; AVX512-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15 -; AVX512-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15 -; AVX512-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 ; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>* ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX512-NEXT: [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -8,7 +8,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* undef, align 4 ; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_SW:%.*]], %struct.sw* [[V:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_SW]], %struct.sw* [[V]], i64 0, i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* undef, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll @@ -6,10 +6,7 @@ ; CHECK-NEXT: top: ; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1 -; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1 ; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>* ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>* @@ -53,16 +50,7 @@ ; CHECK-NEXT: top: ; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1 -; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1 -; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2 -; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2 -; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3 -; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3 ; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1 -; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2 -; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>* @@ -128,10 +116,8 @@ ; CHECK-NEXT: top: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0 @@ -172,10 +158,8 @@ ; CHECK-NEXT: top: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0 @@ -267,13 +251,11 @@ ; CHECK-NEXT: top: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 -; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0 +; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll @@ -5,10 +5,7 @@ ; CHECK-LABEL: @inst_size( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAL:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 -; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 4 ; CHECK-NEXT: [[T41:%.*]] = icmp sgt i64 0, [[VAL]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll @@ -5,16 +5,10 @@ define void @vec_powi_f32(float* %a, float* %c, i32 %P) { ; CHECK-LABEL: @vec_powi_f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i32 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i32 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP1]], i32 [[P:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll @@ -17,22 +17,13 @@ ; CHECK-LABEL: @jumble1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; @@ -73,22 +64,13 @@ ; CHECK-LABEL: @jumble2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -44,75 +44,48 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP26:%.*]], <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP14:%.*]], <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP14]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: if.else: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 -; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]] ; CHECK: if.then14: ; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>* -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>* +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: if.else27: -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4 -; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4 +; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]] ; CHECK: if.then30: ; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>* -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>* +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: if.else43: -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4 -; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4 +; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP11]], 0 ; CHECK-NEXT: br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]] ; CHECK: if.then46: ; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>* -; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>* +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[TMP26]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[SHUFFLE]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x i32> [ [[TMP4]], [[IF_THEN]] ], [ [[TMP7]], [[IF_THEN14]] ], [ [[TMP10]], [[IF_THEN30]] ], [ [[SHUFFLE]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -6,17 +6,8 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) { ; CHECK-LABEL: @jumbled-load( ; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>* @@ -64,13 +55,7 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias nocapture %out) { ; CHECK-LABEL: @jumbled-load-multiuses( ; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -16,10 +16,6 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** @b, align 8 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 12 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 13 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 14 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 15 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP1]] to float ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <2 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -53,7 +53,6 @@ ; CHECK-LABEL: @PR16739_byref( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2 ; CHECK-NEXT: [[X0:%.*]] = load float, float* [[GEP0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 @@ -80,7 +79,6 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) { ; CHECK-LABEL: @PR16739_byref_alt( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> @@ -139,13 +137,9 @@ define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 { ; CHECK-LABEL: @PR43578_prefer128( ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 1 ; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -53,7 +53,6 @@ ; CHECK-LABEL: @PR16739_byref( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2 ; CHECK-NEXT: [[X0:%.*]] = load float, float* [[GEP0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 @@ -80,7 +79,6 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) { ; CHECK-LABEL: @PR16739_byref_alt( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> @@ -139,13 +137,9 @@ define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 { ; CHECK-LABEL: @PR43578_prefer128( ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 1 ; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -21,13 +21,9 @@ ; CHECK-LABEL: @lookahead_basic( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4 -; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6 -; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>* @@ -92,9 +88,7 @@ ; CHECK-LABEL: @lookahead_alt1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4 ; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6 @@ -155,13 +149,9 @@ ; CHECK-LABEL: @lookahead_alt2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4 -; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6 -; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>* @@ -234,12 +224,10 @@ ; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0 ; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0 ; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0 -; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1 ; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2 ; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1 ; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8 @@ -326,12 +314,10 @@ ; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0 ; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0 ; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0 -; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1 ; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2 ; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1 ; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8 @@ -416,9 +402,7 @@ define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) { ; CHECK-LABEL: @lookahead_crash( ; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDXA0]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]]) @@ -459,7 +443,6 @@ ; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4 ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4 ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 -; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] @@ -646,7 +629,6 @@ ; ; SSE-LABEL: @ChecksExtractScores_different_vectors( ; SSE-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0 -; SSE-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1 ; SSE-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4 ; SSE-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4 ; SSE-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0 @@ -656,7 +638,6 @@ ; SSE-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 ; SSE-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 ; SSE-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 -; SSE-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 ; SSE-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0 @@ -685,7 +666,6 @@ ; AVX-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 ; AVX-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 ; AVX-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 -; AVX-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 @@ -735,9 +715,7 @@ ; SSE-LABEL: @splat_loads( ; SSE-NEXT: entry: ; SSE-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0 -; SSE-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1 ; SSE-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0 -; SSE-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1 ; SSE-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; SSE-NEXT: [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>* @@ -758,7 +736,6 @@ ; AVX-LABEL: @splat_loads( ; AVX-NEXT: entry: ; AVX-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0 -; AVX-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1 ; AVX-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0 ; AVX-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1 ; AVX-NEXT: [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8 @@ -807,9 +784,7 @@ ; CHECK-LABEL: @splat_loads_with_internal_uses( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0 -; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1 ; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0 -; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -12,30 +12,16 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDVARS_IV]], 4 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDVARS_IV]], 5 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDVARS_IV]], 6 -; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDVARS_IV]], 7 -; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP8]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <8 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 -; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[N]] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll @@ -191,10 +191,7 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB16:%.*]], label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds float, float* [[ARG1:%.*]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[ARG1]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[ARG1]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[ARG1]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[ARG1:%.*]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[ARG2:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll @@ -4,13 +4,7 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[Q0]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[TMP1]], [[TMP2]] @@ -61,7 +55,6 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll @@ -6,22 +6,13 @@ define void @powof2div_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @powof2div_uniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -94,22 +85,13 @@ ; ; AVX-LABEL: @powof2div_nonuniform( ; AVX-NEXT: entry: -; AVX-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; AVX-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; AVX-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; AVX-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; AVX-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; AVX-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; AVX-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; AVX-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; AVX-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; AVX-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; AVX-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; AVX-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; AVX-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; AVX-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], -; AVX-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; AVX-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; AVX-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll @@ -6,22 +6,13 @@ define void @powof2mul_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @powof2mul_uniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -61,22 +52,13 @@ define void @negpowof2mul_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @negpowof2mul_uniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -116,22 +98,13 @@ define void @powof2mul_nonuniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @powof2mul_nonuniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -171,22 +144,13 @@ define void @negpowof2mul_nonuniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @negpowof2mul_nonuniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; @@ -269,13 +233,7 @@ ; ; AVX-LABEL: @PR51436( ; AVX-NEXT: entry: -; AVX-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 -; AVX-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 -; AVX-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3 -; AVX-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 4 -; AVX-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 5 -; AVX-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 6 -; AVX-NEXT: [[GEP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 7 +; AVX-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 4 ; AVX-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>* ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll @@ -7,33 +7,28 @@ define void @store_chains(double* %x) { ; AVX-LABEL: @store_chains( -; AVX-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 -; AVX-NEXT: [[TMP4:%.*]] = bitcast double* [[X]] to <4 x double>* -; AVX-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 -; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP5]] -; AVX-NEXT: [[TMP8:%.*]] = bitcast double* [[X]] to <4 x double>* -; AVX-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[TMP8]], align 8 +; AVX-NEXT: [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <4 x double>* +; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 8 +; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP2]] +; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], [[TMP2]] +; AVX-NEXT: [[TMP5:%.*]] = bitcast double* [[X]] to <4 x double>* +; AVX-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[TMP5]], align 8 ; AVX-NEXT: ret void ; ; MAX128-LABEL: @store_chains( -; MAX128-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 -; MAX128-NEXT: [[TMP2:%.*]] = bitcast double* [[X]] to <2 x double>* -; MAX128-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; MAX128-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP3]] -; MAX128-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP3]] -; MAX128-NEXT: [[TMP6:%.*]] = bitcast double* [[X]] to <2 x double>* -; MAX128-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 -; MAX128-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 -; MAX128-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 -; MAX128-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; MAX128-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 -; MAX128-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], [[TMP10]] -; MAX128-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], [[TMP10]] -; MAX128-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; MAX128-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 +; MAX128-NEXT: [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <2 x double>* +; MAX128-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; MAX128-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP2]] +; MAX128-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP2]] +; MAX128-NEXT: [[TMP5:%.*]] = bitcast double* [[X]] to <2 x double>* +; MAX128-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; MAX128-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 +; MAX128-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; MAX128-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 8 +; MAX128-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[TMP8]] +; MAX128-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], [[TMP8]] +; MAX128-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; MAX128-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 ; MAX128-NEXT: ret void ; %1 = load double, double* %x, align 8 @@ -60,21 +55,19 @@ define void @store_chains_prefer_width_attr(double* %x) #0 { ; ANY-LABEL: @store_chains_prefer_width_attr( -; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 -; ANY-NEXT: [[TMP2:%.*]] = bitcast double* [[X]] to <2 x double>* -; ANY-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; ANY-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP3]] -; ANY-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP3]] -; ANY-NEXT: [[TMP6:%.*]] = bitcast double* [[X]] to <2 x double>* -; ANY-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 -; ANY-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 -; ANY-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 -; ANY-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; ANY-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 -; ANY-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], [[TMP10]] -; ANY-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], [[TMP10]] -; ANY-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; ANY-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 +; ANY-NEXT: [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <2 x double>* +; ANY-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; ANY-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP2]] +; ANY-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP2]] +; ANY-NEXT: [[TMP5:%.*]] = bitcast double* [[X]] to <2 x double>* +; ANY-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; ANY-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 +; ANY-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; ANY-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 8 +; ANY-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[TMP8]] +; ANY-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], [[TMP8]] +; ANY-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; ANY-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 ; ANY-NEXT: ret void ; %1 = load double, double* %x, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll @@ -10,8 +10,7 @@ define void @_Z3fooPml(i64* nocapture %a, i64 %i) { ; CHECK-LABEL: @_Z3fooPml( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[A]] to <2 x i64>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll @@ -9,7 +9,6 @@ ; CHECK-LABEL: @test1( ; CHECK-NEXT: invoke.cont: ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], %struct.B* [[P:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[P]], i64 0, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[GEP1]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -34,10 +34,8 @@ ; AVX-NEXT: [[OR_1:%.*]] = or i64 undef, 1 ; AVX-NEXT: store i64 [[OR_1]], i64* undef, align 8 ; AVX-NEXT: [[FOO_1:%.*]] = getelementptr inbounds [[CLASS_1:%.*]], %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 0 -; AVX-NEXT: [[FOO_2:%.*]] = getelementptr inbounds [[CLASS_1]], %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1 ; AVX-NEXT: [[BAR5:%.*]] = load i64, i64* undef, align 8 ; AVX-NEXT: [[BAR3:%.*]] = getelementptr inbounds [[CLASS_2:%.*]], %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0 -; AVX-NEXT: [[BAR4:%.*]] = getelementptr inbounds [[CLASS_2]], %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 1 ; AVX-NEXT: [[TMP0:%.*]] = bitcast i64* [[FOO_1]] to <2 x i64>* ; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[OR_1]], i32 0 @@ -71,9 +69,7 @@ ; SSE-NEXT: [[TMP0:%.*]] = load i64, i64* undef, align 1 ; SSE-NEXT: [[ADD:%.*]] = add i64 undef, undef ; SSE-NEXT: store i64 [[ADD]], i64* undef, align 1 -; SSE-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 ; SSE-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 -; SSE-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 ; SSE-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 ; SSE-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], @@ -97,9 +93,7 @@ ; AVX-NEXT: [[TMP0:%.*]] = load i64, i64* undef, align 1 ; AVX-NEXT: [[ADD:%.*]] = add i64 undef, undef ; AVX-NEXT: store i64 [[ADD]], i64* undef, align 1 -; AVX-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 -; AVX-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 ; AVX-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 ; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -7,9 +7,6 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-LABEL: @StructOfVectors( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -55,9 +52,6 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) { ; CHECK-LABEL: @ArrayOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -101,9 +95,6 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) { ; CHECK-LABEL: @StructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -194,13 +185,6 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) { ; CHECK-LABEL: @StructOfStructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4 -; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 -; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 -; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -7,9 +7,6 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-LABEL: @StructOfVectors( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -55,9 +52,6 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) { ; CHECK-LABEL: @ArrayOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -101,9 +95,6 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) { ; CHECK-LABEL: @StructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -194,13 +185,6 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) { ; CHECK-LABEL: @StructOfStructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4 -; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 -; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 -; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll @@ -30,13 +30,6 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) { ; CHECK-LABEL: @StructOfStructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4 -; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 -; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 -; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll @@ -30,13 +30,6 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) { ; CHECK-LABEL: @StructOfStructOfStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4 -; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 -; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 -; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -11,9 +11,6 @@ define void @exact(i32* %x) { ; CHECK-LABEL: @exact( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], @@ -47,9 +44,6 @@ define void @not_exact(i32* %x) { ; CHECK-LABEL: @not_exact( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], @@ -83,9 +77,6 @@ define void @nsw(i32* %x) { ; CHECK-LABEL: @nsw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], @@ -119,9 +110,6 @@ define void @not_nsw(i32* %x) { ; CHECK-LABEL: @not_nsw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], @@ -155,9 +143,6 @@ define void @nuw(i32* %x) { ; CHECK-LABEL: @nuw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], @@ -191,9 +176,6 @@ define void @not_nuw(i32* %x) { ; CHECK-LABEL: @not_nuw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], @@ -227,9 +209,6 @@ define void @not_nsw_but_nuw(i32* %x) { ; CHECK-LABEL: @not_nsw_but_nuw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], @@ -263,9 +242,6 @@ define void @nnan(float* %x) { ; CHECK-LABEL: @nnan( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], @@ -299,9 +275,6 @@ define void @not_nnan(float* %x) { ; CHECK-LABEL: @not_nnan( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], @@ -335,9 +308,6 @@ define void @only_fast(float* %x) { ; CHECK-LABEL: @only_fast( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], @@ -371,9 +341,6 @@ define void @only_arcp(float* %x) { ; CHECK-LABEL: @only_arcp( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], @@ -407,9 +374,6 @@ define void @addsub_all_nsw(i32* %x) { ; CHECK-LABEL: @addsub_all_nsw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], @@ -445,9 +409,6 @@ define void @addsub_some_nsw(i32* %x) { ; CHECK-LABEL: @addsub_some_nsw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], @@ -483,9 +444,6 @@ define void @addsub_no_nsw(i32* %x) { ; CHECK-LABEL: @addsub_no_nsw( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], @@ -521,7 +479,6 @@ define void @fcmp_fast(double* %x) #1 { ; CHECK-LABEL: @fcmp_fast( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer @@ -555,7 +512,6 @@ define void @fcmp_fast_unary_fneg(double* %x) #1 { ; CHECK-LABEL: @fcmp_fast_unary_fneg( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer @@ -589,7 +545,6 @@ define void @fcmp_no_fast(double* %x) #1 { ; CHECK-LABEL: @fcmp_no_fast( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer @@ -623,7 +578,6 @@ define void @fcmp_no_fast_unary_fneg(double* %x) #1 { ; CHECK-LABEL: @fcmp_no_fast_unary_fneg( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer @@ -659,7 +613,6 @@ define void @call_fast(double* %x) { ; CHECK-LABEL: @call_fast( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]]) @@ -685,7 +638,6 @@ define void @call_no_fast(double* %x) { ; CHECK-LABEL: @call_no_fast( ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -22,17 +22,10 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) @@ -102,26 +95,12 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_P_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_P_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_P_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_P_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_P_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_P_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_P_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_Q_1:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_Q_2:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_Q_3:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_Q_4:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_Q_5:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_Q_6:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_Q_7:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 7 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) @@ -207,27 +186,13 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_P_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_P_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_P_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_P_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_P_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_P_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_P_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[ARRAYIDX_Q_1:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_Q_2:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_Q_3:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_Q_4:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_Q_5:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_Q_6:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_Q_7:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 7 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[SHUFFLE]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -17,14 +17,7 @@ define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_add( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] @@ -65,14 +58,7 @@ define i32 @test_mul(i32* nocapture readonly %p) { ; AVX-LABEL: @test_mul( ; AVX-NEXT: entry: -; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; AVX-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; AVX-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; AVX-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; AVX-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; AVX-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) ; AVX-NEXT: ret i32 [[TMP2]] @@ -139,14 +125,7 @@ define i32 @test_and(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_and( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] @@ -187,14 +166,7 @@ define i32 @test_or(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] @@ -235,14 +207,7 @@ define i32 @test_xor(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_xor( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,13 +9,6 @@ ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll @@ -16,30 +16,15 @@ ; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], 5 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP1]], 2 -; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP1]], 6 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP6]] -; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP1]], 3 -; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP1]], 7 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP10]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -4,36 +4,30 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* undef, i64 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* undef, i64 5 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* undef, i64 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* undef, i64 7 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 2 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 3 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>* -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] -; CHECK-NEXT: [[TMP13:%.*]] = shl nsw <4 x i32> [[TMP12]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP17]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP19]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP21]], i32 3 -; CHECK-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[TMP14]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = sub nsw <4 x i32> [[TMP14]], [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP24]], <4 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP25]] -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> [[TMP27]], <4 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -10,21 +10,6 @@ ; CHECK-NEXT: br i1 undef, label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] ; CHECK: if.then22.i: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX_I_I7_1_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX_I_I7_2_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX_I_I7_3_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX_I_I7_4_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7 -; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8 -; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9 -; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10 -; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11 -; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12 -; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13 -; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14 -; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/return.ll b/llvm/test/Transforms/SLPVectorizer/X86/return.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/return.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll @@ -43,8 +43,6 @@ ; CHECK-LABEL: @return2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i32 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i32 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[X]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[X]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX1]] to <2 x double>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -10,7 +10,6 @@ ; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 1 ; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds [3 x float], [3 x float]* [[T8]], i64 1, i64 0 ; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 1, i64 0 -; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 1, i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[T14]] to <2 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4 ; CHECK-NEXT: br label [[T37:%.*]] @@ -19,9 +18,6 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> , [[TMP6]] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 0 -; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 1 -; CHECK-NEXT: [[T31:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 2 -; CHECK-NEXT: [[T33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 3 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[T21]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4 ; CHECK-NEXT: [[T88:%.*]] = bitcast float* [[T9]] to <2 x float>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll @@ -19,9 +19,6 @@ ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 2 ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3 -; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 4 -; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 5 -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[I]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[I5:%.*]] = add i32 undef, undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll @@ -12,10 +12,7 @@ define void @test(float * %a, float * %b, float * %c, float * %d) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: call void @unknown() @@ -45,20 +42,11 @@ ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1 -; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds float, float* [[C]], i64 2 -; CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds float, float* [[C]], i64 3 -; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 1 -; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, float* [[D]], i64 2 -; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds float, float* [[D]], i64 3 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[D]] to <4 x float>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[D:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -15,30 +15,15 @@ ; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], 5 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP1]], 2 -; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP1]], 6 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP6]] -; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP1]], 3 -; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP1]], 7 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP10]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll @@ -22,15 +22,13 @@ ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -47,19 +45,13 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { ; SSE-LABEL: @loadext_4i8_to_4i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -84,19 +76,13 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { ; SSE-LABEL: @loadext_4i8_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -121,27 +107,13 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { ; SSE-LABEL: @loadext_8i8_to_8i16( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> ; SSE-NEXT: ret <8 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> ; AVX-NEXT: ret <8 x i16> [[TMP3]] @@ -182,27 +154,13 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { ; SSE-LABEL: @loadext_8i8_to_8i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -243,43 +201,13 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; SSE-LABEL: @loadext_16i8_to_16i16( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SSE-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SSE-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SSE-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SSE-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SSE-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SSE-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SSE-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> ; SSE-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_16i8_to_16i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> ; AVX-NEXT: ret <16 x i16> [[TMP3]] @@ -356,15 +284,13 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { ; SSE-LABEL: @loadext_2i16_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -381,19 +307,13 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { ; SSE-LABEL: @loadext_4i16_to_4i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -418,19 +338,13 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { ; SSE-LABEL: @loadext_4i16_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -455,27 +369,13 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; SSE-LABEL: @loadext_8i16_to_8i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i16_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -520,15 +420,13 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { ; SSE-LABEL: @loadext_2i32_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -545,19 +443,13 @@ define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { ; SSE-LABEL: @loadext_4i32_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i32_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll @@ -22,15 +22,13 @@ ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -47,19 +45,13 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { ; SSE-LABEL: @loadext_4i8_to_4i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -84,19 +76,13 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { ; SSE-LABEL: @loadext_4i8_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -121,27 +107,13 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { ; SSE-LABEL: @loadext_8i8_to_8i16( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> ; SSE-NEXT: ret <8 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> ; AVX-NEXT: ret <8 x i16> [[TMP3]] @@ -182,27 +154,13 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { ; SSE-LABEL: @loadext_8i8_to_8i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -243,43 +201,13 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; SSE-LABEL: @loadext_16i8_to_16i16( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SSE-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SSE-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SSE-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SSE-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SSE-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SSE-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SSE-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> ; SSE-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_16i8_to_16i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> ; AVX-NEXT: ret <16 x i16> [[TMP3]] @@ -356,15 +284,13 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { ; SSE-LABEL: @loadext_2i16_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -381,19 +307,13 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { ; SSE-LABEL: @loadext_4i16_to_4i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -418,19 +338,13 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { ; SSE-LABEL: @loadext_4i16_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -455,27 +369,13 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; SSE-LABEL: @loadext_8i16_to_8i32( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i16_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -520,15 +420,13 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { ; SSE-LABEL: @loadext_2i32_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -545,19 +443,13 @@ define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { ; SSE-LABEL: @loadext_4i32_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i32_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -4,12 +4,8 @@ define void @wombat(i32* %ptr, i32* %ptr1) { ; CHECK-LABEL: @wombat( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 -; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> @@ -70,13 +66,6 @@ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i16, i16* undef, i32 1 -; CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i16, i16* undef, i32 2 -; CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i16, i16* undef, i32 3 -; CHECK-NEXT: [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i16, i16* undef, i32 4 -; CHECK-NEXT: [[ARRAYIDX11_5:%.*]] = getelementptr inbounds i16, i16* undef, i32 5 -; CHECK-NEXT: [[ARRAYIDX11_6:%.*]] = getelementptr inbounds i16, i16* undef, i32 6 -; CHECK-NEXT: [[ARRAYIDX11_7:%.*]] = getelementptr inbounds i16, i16* undef, i32 7 ; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* undef, align 2 ; CHECK-NEXT: [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: br label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll @@ -8,11 +8,7 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.a* [[P:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.a* [[P]], i64 0, i32 1 -; CHECK-NEXT: [[O:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.a* [[P]], i64 0, i32 2 -; CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.a* [[P]], i64 0, i32 3 -; CHECK-NEXT: [[H:%.*]] = getelementptr inbounds [[CLASS_E:%.*]], %class.e* [[THIS:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[G:%.*]] = getelementptr inbounds [[CLASS_E]], %class.e* [[THIS]], i64 0, i32 0 +; CHECK-NEXT: [[G:%.*]] = getelementptr inbounds [[CLASS_E:%.*]], %class.e* [[THIS:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ADD7:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll @@ -9,27 +9,13 @@ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 1, i64 0 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 15 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 1 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 7 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 2 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 6 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 3 ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 4 -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 4 ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 12 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 5 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 13 -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 6 ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 14 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 1, i64 7 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 5 -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32*> poison, i32* [[ARRAYIDX1]], i32 0 @@ -118,21 +104,8 @@ ; CHECK-NEXT: [[P1:%.*]] = alloca [16 x i32], align 16 ; CHECK-NEXT: [[P2:%.*]] = alloca [16 x i32], align 16 ; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 4 -; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 5 -; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 6 -; CHECK-NEXT: [[G13:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 7 ; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 12 -; CHECK-NEXT: [[G21:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 13 -; CHECK-NEXT: [[G22:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 14 -; CHECK-NEXT: [[G23:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 15 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[G10]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G20]] to <4 x i32>* @@ -191,21 +164,10 @@ ; CHECK-NEXT: [[P3:%.*]] = alloca [16 x i32], align 16 ; CHECK-NEXT: [[P4:%.*]] = alloca [16 x i32], align 16 ; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 4 -; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 5 ; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 6 -; CHECK-NEXT: [[G13:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 7 ; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P3]], i32 0, i64 12 -; CHECK-NEXT: [[G21:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P3]], i32 0, i64 13 ; CHECK-NEXT: [[G22:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P4]], i32 0, i64 14 -; CHECK-NEXT: [[G23:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P4]], i32 0, i64 15 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 2 -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 3 -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 5 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 6 -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[G10]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G12]] to <2 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -6,13 +6,11 @@ ; Base case without allocas or stacksave define void @basecase(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @basecase( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i8*, i8** [[A:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A]] to <2 x i8*>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8 ; CHECK-NEXT: store i8* null, i8** [[A]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; @@ -34,7 +32,6 @@ ; Using two allocas and a buildvector define void @allocas(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @allocas( -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0 @@ -42,7 +39,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8*> [[TMP3]], i32 0 ; CHECK-NEXT: store i8* [[TMP4]], i8** [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP5]], align 8 ; CHECK-NEXT: ret void ; @@ -127,7 +124,6 @@ define void @stacksave2(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @stacksave2( -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave() ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 @@ -138,7 +134,7 @@ ; CHECK-NEXT: store i8* [[TMP4]], i8** [[A:%.*]], align 8 ; CHECK-NEXT: call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @llvm.stackrestore(i8* [[STACK]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP5]], align 8 ; CHECK-NEXT: ret void ; @@ -163,7 +159,6 @@ define void @stacksave3(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @stacksave3( ; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave() -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR4]] @@ -171,7 +166,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; @@ -197,16 +192,14 @@ ; encountered during dependency scanning via the memory chain. define void @stacksave4(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @stacksave4( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i8*, i8** [[A:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A]] to <2 x i8*>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(i8* inalloca(i8) [[X]]) #[[ATTR4]] ; CHECK-NEXT: call void @llvm.stackrestore(i8* [[STACK]]) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; @@ -231,16 +224,14 @@ define void @stacksave5(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @stacksave5( -; CHECK-NEXT: [[A2:%.*]] = getelementptr i8*, i8** [[A:%.*]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A]] to <2 x i8*>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(i8* inalloca(i8) [[X]]) #[[ATTR4]] ; CHECK-NEXT: call void @llvm.stackrestore(i8* [[STACK]]) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; @@ -269,7 +260,6 @@ define void @stackrestore1(i8** %a, i8** %b, i8** %c) { ; CHECK-LABEL: @stackrestore1( ; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave() -; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 ; CHECK-NEXT: store i8 0, i8* [[V1]], align 1 ; CHECK-NEXT: call void @llvm.stackrestore(i8* [[STACK]]) @@ -278,7 +268,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B]] to <2 x i8*>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>* ; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; @@ -316,14 +306,8 @@ ; CHECK-NEXT: [[VAR15:%.*]] = call i8* @wibble(i8* [[VAR2]]) ; CHECK-NEXT: [[VAR16:%.*]] = call i8* @wibble(i8* [[VAR3]]) ; CHECK-NEXT: [[VAR32:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 0 -; CHECK-NEXT: [[VAR33:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 1 -; CHECK-NEXT: [[VAR34:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 2 -; CHECK-NEXT: [[VAR35:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[VAR32]] to <4 x i8*>* ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4 -; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5 -; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6 -; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7 ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[VAR17:%.*]] = call i8* @wibble(i8* [[VAR4]]) @@ -375,9 +359,6 @@ ; CHECK-LABEL: @spam( ; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x i8*], align 8 ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4 -; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5 -; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6 -; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7 ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll @@ -6,17 +6,8 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) { ; CHECK-LABEL: @jumbled-load( ; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3 ; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1 -; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll @@ -36,10 +36,8 @@ ; CHECK-NEXT: br label [[BLOCK2:%.*]] ; CHECK: block2: ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 4 ; CHECK-NEXT: store i32 [[LOAD_9]], i32* [[GEP_9]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[GEP_10]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP11]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll @@ -25,9 +25,6 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], @@ -82,15 +79,6 @@ ; CHECK-LABEL: @store_reverse( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P3:%.*]], i64 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 7 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 9 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 6 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 10 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 11 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8 @@ -145,9 +133,6 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8 @@ -227,9 +212,6 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, i64* [[ARRAYIDX9]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll @@ -10,11 +10,9 @@ ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1 ; ENABLED-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>* @@ -69,11 +67,9 @@ ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1 ; ENABLED-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>* @@ -208,13 +204,11 @@ ; ENABLED-LABEL: @supernode_scheduling( ; ENABLED-NEXT: entry: ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1 ; ENABLED-NEXT: [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1 ; ENABLED-NEXT: [[C:%.*]] = load double, double* [[IDXC]], align 8 ; ENABLED-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>* @@ -283,7 +277,6 @@ ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0 -; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8 ; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -10,8 +10,6 @@ ; CHECK-NEXT: [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[DST_ADDR_014:%.*]] = phi double* [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[SRC_ADDR_013:%.*]] = phi double* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[SRC_ADDR_013]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[DST_ADDR_014]] to <2 x double>* @@ -57,12 +55,6 @@ ; CHECK-NEXT: [[I_023:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC_ADDR_021]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[DST_ADDR_022]] to <4 x float>* @@ -166,11 +158,7 @@ ; CHECK-NEXT: [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 4 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_021]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>* @@ -224,13 +212,10 @@ define void @store_splat(float*, float) { ; CHECK-LABEL: @store_splat( ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; %3 = getelementptr inbounds float, float* %0, i64 0 @@ -248,9 +233,6 @@ ; CHECK-LABEL: @store_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR0]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: ret void @@ -315,13 +297,6 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[V1:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 undef to i16 ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 0 -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3 -; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 4 -; CHECK-NEXT: [[PTR5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 5 -; CHECK-NEXT: [[PTR6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 6 -; CHECK-NEXT: [[PTR7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 7 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[TMP2]], i32 1 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll @@ -7,13 +7,6 @@ ; CHECK-LABEL: @_Z2azv( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[DOTSROA_CAST_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 4, i32 0 -; CHECK-NEXT: [[DOTSROA_RAW_IDX_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 4, i32 1 -; CHECK-NEXT: [[DOTSROA_CAST_5:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 5, i32 0 -; CHECK-NEXT: [[DOTSROA_RAW_IDX_5:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 5, i32 1 -; CHECK-NEXT: [[DOTSROA_CAST_6:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 6, i32 0 -; CHECK-NEXT: [[DOTSROA_RAW_IDX_6:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 6, i32 1 -; CHECK-NEXT: [[DOTSROA_CAST_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 0 -; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll @@ -12,22 +12,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 4 -; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[T3]], align 4 -; CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 5 -; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[T5]], align 4 ; CHECK-NEXT: [[BAD:%.*]] = fadd float [[BAD]], 0.000000e+00 -; CHECK-NEXT: [[T7:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 6 -; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[T7]], align 4 -; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 7 -; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[T9]], align 4 ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-NEXT: [[T13:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 -; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[X:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP0]], <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -4,16 +4,10 @@ define void @add0(i32* noalias %dst, i32* noalias %src) { ; CHECK-LABEL: @add0( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -128,16 +122,10 @@ define void @sub1(i32* noalias %dst, i32* noalias %src) { ; CHECK-LABEL: @sub1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -166,16 +154,10 @@ define void @sub2(i32* noalias %dst, i32* noalias %src) { ; CHECK-LABEL: @sub2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -376,16 +358,10 @@ define void @shl1(i32* noalias %dst, i32* noalias %src) { ; CHECK-LABEL: @shl1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -414,16 +390,10 @@ define void @add0f(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @add0f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -538,16 +508,10 @@ define void @sub1f(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @sub1f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -576,16 +540,10 @@ define void @sub2f(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @sub2f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -743,16 +701,10 @@ define void @add0fn(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @add0fn( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -867,16 +819,10 @@ define void @sub1fn(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @sub1fn( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; @@ -905,16 +851,10 @@ define void @sub2fn(float* noalias %dst, float* noalias %src) { ; CHECK-LABEL: @sub2fn( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2 -; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3 -; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[ISEC:%.*]], i64 0 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[ISEC]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP0:%.*]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll @@ -4,14 +4,8 @@ define void @foo(i8* %c, float* %d) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[C:%.*]], i64 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[C]], i64 1 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[C]], i64 2 -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i8, i8* [[C]], i64 3 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 -1 -; CHECK-NEXT: [[ADD_PTR37:%.*]] = getelementptr inbounds float, float* [[D]], i64 -2 -; CHECK-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds float, float* [[D]], i64 -3 -; CHECK-NEXT: [[ADD_PTR53:%.*]] = getelementptr inbounds float, float* [[D]], i64 -4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[C:%.*]], i64 1 +; CHECK-NEXT: [[ADD_PTR53:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 -4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX4]] to <4 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -4,8 +4,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 @@ -52,10 +51,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) { ; CHECK-LABEL: @foo1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 @@ -106,10 +102,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) { ; CHECK-LABEL: @foo2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 3 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll @@ -4,9 +4,7 @@ define void @test(double* %isec) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[ISEC:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[ISEC]], i64 0 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ISEC]], i64 3 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[ISEC:%.*]], i64 0 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[ISEC]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX10]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll @@ -22,15 +22,13 @@ ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -47,28 +45,19 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { ; SSE2-LABEL: @loadext_4i8_to_4i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; SSE2-NEXT: ret <4 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_4i8_to_4i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; SLM-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -93,28 +82,19 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { ; SSE2-LABEL: @loadext_4i8_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i8_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -139,40 +119,19 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { ; SSE2-LABEL: @loadext_8i8_to_8i16( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; SSE2-NEXT: ret <8 x i16> [[TMP3]] ; ; SLM-LABEL: @loadext_8i8_to_8i16( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; SLM-NEXT: ret <8 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; AVX-NEXT: ret <8 x i16> [[TMP3]] @@ -213,40 +172,19 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { ; SSE2-LABEL: @loadext_8i8_to_8i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_8i8_to_8i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -287,64 +225,19 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; SSE2-LABEL: @loadext_16i8_to_16i16( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; SSE2-NEXT: ret <16 x i16> [[TMP3]] ; ; SLM-LABEL: @loadext_16i8_to_16i16( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; SLM-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_16i8_to_16i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; AVX-NEXT: ret <16 x i16> [[TMP3]] @@ -421,22 +314,19 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { ; SSE2-LABEL: @loadext_2i16_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_2i16_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -453,28 +343,19 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { ; SSE2-LABEL: @loadext_4i16_to_4i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SSE2-NEXT: ret <4 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_4i16_to_4i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SLM-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -499,28 +380,19 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { ; SSE2-LABEL: @loadext_4i16_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i16_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -545,40 +417,19 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; SSE2-LABEL: @loadext_8i16_to_8i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_8i16_to_8i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i16_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -623,22 +474,19 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { ; SSE2-LABEL: @loadext_2i32_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_2i32_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -655,28 +503,19 @@ define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { ; SSE2-LABEL: @loadext_4i32_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i32_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i32_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll @@ -22,15 +22,13 @@ ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -47,28 +45,19 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { ; SSE2-LABEL: @loadext_4i8_to_4i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; SSE2-NEXT: ret <4 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_4i8_to_4i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; SLM-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -93,28 +82,19 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { ; SSE2-LABEL: @loadext_4i8_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i8_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i8_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -139,40 +119,19 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { ; SSE2-LABEL: @loadext_8i8_to_8i16( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; SSE2-NEXT: ret <8 x i16> [[TMP3]] ; ; SLM-LABEL: @loadext_8i8_to_8i16( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; SLM-NEXT: ret <8 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> ; AVX-NEXT: ret <8 x i16> [[TMP3]] @@ -213,40 +172,19 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { ; SSE2-LABEL: @loadext_8i8_to_8i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_8i8_to_8i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i8_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -287,64 +225,19 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; SSE2-LABEL: @loadext_16i8_to_16i16( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; SSE2-NEXT: ret <16 x i16> [[TMP3]] ; ; SLM-LABEL: @loadext_16i8_to_16i16( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; SLM-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX-LABEL: @loadext_16i8_to_16i16( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>* ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; AVX-NEXT: ret <16 x i16> [[TMP3]] @@ -421,22 +314,19 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { ; SSE2-LABEL: @loadext_2i16_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_2i16_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -453,28 +343,19 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { ; SSE2-LABEL: @loadext_4i16_to_4i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SSE2-NEXT: ret <4 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_4i16_to_4i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SLM-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; AVX-NEXT: ret <4 x i32> [[TMP3]] @@ -499,28 +380,19 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { ; SSE2-LABEL: @loadext_4i16_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i16_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i16_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] @@ -545,40 +417,19 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; SSE2-LABEL: @loadext_8i16_to_8i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP3]] ; ; SLM-LABEL: @loadext_8i16_to_8i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @loadext_8i16_to_8i32( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>* ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> ; AVX-NEXT: ret <8 x i32> [[TMP3]] @@ -623,22 +474,19 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { ; SSE2-LABEL: @loadext_2i32_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_2i32_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; SLM-NEXT: ret <2 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[TMP3]] @@ -655,28 +503,19 @@ define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { ; SSE2-LABEL: @loadext_4i32_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; SSE2-NEXT: ret <4 x i64> [[TMP3]] ; ; SLM-LABEL: @loadext_4i32_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @loadext_4i32_to_4i64( -; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* +; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>* ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> ; AVX-NEXT: ret <4 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll b/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll --- a/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll +++ b/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll @@ -8,9 +8,6 @@ define void @test_sideeffect(float* %p) { ; CHECK-LABEL: @test_sideeffect( ; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 ; CHECK-NEXT: call void @llvm.sideeffect() ; CHECK-NEXT: call void @llvm.sideeffect() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>* @@ -41,9 +38,6 @@ define void @test_inaccessiblememonly(float* %p) { ; CHECK-LABEL: @test_inaccessiblememonly( ; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 -; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 ; CHECK-NEXT: call void @foo() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @foo() #[[ATTR1]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*