diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7561,8 +7561,10 @@ UniqueValues.clear(); UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); } - UniqueValues.append(VF - UniqueValues.size(), - PoisonValue::get(VL[0]->getType())); + // Pad UniqueValues with any existing undef/poison values. + UniqueValues.append( + std::next(VL.begin(), VL.size() - (VF - UniqueValues.size())), + VL.end()); VL = UniqueValues; } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -17,18 +17,24 @@ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP6]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[DST:%.*]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP13]], <4 x i8>* [[TMP14]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP9]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <4 x i1> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP14]], <4 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[DST:%.*]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP20]], align 1 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -197,10 +197,17 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* -; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -260,17 +267,24 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* -; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8 ; CHECK-NEXT: [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1 ; CHECK-NEXT: [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0 -; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP6]], i8 [[X]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0 -; CHECK-NEXT: store i8 [[TMP7]], i8* [[GEP_8]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0 +; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP13]], i8 [[X]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP11]], i32 0 +; CHECK-NEXT: store i8 [[TMP14]], i8* [[GEP_8]], align 2 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9 ; CHECK-NEXT: [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1 ; CHECK-NEXT: [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1 @@ -415,10 +429,13 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <4 x i16>* -; CHECK-NEXT: store <4 x i16> [[TMP4]], <4 x i16>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[PTR]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP7]], <4 x i16>* [[TMP8]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -455,10 +472,17 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[X]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[X]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[X]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[X]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP10]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[SHUFFLE]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* [[TMP12]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -546,10 +570,13 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -232,14 +232,16 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP7]], <9 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -357,27 +359,37 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP17]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_0]], i32 5 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_1]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP33]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP35]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -474,23 +486,28 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_1]], i32 5 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_0]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_2]], i32 7 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP17]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[TMP26:%.*]] = fmul <8 x double> [[TMP25]], [[SHUFFLE1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP22]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP27]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -587,27 +604,37 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_2]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP17]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_6]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_1]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_0]], i32 4 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_3]], i32 5 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_5]], i32 7 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_0]], i32 4 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_1]], i32 6 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_0]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP33]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP35]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -8,19 +8,22 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[SHUFFLE]], +; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP3]], 0 -; CHECK-NEXT: [[OP_RDX1]] = add i64 [[TMP7]], [[OP_RDX]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[OP_RDX1]] = add i64 [[TMP10]], [[OP_RDX]] ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -546,7 +546,10 @@ ; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32 ; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[S]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[S]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[S]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -9,13 +9,16 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar() ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll @@ -19,12 +19,18 @@ ; CHECK-NEXT: [[V2:%.*]] = sub i64 [[B0]], 1 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>* -; CHECK-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[V1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[V1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[V1]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[V2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[V2]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[V2]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[TMP8]], <4 x i64>* [[TMP9]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -75,10 +81,13 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[D0]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[V1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[SHUFFLE]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V1]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[V1]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SHUFFLE]], [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -17,10 +17,16 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A0:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[A0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[A0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[A0]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A0]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A0]], i32 5 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A0]], i32 6 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A0]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -18,21 +18,79 @@ ; SSE-LABEL: @splat( ; SSE-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 ; SSE-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1 -; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> -; SSE-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 -; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer -; SSE-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]] -; SSE-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 +; SSE-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[A]], i32 2 +; SSE-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[A]], i32 3 +; SSE-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[A]], i32 4 +; SSE-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[B]], i32 5 +; SSE-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[A]], i32 6 +; SSE-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[B]], i32 7 +; SSE-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[A]], i32 8 +; SSE-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[A]], i32 9 +; SSE-NEXT: [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[A]], i32 10 +; SSE-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[A]], i32 11 +; SSE-NEXT: [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[A]], i32 12 +; SSE-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[A]], i32 13 +; SSE-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[A]], i32 14 +; SSE-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[A]], i32 15 +; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <16 x i32> +; SSE-NEXT: [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +; SSE-NEXT: [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[C]], i32 1 +; SSE-NEXT: [[TMP19:%.*]] = insertelement <16 x i8> [[TMP18]], i8 [[C]], i32 2 +; SSE-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> [[TMP19]], i8 [[C]], i32 3 +; SSE-NEXT: [[TMP21:%.*]] = insertelement <16 x i8> [[TMP20]], i8 [[C]], i32 4 +; SSE-NEXT: [[TMP22:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[C]], i32 5 +; SSE-NEXT: [[TMP23:%.*]] = insertelement <16 x i8> [[TMP22]], i8 [[C]], i32 6 +; SSE-NEXT: [[TMP24:%.*]] = insertelement <16 x i8> [[TMP23]], i8 [[C]], i32 7 +; SSE-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP24]], i8 [[C]], i32 8 +; SSE-NEXT: [[TMP26:%.*]] = insertelement <16 x i8> [[TMP25]], i8 [[C]], i32 9 +; SSE-NEXT: [[TMP27:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[C]], i32 10 +; SSE-NEXT: [[TMP28:%.*]] = insertelement <16 x i8> [[TMP27]], i8 [[C]], i32 11 +; SSE-NEXT: [[TMP29:%.*]] = insertelement <16 x i8> [[TMP28]], i8 [[C]], i32 12 +; SSE-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP29]], i8 [[C]], i32 13 +; SSE-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[C]], i32 14 +; SSE-NEXT: [[TMP32:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[C]], i32 15 +; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> poison, <16 x i32> zeroinitializer +; SSE-NEXT: [[TMP33:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]] +; SSE-NEXT: store <16 x i8> [[TMP33]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @splat( ; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer -; AVX-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]] -; AVX-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[A]], i32 2 +; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[A]], i32 3 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[A]], i32 4 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[B]], i32 5 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[A]], i32 6 +; AVX-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[B]], i32 7 +; AVX-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[A]], i32 8 +; AVX-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[A]], i32 9 +; AVX-NEXT: [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[A]], i32 10 +; AVX-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[A]], i32 11 +; AVX-NEXT: [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[A]], i32 12 +; AVX-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[A]], i32 13 +; AVX-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[A]], i32 14 +; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[A]], i32 15 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +; AVX-NEXT: [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[C]], i32 1 +; AVX-NEXT: [[TMP19:%.*]] = insertelement <16 x i8> [[TMP18]], i8 [[C]], i32 2 +; AVX-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> [[TMP19]], i8 [[C]], i32 3 +; AVX-NEXT: [[TMP21:%.*]] = insertelement <16 x i8> [[TMP20]], i8 [[C]], i32 4 +; AVX-NEXT: [[TMP22:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[C]], i32 5 +; AVX-NEXT: [[TMP23:%.*]] = insertelement <16 x i8> [[TMP22]], i8 [[C]], i32 6 +; AVX-NEXT: [[TMP24:%.*]] = insertelement <16 x i8> [[TMP23]], i8 [[C]], i32 7 +; AVX-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP24]], i8 [[C]], i32 8 +; AVX-NEXT: [[TMP26:%.*]] = insertelement <16 x i8> [[TMP25]], i8 [[C]], i32 9 +; AVX-NEXT: [[TMP27:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[C]], i32 10 +; AVX-NEXT: [[TMP28:%.*]] = insertelement <16 x i8> [[TMP27]], i8 [[C]], i32 11 +; AVX-NEXT: [[TMP29:%.*]] = insertelement <16 x i8> [[TMP28]], i8 [[C]], i32 12 +; AVX-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP29]], i8 [[C]], i32 13 +; AVX-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[C]], i32 14 +; AVX-NEXT: [[TMP32:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[C]], i32 15 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> poison, <16 x i32> zeroinitializer +; AVX-NEXT: [[TMP33:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]] +; AVX-NEXT: store <16 x i8> [[TMP33]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 ; AVX-NEXT: ret void ; %1 = xor i8 %c, %a @@ -91,15 +149,21 @@ ; ; AVX-LABEL: @same_opcode_on_one_side( ; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3 -; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]] -; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 +; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2 +; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1 +; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2 +; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3 +; AVX-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]] +; AVX-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; AVX-NEXT: ret void ; %add1 = add i32 %c, %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -69,11 +69,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -196,11 +199,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll @@ -19,10 +19,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -64,12 +67,15 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: ret i32 [[TMP8]] ; entry: %0 = load i32, i32* %A, align 4 @@ -102,12 +108,15 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: ret i32 [[TMP8]] ; entry: %0 = load i32, i32* %A, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll @@ -6,10 +6,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll @@ -6,10 +6,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[LD]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -33,10 +40,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -59,11 +70,18 @@ ; CHECK-LABEL: @diamond_broadcast2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[LD]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -87,10 +105,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -113,11 +135,17 @@ ; CHECK-LABEL: @diamond_broadcast3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[LD]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[LD]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -141,10 +169,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -8,15 +8,16 @@ ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 undef, i32 6 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP64]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -8,16 +8,19 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] -; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 +; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP9]], [[M:%.*]] +; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP9]], [[M]] ; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]] ; CHECK-NEXT: ret i32 [[ADD10]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -18,16 +18,18 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[K]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -993,31 +993,43 @@ ; CHECK-LABEL: @wobble( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], [[ARG]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP12]], [[OP_RDX]] ; CHECK-NEXT: ret i32 [[OP_RDX2]] ; ; THRESHOLD-LABEL: @wobble( ; THRESHOLD-NEXT: bb: ; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 -; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 -; THRESHOLD-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; THRESHOLD-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer -; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> -; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]] -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]] +; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 +; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 +; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 +; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 +; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 +; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 +; THRESHOLD-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer +; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> +; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], [[ARG]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP12]], [[OP_RDX]] ; THRESHOLD-NEXT: ret i32 [[OP_RDX2]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -25,7 +25,22 @@ ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[W]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[W]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[W]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[W]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[W]], i32 8 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[W]], i32 9 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[W]], i32 10 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[W]], i32 11 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[W]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[W]], i32 13 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[W]], i32 14 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[W]], i32 15 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP15]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -34,21 +49,21 @@ ; CHECK-NEXT: [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>* -; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>* -; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>* -; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <16 x i8> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = mul <16 x i32> [[TMP11]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP14]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>* +; CHECK-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>* +; CHECK-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>* +; CHECK-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>* +; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1 +; CHECK-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]] +; CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP28]], <16 x i8>* [[TMP29]], align 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0356]], 1 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16 ; CHECK-NEXT: [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -12,17 +12,18 @@ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP1]], i32 2 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP9]], undef +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP11]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP12]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -27,27 +27,26 @@ ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = invoke i32 poison(i8 addrspace(1)* nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[SHUFFLE1:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[SHUFFLE]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE]] = shufflevector <4 x i32> [[SHUFFLE1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[SHUFFLE1]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] +; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -15,13 +15,20 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <8 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[N]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[N]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[N]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[N]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[N]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[N]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[N]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -10,28 +10,38 @@ ; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> , i32 [[SUB102_1]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i32> [[TMP13]], -; CHECK-NEXT: [[TMP15:%.*]] = mul nuw <16 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = add <16 x i32> [[TMP15]], [[TMP12]] -; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP16]], [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP17]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP18]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD94_1]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SUB86_1]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD78_2]], i32 9 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[ADD78_2]], i32 10 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> , i32 [[SUB86_1]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[ADD78_1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[ADD94_1]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[SUB102_1]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[SUB102_3]], i32 4 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[ADD78_1]], i32 5 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[ADD94_1]], i32 6 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[SUB102_1]], i32 7 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[SUB102_3]], i32 12 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[SUB102_3]], i32 15 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP19]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i32> [[TMP20]], <16 x i32> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = lshr <16 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP25:%.*]] = mul nuw <16 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP26:%.*]] = add <16 x i32> [[TMP25]], [[TMP22]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <16 x i32> [[TMP26]], [[TMP25]] +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP27]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP28]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -16,18 +16,21 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP1]], 11 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP8]], [[TMP10]] -; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP1]], 11 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[TMP11]], [[TMP13]] +; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP12]], align 4 ; CHECK-NEXT: ret i32 undef ; %1 = mul nsw i32 %n, 5 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll @@ -161,9 +161,12 @@ ; CHECK-LABEL: @test_v4f32_v4f32_splat_store( ; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[X0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[X0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[X0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; %x0 = extractelement <4 x float> %f, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -15,25 +15,28 @@ ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[SUB14]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SUB14]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP6]], <4 x i32> undef +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[TMP8]] to <4 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP18]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -88,7 +88,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -126,7 +126,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -164,7 +164,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -201,7 +201,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -349,15 +349,19 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[Y3]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = freeze <8 x i1> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP10]]) -; CHECK-NEXT: ret i1 [[TMP11]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[X0]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[X1]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[X2]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[X3]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[Y1]], i32 5 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[Y2]], i32 6 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[Y3]], i32 7 +; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = freeze <8 x i1> [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP14]]) +; CHECK-NEXT: ret i1 [[TMP15]] ; %x0 = extractelement <8 x i32> %x, i32 0 %x1 = extractelement <8 x i32> %x, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,10 +9,11 @@ ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll @@ -56,23 +56,26 @@ ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[LD]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[LD]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], ; CHECK-NEXT: [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 ; CHECK-NEXT: [[PTRA4:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 3 -; CHECK-NEXT: store double [[TMP4]], double* [[PTRA1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP3]], i32 2 -; CHECK-NEXT: store double [[TMP5]], double* [[PTRA1]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP3]], i32 1 -; CHECK-NEXT: store double [[TMP6]], double* [[PTRA4]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP3]], i32 0 -; CHECK-NEXT: store double [[TMP7]], double* [[PTRA4]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3 +; CHECK-NEXT: store double [[TMP7]], double* [[PTRA1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2 +; CHECK-NEXT: store double [[TMP8]], double* [[PTRA1]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1 +; CHECK-NEXT: store double [[TMP9]], double* [[PTRA4]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0 +; CHECK-NEXT: store double [[TMP10]], double* [[PTRA4]], align 8 ; CHECK-NEXT: br label [[SEED_LOOP:%.*]] ; CHECK: seed_loop: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x double> [ [[TMP3]], [[BB1:%.*]] ], [ zeroinitializer, [[SEED_LOOP]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x double> [ [[TMP6]], [[BB1:%.*]] ], [ zeroinitializer, [[SEED_LOOP]] ] ; CHECK-NEXT: br label [[SEED_LOOP]] ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -13,27 +13,37 @@ ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[SHUFFLE1]], +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[SHUFFLE1]], ; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[CONV31_I]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> [[TMP9]], <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[SHR_12_I_I]], i32 13 -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i32> [[TMP13]], <16 x i32> [[TMP14]], <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP15]] to <16 x i8> -; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP17]], <16 x i8>* [[TMP18]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i32> [[TMP20]], <16 x i32> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SHR_12_I_I]], i32 13 +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <16 x i32> [[TMP23]], <16 x i32> [[TMP24]], <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = trunc <16 x i32> [[TMP25]] to <16 x i8> +; CHECK-NEXT: [[TMP27:%.*]] = and <16 x i8> [[TMP26]], +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP27]], <16 x i8>* [[TMP28]], align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll @@ -6,15 +6,20 @@ ; CHECK-NEXT: for.cond.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] ; CHECK: for.inc.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 ; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> , i32 [[DOTPR]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[DOTPR]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[DOTPR]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[DOTPR]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[DOTPR]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[DOTPR]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: br label [[L1_PREHEADER]] ; CHECK: L1.preheader: -; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] ; CHECK-NEXT: ret i32 0 ; for.cond.preheader: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll @@ -12,13 +12,16 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP4]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i32, i32* %x, i64 %i diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll @@ -52,10 +52,13 @@ ; CHECK-LABEL: @hr_or_mul( ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i16 3 to double ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[CVT0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP2]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast double [[TMP3]], [[CVT0]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[CVT0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[CVT0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CVT0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x double> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast double [[TMP6]], [[CVT0]] ; CHECK-NEXT: ret double [[OP_RDX]] ; %cvt0 = uitofp i16 3 to double diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -316,12 +316,17 @@ ; CHECK-NEXT: call void @quux(i32* inalloca(i32) [[VAR24]]) ; CHECK-NEXT: call void @llvm.stackrestore(i8* [[VAR23]]) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP2]], i8* [[VAR4]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8*> [[TMP3]], i8* [[VAR4]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8*> [[TMP4]], i8* [[VAR4]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP5]], <4 x i8*> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP2]], i8* [[VAR5]], i32 1 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i8*> [[TMP3]], <4 x i8*> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>* -; CHECK-NEXT: store <4 x i8*> [[SHUFFLE1]], <4 x i8*>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8*> [[TMP2]], i8* [[VAR5]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i8*> [[TMP6]], i8* [[VAR5]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8*> [[TMP7]], i8* [[VAR5]], i32 3 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i8*> [[TMP8]], <4 x i8*> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>* +; CHECK-NEXT: store <4 x i8*> [[SHUFFLE1]], <4 x i8*>* [[TMP9]], align 8 ; CHECK-NEXT: ret void ; %var2 = alloca i8 @@ -363,9 +368,11 @@ ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>* -; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP2]], i8* [[VAR5]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8*> [[TMP3]], i8* [[VAR5]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP4]], <4 x i8*> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>* +; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP5]], align 8 ; CHECK-NEXT: ret void ; %var4 = alloca i8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -213,9 +213,12 @@ ; CHECK-LABEL: @store_splat( ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP3]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP1]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4 ; CHECK-NEXT: ret void ; %3 = getelementptr inbounds float, float* %0, i64 0 @@ -299,9 +302,15 @@ ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[TMP2]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP5]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[TMP2]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP10]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[PTR0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP11]], align 16 ; CHECK-NEXT: ret void ; %1 = load i16, i16* %v1, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -29,214 +29,232 @@ ; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -183 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]] -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP21]], [[B_0]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP21]], i32 [[B_0]] +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP15]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP15]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP15]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp slt <4 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP21]], <4 x i32> [[TMP22]], <4 x i32> [[TMP20]] +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP23]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP24]], [[B_0]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP24]], i32 [[B_0]] ; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = icmp slt i32 [[SUB_116]], 0 ; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]] -; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_RDX1]] -; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_RDX1]] +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[NEG_117]], i32 [[SUB_116]] +; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP26]], [[OP_RDX1]] +; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP26]], i32 [[OP_RDX1]] ; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]] -; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = icmp slt i32 [[SUB_1_1]], 0 ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] -; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] +; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP28]], [[SPEC_SELECT8_120]] ; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]] -; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP28]], i32 [[SPEC_SELECT8_120]] ; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = icmp slt i32 [[SUB_2_1]], 0 ; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]] -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] -; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] +; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP30]], [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]] -; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP30]], i32 [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]] -; CHECK-NEXT: [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = icmp slt i32 [[SUB_3_1]], 0 ; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] -; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] +; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP32]], [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]] ; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32 -; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP32]], i32 [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = icmp slt i32 [[SUB_222]], 0 ; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]] -; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]] -; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[NEG_223]], i32 [[SUB_222]] +; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP34]], [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP34]], i32 [[SPEC_SELECT8_3_1]] ; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[SUB_1_2]], 0 ; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]] -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] -; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] -; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] +; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] +; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_226]] ; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_2_2]], 0 ; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] -; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]] -; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] +; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[CMP12_2_2]], [[TMP37]] +; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_1_2]] ; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_3_2]], 0 ; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] -; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]] -; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] +; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[TMP43:%.*]] = or i1 [[CMP12_3_2]], [[TMP40]] +; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP43]], i32 2, i32 [[SPEC_SELECT_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP42]], i32 [[SPEC_SELECT8_2_2]] ; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[SUB_328]], 0 ; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]] -; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_329]], i32 [[SUB_328]] +; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP45]], [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP45]], i32 [[SPEC_SELECT8_3_2]] ; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]] -; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_1_3]], 0 ; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] -; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] -; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] +; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP48:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] +; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_332]] ; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0 +; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_2_3]], 0 ; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] -; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]] -; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] +; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP51:%.*]] = or i1 [[CMP12_2_3]], [[TMP48]] +; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_1_3]] ; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]] -; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0 +; CHECK-NEXT: [[TMP52:%.*]] = icmp slt i32 [[SUB_3_3]], 0 ; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]] -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] -; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]] -; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0 -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]] -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i32> [[TMP53]], i32 0 -; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]] -; CHECK-NEXT: [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0 -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]] -; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1 -; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]] -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1 -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]] -; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] -; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2 -; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]] -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2 -; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]] -; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]] -; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3 -; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]] -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3 -; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]] -; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]] -; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4 -; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]] -; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]] -; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5 -; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]] -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5 -; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]] -; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] -; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6 -; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]] -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6 -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]] -; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]] -; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7 -; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]] -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7 -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], i32 [[NEG_3_5]], i32 [[TMP81]] -; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]] -; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8 -; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]] -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8 -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]] -; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9 -; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]] -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9 -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]] -; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] -; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10 -; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]] -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10 -; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]] -; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]] -; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11 -; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11 -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]] -; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]] -; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12 -; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]] -; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12 -; CHECK-NEXT: [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]] -; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i32> [[TMP53]], i32 13 -; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]] -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13 -; CHECK-NEXT: [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]] -; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] -; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14 -; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]] -; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14 -; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]] -; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]] -; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15 -; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]] -; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15 -; CHECK-NEXT: [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]] -; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]] -; CHECK-NEXT: [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]] -; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] +; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP53]], [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[CMP12_3_3]], [[TMP51]] +; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP54]], i32 3, i32 [[SPEC_SELECT_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP53]], i32 [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i32> [[TMP55]], i32 [[TMP15]], i32 1 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i32> [[TMP56]], i32 [[TMP15]], i32 2 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i32> [[TMP57]], i32 [[TMP15]], i32 3 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i32> [[TMP58]], i32 [[TMP15]], i32 4 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i32> [[TMP59]], i32 [[TMP15]], i32 5 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i32> [[TMP60]], i32 [[TMP15]], i32 6 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i32> [[TMP61]], i32 [[TMP15]], i32 7 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i32> [[TMP62]], i32 [[TMP15]], i32 8 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <16 x i32> [[TMP63]], i32 [[TMP15]], i32 9 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <16 x i32> [[TMP64]], i32 [[TMP15]], i32 10 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <16 x i32> [[TMP65]], i32 [[TMP15]], i32 11 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <16 x i32> [[TMP66]], i32 [[TMP15]], i32 12 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <16 x i32> [[TMP67]], i32 [[TMP15]], i32 13 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <16 x i32> [[TMP68]], i32 [[TMP15]], i32 14 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <16 x i32> [[TMP69]], i32 [[TMP15]], i32 15 +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP71:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]] +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <16 x i32> [[TMP71]], i32 0 +; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[TMP72]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp slt <16 x i32> [[TMP71]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP73]], i32 0 +; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_4]], i32 [[TMP72]] +; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP75]], i32 [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x i32> [[TMP71]], i32 1 +; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP76]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP73]], i32 1 +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[NEG_1_4]], i32 [[TMP76]] +; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP78]], [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP79:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] +; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP78]], i32 [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <16 x i32> [[TMP71]], i32 2 +; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP80]] +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i1> [[TMP73]], i32 2 +; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_2_4]], i32 [[TMP80]] +; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP83:%.*]] = or i1 [[CMP12_2_4]], [[TMP79]] +; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP82]], i32 [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP84:%.*]] = extractelement <16 x i32> [[TMP71]], i32 3 +; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP84]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i1> [[TMP73]], i32 3 +; CHECK-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], i32 [[NEG_3_4]], i32 [[TMP84]] +; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP86]], [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP87:%.*]] = or i1 [[CMP12_3_4]], [[TMP83]] +; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP87]], i32 4, i32 [[SPEC_SELECT_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP86]], i32 [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x i32> [[TMP71]], i32 4 +; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[TMP88]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP73]], i32 4 +; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_5]], i32 [[TMP88]] +; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <16 x i32> [[TMP71]], i32 5 +; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP91]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i1> [[TMP73]], i32 5 +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_1_5]], i32 [[TMP91]] +; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP94:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] +; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP93]], i32 [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <16 x i32> [[TMP71]], i32 6 +; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP95]] +; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x i1> [[TMP73]], i32 6 +; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP96]], i32 [[NEG_2_5]], i32 [[TMP95]] +; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP97]], [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP98:%.*]] = or i1 [[CMP12_2_5]], [[TMP94]] +; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP97]], i32 [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP99:%.*]] = extractelement <16 x i32> [[TMP71]], i32 7 +; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP99]] +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x i1> [[TMP73]], i32 7 +; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_3_5]], i32 [[TMP99]] +; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP102:%.*]] = or i1 [[CMP12_3_5]], [[TMP98]] +; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP102]], i32 5, i32 [[SPEC_SELECT_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP101]], i32 [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i32> [[TMP71]], i32 8 +; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[TMP103]] +; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x i1> [[TMP73]], i32 8 +; CHECK-NEXT: [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_6]], i32 [[TMP103]] +; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP105]], i32 [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[TMP106:%.*]] = extractelement <16 x i32> [[TMP71]], i32 9 +; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP106]] +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i1> [[TMP73]], i32 9 +; CHECK-NEXT: [[TMP108:%.*]] = select i1 [[TMP107]], i32 [[NEG_1_6]], i32 [[TMP106]] +; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP108]], [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP109:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] +; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP108]], i32 [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <16 x i32> [[TMP71]], i32 10 +; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP110]] +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i1> [[TMP73]], i32 10 +; CHECK-NEXT: [[TMP112:%.*]] = select i1 [[TMP111]], i32 [[NEG_2_6]], i32 [[TMP110]] +; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP112]], [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP113:%.*]] = or i1 [[CMP12_2_6]], [[TMP109]] +; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP112]], i32 [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP114:%.*]] = extractelement <16 x i32> [[TMP71]], i32 11 +; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP114]] +; CHECK-NEXT: [[TMP115:%.*]] = extractelement <16 x i1> [[TMP73]], i32 11 +; CHECK-NEXT: [[TMP116:%.*]] = select i1 [[TMP115]], i32 [[NEG_3_6]], i32 [[TMP114]] +; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP116]], [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP117:%.*]] = or i1 [[CMP12_3_6]], [[TMP113]] +; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP117]], i32 6, i32 [[SPEC_SELECT_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP116]], i32 [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP118:%.*]] = extractelement <16 x i32> [[TMP71]], i32 12 +; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[TMP118]] +; CHECK-NEXT: [[TMP119:%.*]] = extractelement <16 x i1> [[TMP73]], i32 12 +; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 [[NEG_7]], i32 [[TMP118]] +; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP120]], [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP120]], i32 [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i32> [[TMP71]], i32 13 +; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP121]] +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <16 x i1> [[TMP73]], i32 13 +; CHECK-NEXT: [[TMP123:%.*]] = select i1 [[TMP122]], i32 [[NEG_1_7]], i32 [[TMP121]] +; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP123]], [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP124:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] +; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP123]], i32 [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP125:%.*]] = extractelement <16 x i32> [[TMP71]], i32 14 +; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP125]] +; CHECK-NEXT: [[TMP126:%.*]] = extractelement <16 x i1> [[TMP73]], i32 14 +; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP126]], i32 [[NEG_2_7]], i32 [[TMP125]] +; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP127]], [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP128:%.*]] = or i1 [[CMP12_2_7]], [[TMP124]] +; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP127]], i32 [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP129:%.*]] = extractelement <16 x i32> [[TMP71]], i32 15 +; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP129]] +; CHECK-NEXT: [[TMP130:%.*]] = extractelement <16 x i1> [[TMP73]], i32 15 +; CHECK-NEXT: [[TMP131:%.*]] = select i1 [[TMP130]], i32 [[NEG_3_7]], i32 [[TMP129]] +; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP131]], [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP132:%.*]] = or i1 [[CMP12_3_7]], [[TMP128]] +; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP132]], i32 7, i32 [[SPEC_SELECT_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP131]], i32 [[SPEC_SELECT8_2_7]] ; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: