Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1077,6 +1077,12 @@ APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); }; + auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width, + unsigned DemandedWidth) { + APInt UndefElts(Width, 0); + APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth); + return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); + }; switch (II->getIntrinsicID()) { default: break; @@ -1423,6 +1429,52 @@ break; } + case Intrinsic::x86_sse_add_ss: + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse_div_ss: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse2_add_sd: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + case Intrinsic::x86_sse2_div_sd: + case Intrinsic::x86_sse2_min_sd: + case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse2_cmp_sd: { + // These intrinsics only demand the lowest element of the second input + // vector. + Value *Arg1 = II->getArgOperand(1); + unsigned VWidth = Arg1->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { + II->setArgOperand(1, V); + return II; + } + break; + } + + case Intrinsic::x86_sse41_round_ss: + case Intrinsic::x86_sse41_round_sd: { + // These intrinsics demand the upper elements of the first input vector and + // the lowest element of the second input vector. + bool MadeChange = false; + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + unsigned VWidth = Arg0->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) { + II->setArgOperand(0, V); + MadeChange = true; + } + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { + II->setArgOperand(1, V); + MadeChange = true; + } + if (MadeChange) + return II; + break; + } + // Constant fold ashr( , Ci ). // Constant fold lshr( , Ci ). // Constant fold shl( , Ci ). Index: llvm/trunk/test/Transforms/InstCombine/x86-sse.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-sse.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-sse.ll @@ -118,11 +118,8 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_add_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -138,11 +135,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]]) -; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]]) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 ; CHECK-NEXT: ret float [[R]] ; %1 = insertelement <4 x float> undef, float %a, i32 0 @@ -181,11 +175,8 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_sub_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -230,11 +221,8 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_mul_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -279,11 +267,8 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_div_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -299,11 +284,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]]) -; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]]) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 ; CHECK-NEXT: ret float [[R]] ; %1 = insertelement <4 x float> undef, float %a, i32 0 @@ -342,11 +324,8 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_min_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -394,11 +373,8 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_max_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> [[TMP3]]) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -446,11 +422,8 @@ define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_cmp_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> [[TMP3]], i8 0) -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -466,11 +439,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i8 0) -; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]], i8 0) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 ; CHECK-NEXT: ret float [[R]] ; %1 = insertelement <4 x float> undef, float %a, i32 0 Index: llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll @@ -34,9 +34,8 @@ define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_add_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1) @@ -48,10 +47,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -67,10 +65,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -83,9 +80,8 @@ define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_sub_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1) @@ -108,7 +104,7 @@ define double @test_sub_sd_1(double %a, double %b) { ; CHECK-LABEL: @test_sub_sd_1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> , <2 x double> ) +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> , <2 x double> undef) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: ret double [[TMP2]] ; @@ -123,9 +119,8 @@ define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_mul_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1) @@ -148,7 +143,7 @@ define double @test_mul_sd_1(double %a, double %b) { ; CHECK-LABEL: @test_mul_sd_1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> , <2 x double> ) +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> , <2 x double> undef) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: ret double [[TMP2]] ; @@ -163,9 +158,8 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_div_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1) @@ -177,10 +171,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -196,10 +189,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -212,9 +204,8 @@ define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_min_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1) @@ -240,7 +231,7 @@ define double @test_min_sd_1(double %a, double %b) { ; CHECK-LABEL: @test_min_sd_1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> , <2 x double> ) +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> , <2 x double> undef) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: ret double [[TMP2]] ; @@ -255,9 +246,8 @@ define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_max_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> [[TMP1]]) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1) @@ -283,7 +273,7 @@ define double @test_max_sd_1(double %a, double %b) { ; CHECK-LABEL: @test_max_sd_1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> , <2 x double> ) +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> , <2 x double> undef) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: ret double [[TMP2]] ; @@ -298,9 +288,8 @@ define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_cmp_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> [[TMP1]], i8 0) -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0) @@ -312,10 +301,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]], i8 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -331,10 +319,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP3]], i8 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: ret double [[TMP5]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 Index: llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll @@ -4,10 +4,8 @@ define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_round_sd( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %a, double 1.000000e+00, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 10) -; CHECK-NEXT: ret <2 x double> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 10) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -17,13 +15,10 @@ define double @test_round_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_round_sd_0( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> , <2 x double> [[TMP1]], i32 10) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 +; CHECK-NEXT: ret double [[TMP3]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -36,13 +31,10 @@ define double @test_round_sd_1(double %a, double %b) { ; CHECK-LABEL: @test_round_sd_1( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> , <2 x double> [[TMP1]], i32 10) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 +; CHECK-NEXT: ret double [[TMP3]] ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -55,14 +47,8 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_round_ss( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP3]], <4 x float> [[TMP6]], i32 10) -; CHECK-NEXT: ret <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> %b, i32 10) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -76,16 +62,9 @@ define float @test_round_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_round_ss_0( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10) -; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> [[TMP1]], i32 10) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; CHECK-NEXT: ret float [[R]] ; %1 = insertelement <4 x float> undef, float %a, i32 0 @@ -103,16 +82,9 @@ define float @test_round_ss_2(float %a, float %b) { ; CHECK-LABEL: @test_round_ss_2( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10) -; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> [[TMP1]], i32 10) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 ; CHECK-NEXT: ret float [[R]] ; %1 = insertelement <4 x float> undef, float %a, i32 0