Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -485,8 +485,6 @@
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
@@ -499,8 +497,6 @@
               Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@@ -1765,16 +1761,12 @@
 // Vector convert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
         Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
         Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
         Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
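
Reader's note: the four intrinsics dropped above are expressible with plain IR conversions, which is what the auto-upgrade below emits. A minimal sketch of the generic forms (value names are illustrative, not part of this patch):

   %v4 = sitofp <4 x i32> %i to <4 x double>    ; covers llvm.x86.avx.cvtdq2.pd.256
   %w4 = fpext <4 x float> %f to <4 x double>   ; covers llvm.x86.avx.cvt.ps2.pd.256

The 128-bit variants convert only the low two source elements, so they additionally need the low-lane shufflevector produced by the AutoUpgrade change that follows.
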
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -178,6 +178,10 @@
         Name.startswith("x86.avx2.pbroadcast") ||
         Name.startswith("x86.avx.vpermil.") ||
         Name.startswith("x86.sse41.pmovsx") ||
+        Name == "x86.sse2.cvtdq2pd" ||
+        Name == "x86.sse2.cvtps2pd" ||
+        Name == "x86.avx.cvtdq2.pd.256" ||
+        Name == "x86.avx.cvt.ps2.pd.256" ||
         Name == "x86.avx.vinsertf128.pd.256" ||
         Name == "x86.avx.vinsertf128.ps.256" ||
         Name == "x86.avx.vinsertf128.si.256" ||
@@ -396,6 +400,30 @@
                                "pcmpgt");
     // need to sign extend since icmp returns vector of i1
     Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+  } else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
+             Name == "llvm.x86.sse2.cvtps2pd" ||
+             Name == "llvm.x86.avx.cvtdq2.pd.256" ||
+             Name == "llvm.x86.avx.cvt.ps2.pd.256") {
+    // Lossless i32/float to double conversion.
+    // Extract the bottom elements if necessary and convert to double vector.
+    Value *Src = CI->getArgOperand(0);
+    VectorType *SrcTy = cast<VectorType>(Src->getType());
+    VectorType *DstTy = cast<VectorType>(CI->getType());
+    Rep = CI->getArgOperand(0);
+
+    unsigned NumDstElts = DstTy->getNumElements();
+    if (NumDstElts < SrcTy->getNumElements()) {
+      SmallVector<uint32_t, 8> ShuffleMask;
+      for (int i = 0; i != (int)NumDstElts; ++i)
+        ShuffleMask.push_back(i);
+      Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), ShuffleMask);
+    }
+
+    bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
+    if (Int2Double)
+      Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+    else
+      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
   } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
             Name == "llvm.x86.avx.movnt.ps.256" ||
             Name == "llvm.x86.avx.movnt.pd.256") {
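
For reference, after this change a legacy call such as

   %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)

is rewritten by the code above into roughly (value names are illustrative):

   %lo  = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
   %res = sitofp <2 x i32> %lo to <2 x double>

with fpext in place of sitofp for the cvtps2pd forms, and with no shufflevector for the 256-bit intrinsics, since their source and destination element counts already match.
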
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -2164,29 +2164,24 @@
 // SSE2 instructions without OpSize prefix
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
-                    IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+                    [], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
-                     [(set VR256:$dst,
-                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
-                     IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+                     [], IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
 def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
-                     [(set VR256:$dst,
-                       (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
+                     [],
                      IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
 }
 let Predicates = [UseSSE2] in {
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
-                   IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
+                   [], IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
@@ -2201,30 +2196,22 @@
                        []>, VEX, Sched<[WriteCvtI2FLd]>;
 def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
-                       Sched<[WriteCvtI2F]>;
+                       []>, VEX, Sched<[WriteCvtI2F]>;
 def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
-                        [(set VR256:$dst,
-                          (int_x86_avx_cvtdq2_pd_256
-                           (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
-                        Sched<[WriteCvtI2FLd]>;
+                        []>, VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
 def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
-                        [(set VR256:$dst,
-                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
-                        Sched<[WriteCvtI2F]>;
+                        []>, VEX, VEX_L, Sched<[WriteCvtI2F]>;
 }
 let hasSideEffects = 0, mayLoad = 1 in
 def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                      "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
-                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
+                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
+                      [], IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
 def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
-                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
+                      [], IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
 // AVX register conversion intrinsics
 let Predicates = [HasAVX] in {
Index: lib/Transforms/Instrumentation/MemorySanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2347,8 +2347,6 @@
     case llvm::Intrinsic::x86_sse_cvttss2si:
       handleVectorConvertIntrinsic(I, 1);
       break;
-    case llvm::Intrinsic::x86_sse2_cvtdq2pd:
-    case llvm::Intrinsic::x86_sse2_cvtps2pd:
     case llvm::Intrinsic::x86_sse_cvtps2pi:
     case llvm::Intrinsic::x86_sse_cvttps2pi:
       handleVectorConvertIntrinsic(I, 2);
Index: test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
===================================================================
--- test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -245,3 +245,67 @@
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; AVX512VL-NEXT:    retl
+  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; AVX512VL-NEXT:    retl
+  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtps2pd:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0
+; AVX-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0
+; AVX512VL-NEXT:    retl
+  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
+; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0
+; AVX-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0
+; AVX512VL-NEXT:    retl
+  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
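
A note on the avx-intrinsics-x86.ll and sse2-intrinsics-x86.ll hunks below: the LCPI label updates in the asm checks appear to be a mechanical side effect of deleting test functions, since constant-pool labels are numbered by function index within the file. Taken from the hunks themselves:

   LCPI77_0  -> LCPI75_0    ; only the two 128-bit tests precede this function
   LCPI236_0 -> LCPI232_0   ; all four removed tests precede this function

No instruction-selection behaviour changes in those hunks.
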
Index: test/CodeGen/X86/avx-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/avx-intrinsics-x86.ll
+++ test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -274,22 +274,6 @@
 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
 
 
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; AVX512VL-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
 ; AVX-LABEL: test_x86_sse2_cvtdq2ps:
 ; AVX:       ## BB#0:
@@ -354,22 +338,6 @@
 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
 
 
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtps2pd:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0
-; AVX512VL-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
 ; AVX-LABEL: test_x86_sse2_cvtsd2si:
 ; AVX:       ## BB#0:
@@ -1276,14 +1244,14 @@
 ; AVX-LABEL: test_x86_sse2_storeu_dq:
 ; AVX:       ## BB#0:
 ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT:    vpaddb LCPI77_0, %xmm0, %xmm0
+; AVX-NEXT:    vpaddb LCPI75_0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqu %xmm0, (%eax)
 ; AVX-NEXT:    retl
 ;
 ; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
 ; AVX512VL:       ## BB#0:
 ; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT:    vpaddb LCPI77_0, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpaddb LCPI75_0, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovdqu %xmm0, (%eax)
 ; AVX512VL-NEXT:    retl
   %a2 = add <16 x i8> %a1,
@@ -3587,22 +3555,6 @@
 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
 
 
-define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
-; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0
-; AVX512VL-NEXT:    retl
-  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
-
-
 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
 ; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
 ; AVX:       ## BB#0:
@@ -3619,22 +3571,6 @@
 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
 
 
-define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX512VL-NEXT:    retl
-  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
-
-
 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
 ; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
 ; AVX:       ## BB#0:
@@ -4208,7 +4144,7 @@
 ; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
 ; AVX512VL:       ## BB#0:
 ; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT:    vpaddb LCPI236_0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpaddb LCPI232_0, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vmovdqu %ymm0, (%eax)
 ; AVX512VL-NEXT:    retl
   %a2 = add <32 x i8> %a1,
@@ -4449,7 +4385,7 @@
 ;
 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpermilpd LCPI250_0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpermilpd LCPI246_0, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retl
   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
@@ -4941,7 +4877,7 @@
 ; AVX-LABEL: movnt_dq:
 ; AVX:       ## BB#0:
 ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT:    vpaddq LCPI277_0, %xmm0, %xmm0
+; AVX-NEXT:    vpaddq LCPI273_0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovntdq %ymm0, (%eax)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retl
@@ -4949,7 +4885,7 @@
 ;
 ; AVX512VL-LABEL: movnt_dq:
 ; AVX512VL:       ## BB#0:
 ; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT:    vpaddq LCPI277_0, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpaddq LCPI273_0, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovntdq %ymm0, (%eax)
 ; AVX512VL-NEXT:    retl
   %a2 = add <2 x i64> %a1,
Index: test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -42,3 +42,35 @@
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtdq2pd:
+; SSE:       ## BB#0:
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT:    retl
+;
+; KNL-LABEL: test_x86_sse2_cvtdq2pd:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; KNL-NEXT:    retl
+  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtps2pd:
+; SSE:       ## BB#0:
+; SSE-NEXT:    cvtps2pd %xmm0, %xmm0
+; SSE-NEXT:    retl
+;
+; KNL-LABEL: test_x86_sse2_cvtps2pd:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vcvtps2pd %xmm0, %xmm0
+; KNL-NEXT:    retl
+  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
Index: test/CodeGen/X86/sse2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -178,22 +178,6 @@
 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
 
 
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtdq2pd:
-; SSE:       ## BB#0:
-; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_cvtdq2pd:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; KNL-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvtdq2ps:
 ; SSE:       ## BB#0:
@@ -258,22 +242,6 @@
 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
 
 
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtps2pd:
-; SSE:       ## BB#0:
-; SSE-NEXT:    cvtps2pd %xmm0, %xmm0
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_cvtps2pd:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vcvtps2pd %xmm0, %xmm0
-; KNL-NEXT:    retl
-  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvtsd2si:
 ; SSE:       ## BB#0:
@@ -1180,14 +1148,14 @@
 ; SSE-LABEL: test_x86_sse2_storeu_dq:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT:    paddb LCPI71_0, %xmm0
+; SSE-NEXT:    paddb LCPI69_0, %xmm0
 ; SSE-NEXT:    movdqu %xmm0, (%eax)
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_storeu_dq:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT:    vpaddb LCPI71_0, %xmm0, %xmm0
+; KNL-NEXT:    vpaddb LCPI69_0, %xmm0, %xmm0
 ; KNL-NEXT:    vmovdqu %xmm0, (%eax)
 ; KNL-NEXT:    retl
   %a2 = add <16 x i8> %a1,