Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -4104,24 +4104,6 @@ // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_cvtdq2pd_128 : - GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtdq2pd_256 : - GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtdq2pd_512 : - GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtdq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], @@ -4512,24 +4494,6 @@ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2pd_128 : - GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtudq2pd_256 : - GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtudq2pd_512 : - GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -295,6 +295,8 @@ Name.startswith("avx512.mask.padd.") || // Added in 4.0 Name.startswith("avx512.mask.psub.") || // Added in 4.0 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 + Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 + Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 Name == "avx512.mask.add.pd.128" || // Added in 4.0 Name == "avx512.mask.add.pd.256" || // Added in 4.0 Name == "avx512.mask.add.ps.128" || // Added in 4.0 @@ -821,7 +823,9 @@ } else if (IsX86 && (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtps2pd" || Name == "avx.cvtdq2.pd.256" || - Name == "avx.cvt.ps2.pd.256")) { + Name == "avx.cvt.ps2.pd.256" || + Name.startswith("avx512.mask.cvtdq2pd.") || + Name.startswith("avx512.mask.cvtudq2pd."))) { // Lossless i32/float to double conversion. // Extract the bottom elements if necessary and convert to double vector. Value *Src = CI->getArgOperand(0); @@ -837,11 +841,18 @@ ShuffleMask); } - bool Int2Double = (StringRef::npos != Name.find("cvtdq2")); - if (Int2Double) + bool SInt2Double = (StringRef::npos != Name.find("cvtdq2")); + bool UInt2Double = (StringRef::npos != Name.find("cvtudq2")); + if (SInt2Double) Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); + else if (UInt2Double) + Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd"); else Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); + + if (CI->getNumArgOperands() == 3) + Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, + CI->getArgOperand(1)); } else if (IsX86 && Name.startswith("sse4a.movnt.")) { Module *M = F->getParent(); SmallVector Elts; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1269,6 +1269,7 @@ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); @@ -14028,26 +14029,34 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); - MVT SVT = N0.getSimpleValueType(); + MVT VT = Op.getSimpleValueType(); + MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); - if (SVT.getVectorElementType() == MVT::i1) { - if (SVT == MVT::v2i1) + if (SrcVT.getVectorElementType() == MVT::i1) { + if (SrcVT == MVT::v2i1) return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0)); - MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements()); + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0)); } - switch (SVT.SimpleTy) { + switch (SrcVT.SimpleTy) { default: llvm_unreachable("Custom UINT_TO_FP is not supported!"); + case MVT::v2i32: { + if (VT == MVT::v2f64) + return DAG.getNode(X86ISD::CVTUDQ2PD, dl, VT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, N0, + DAG.getUNDEF(SrcVT))); + return SDValue(); + } case MVT::v4i8: case MVT::v4i16: case MVT::v8i8: case MVT::v8i16: { - MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements()); + MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -488,12 +488,6 @@ X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK, X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTDQ2PD, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, 0), // no rm X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK, @@ -624,12 +618,6 @@ ISD::FP_TO_UINT, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK, ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTUDQ2PD, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), // no rm X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK, ISD::UINT_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK, Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -2497,6 +2497,37 @@ ret <8 x i64> %res } +declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} +; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) + %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} +; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) + %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q: ; CHECK: ## BB#0: Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -3397,22 +3397,6 @@ ret void } -declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) - -define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) - %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) - %res2 = fadd <8 x double> %res, %res1 - ret <8 x double> %res2 -} - declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { @@ -3541,23 +3525,6 @@ ret <8 x i32> %res2 } -declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) - -define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) - %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) - %res2 = fadd <8 x double> %res, %res1 - ret <8 x double> %res2 -} - - declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -4552,3 +4552,66 @@ ret <4 x i64> %res4 } +declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) + %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) + %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) + %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) + %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3001,38 +3001,6 @@ ret void } -declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] -; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) - %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) - %res2 = fadd <2 x double> %res, %res1 - ret <2 x double> %res2 -} - -declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] -; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) - %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) - %res2 = fadd <4 x double> %res, %res1 - ret <4 x double> %res2 -} - declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8) define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { @@ -3385,38 +3353,6 @@ ret <8 x i32> %res2 } -declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] -; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) - %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) - %res2 = fadd <2 x double> %res, %res1 - ret <2 x double> %res2 -} - -declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] -; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) - %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) - %res2 = fadd <4 x double> %res, %res1 - ret <4 x double> %res2 -} - declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2 @@ -463,12 +464,7 @@ ; ; AVX512-LABEL: uitofp_2i32_to_2f64: ; AVX512: # BB#0: -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512-NEXT: retq %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %cvt = uitofp <2 x i32> %shuf to <2 x double> @@ -2388,13 +2384,8 @@ ; AVX512-LABEL: uitofp_load_2i32_to_2f64: ; AVX512: # BB#0: ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX512-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512-NEXT: retq %ld = load <2 x i32>, <2 x i32> *%a %cvt = uitofp <2 x i32> %ld to <2 x double>