Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -2688,10 +2688,12 @@ // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_cvtdq2ps_512 : - GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], + def int_x86_avx512_sitofp_round : + Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_uitofp_round : + Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2dq_128 : @@ -2862,30 +2864,12 @@ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtqq2pd_512 : - GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtqq2ps_256 : - GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtqq2ps_512 : - GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvttpd2dq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, Intrinsic<[llvm_v4i32_ty], @@ -3012,36 +2996,12 @@ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_512 : - GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtuqq2pd_512 : - GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtuqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtuqq2ps_256 : - GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtuqq2ps_512 : - GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; Index: llvm/trunk/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/trunk/lib/IR/AutoUpgrade.cpp +++ llvm/trunk/lib/IR/AutoUpgrade.cpp @@ -198,14 +198,14 @@ Name.startswith("avx512.mask.pmull.") || // Added in 4.0 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 - Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0 - Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0 - Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0 - Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0 - Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0 - Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0 - Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0 - Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0 + Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0 + Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0 + Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0 + Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0 + Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0 + Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0 + Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0 + Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0 @@ -1958,38 +1958,47 @@ Name == "avx.cvtdq2.ps.256" || Name.startswith("avx512.mask.cvtdq2pd.") || Name.startswith("avx512.mask.cvtudq2pd.") || - Name == "avx512.mask.cvtdq2ps.128" || - Name == "avx512.mask.cvtdq2ps.256" || - Name == "avx512.mask.cvtudq2ps.128" || - Name == "avx512.mask.cvtudq2ps.256" || - Name == "avx512.mask.cvtqq2pd.128" || - Name == "avx512.mask.cvtqq2pd.256" || - Name == "avx512.mask.cvtuqq2pd.128" || - Name == "avx512.mask.cvtuqq2pd.256" || + Name.startswith("avx512.mask.cvtdq2ps.") || + Name.startswith("avx512.mask.cvtudq2ps.") || + Name.startswith("avx512.mask.cvtqq2pd.") || + Name.startswith("avx512.mask.cvtuqq2pd.") || + Name == "avx512.mask.cvtqq2ps.256" || + Name == "avx512.mask.cvtqq2ps.512" || + Name == "avx512.mask.cvtuqq2ps.256" || + Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" || Name == "avx.cvt.ps2.pd.256" || Name == "avx512.mask.cvtps2pd.128" || Name == "avx512.mask.cvtps2pd.256")) { Type *DstTy = CI->getType(); Rep = CI->getArgOperand(0); + Type *SrcTy = Rep->getType(); unsigned NumDstElts = DstTy->getVectorNumElements(); - if (NumDstElts < Rep->getType()->getVectorNumElements()) { + if (NumDstElts < SrcTy->getVectorNumElements()) { assert(NumDstElts == 2 && "Unexpected vector size"); uint32_t ShuffleMask[2] = { 0, 1 }; Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); } - bool IsPS2PD = (StringRef::npos != Name.find("ps2")); + bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy(); bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); if (IsPS2PD) Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); - else if (IsUnsigned) - Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt"); - else - Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt"); + else if (CI->getNumArgOperands() == 4 && + (!isa(CI->getArgOperand(3)) || + cast(CI->getArgOperand(3))->getZExtValue() != 4)) { + Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round + : Intrinsic::x86_avx512_sitofp_round; + Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, + { DstTy, SrcTy }); + Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) }); + } else { + Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt") + : Builder.CreateSIToFP(Rep, DstTy, "cvt"); + } - if (CI->getNumArgOperands() == 3) + if (CI->getNumArgOperands() >= 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -513,8 +513,6 @@ X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK, X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK, X86ISD::CVTP2SI, X86ISD::MCVTP2SI), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK, @@ -567,14 +565,8 @@ X86ISD::CVTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK, X86ISD::CVTSI2P, X86ISD::MCVTSI2P), - X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VFPROUNDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, @@ -621,16 +613,8 @@ X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK, X86ISD::CVTUI2P, X86ISD::MCVTUI2P), - X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FDIVS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, @@ -1006,10 +990,12 @@ X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), + X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_sub_pd_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND), X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND), + X86_INTRINSIC_DATA(avx512_uitofp_round, INTR_TYPE_1OP, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND), Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -9734,3 +9734,51 @@ %res4 = add <8 x i32> %res3, %res2 ret <8 x i32> %res4 } + +declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) + +define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8] +; X86-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0] +; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8] +; X64-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0] +; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) + +define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8] +; X86-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0] +; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8] +; X64-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0] +; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -3133,7 +3133,7 @@ ret void } -declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) +declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32) define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: @@ -3143,9 +3143,11 @@ ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) - %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) - %res2 = fadd <16 x float> %res, %res1 + %cvt = sitofp <16 x i32> %x0 to <16 x float> + %1 = bitcast i16 %x2 to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1 + %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0) + %res2 = fadd <16 x float> %2, %3 ret <16 x float> %res2 } @@ -3261,7 +3263,7 @@ ret <8 x i32> %res2 } -declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) +declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32) define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: @@ -3271,9 +3273,11 @@ ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) - %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) - %res2 = fadd <16 x float> %res, %res1 + %cvt = uitofp <16 x i32> %x0 to <16 x float> + %1 = bitcast i16 %x2 to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1 + %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0) + %res2 = fadd <16 x float> %2, %3 ret <16 x float> %res2 } Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -567,3 +567,99 @@ %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res) ret i16 %res1 } + +declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8] +; X86-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0] +; X86-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8] +; X64-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0] +; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32) + +define <8 x float> @test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8] +; X86-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0] +; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8] +; X64-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0] +; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32) + +define <8 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8] +; X86-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0] +; X86-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8] +; X64-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0] +; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32) + +define <8 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8] +; X86-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0] +; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8] +; X64-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0] +; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -212,7 +212,7 @@ ret <8 x i64> %res2 } -declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32) +declare <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64>, i32) define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512: @@ -230,13 +230,15 @@ ; X64-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0] ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4) - %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0) - %res2 = fadd <8 x double> %res, %res1 + %cvt = sitofp <8 x i64> %x0 to <8 x double> + %1 = bitcast i8 %x2 to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1 + %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0) + %res2 = fadd <8 x double> %2, %3 ret <8 x double> %res2 } -declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32) +declare <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64>, i32) define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { ; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: @@ -270,9 +272,11 @@ ; X64-AVX512DQVL-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0] ; X64-AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; X64-AVX512DQVL-NEXT: retq # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) - %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) - %res2 = fadd <8 x float> %res, %res1 + %cvt = sitofp <8 x i64> %x0 to <8 x float> + %1 = bitcast i8 %x2 to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1 + %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0) + %res2 = fadd <8 x float> %2, %3 ret <8 x float> %res2 } @@ -372,7 +376,7 @@ ret <8 x i64> %res2 } -declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32) +declare <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64>, i32) define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512: @@ -390,13 +394,15 @@ ; X64-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0] ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4) - %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0) - %res2 = fadd <8 x double> %res, %res1 + %cvt = uitofp <8 x i64> %x0 to <8 x double> + %1 = bitcast i8 %x2 to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1 + %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0) + %res2 = fadd <8 x double> %2, %3 ret <8 x double> %res2 } -declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32) +declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64>, i32) define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { ; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: @@ -430,9 +436,11 @@ ; X64-AVX512DQVL-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0] ; X64-AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; X64-AVX512DQVL-NEXT: retq # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) - %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) - %res2 = fadd <8 x float> %res, %res1 + %cvt = uitofp <8 x i64> %x0 to <8 x float> + %1 = bitcast i8 %x2 to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1 + %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0) + %res2 = fadd <8 x float> %2, %3 ret <8 x float> %res2 } Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -2990,3 +2990,55 @@ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) ret i8 %res1 } + +declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8) + +define <4 x float> @test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256: +; X86: # %bb.0: +; X86-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] +; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256: +; X64: # %bb.0: +; X64-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] +; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) + %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8) + +define <4 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256: +; X86: # %bb.0: +; X86-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8] +; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256: +; X64: # %bb.0: +; X64-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0] +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8] +; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) + %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -242,29 +242,30 @@ ret <4 x float> %res4 } -declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8) - define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256: ; X86: # %bb.0: +; X86-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] ; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] -; X86-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0] -; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] +; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256: ; X64: # %bb.0: +; X64-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0] ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] -; X64-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0] -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] +; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) - %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) - %res2 = fadd <4 x float> %res, %res1 + %cvt1 = sitofp <4 x i64> %x0 to <4 x float> + %1 = bitcast i8 %x2 to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> + %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1 + %cvt = sitofp <4 x i64> %x0 to <4 x float> + %res2 = fadd <4 x float> %2, %cvt ret <4 x float> %res2 } @@ -465,24 +466,27 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256: ; X86: # %bb.0: +; X86-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8] -; X86-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0] -; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] +; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256: ; X64: # %bb.0: +; X64-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0] ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8] -; X64-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0] -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] +; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) - %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) - %res2 = fadd <4 x float> %res, %res1 + %cvt1 = uitofp <4 x i64> %x0 to <4 x float> + %1 = bitcast i8 %x2 to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> + %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1 + %cvt = uitofp <4 x i64> %x0 to <4 x float> + %res2 = fadd <4 x float> %2, %cvt ret <4 x float> %res2 }