Skip to content

Commit b57dd17

Browse files
committed Nov 16, 2016
[X86][AVX512] Autoupgrade lossless i32/u32 to f64 conversion intrinsics with generic IR
Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion instructions are lossless and can be safely represented as generic SINT_TO_FP/UINT_TO_FP calls instead of x86 intrinsics without affecting final codegen.
LLVM counterpart to D26686.
Differential Revision: https://reviews.llvm.org/D26736
llvm-svn: 287108
1 parent 678dd8f commit b57dd17

9 files changed

+127
-167
lines changed
 

‎llvm/include/llvm/IR/IntrinsicsX86.td

-36
Original file line numberDiff line numberDiff line change
@@ -4104,24 +4104,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
41044104

41054105
// Vector convert
41064106
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
4107-
def int_x86_avx512_mask_cvtdq2pd_128 :
4108-
GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">,
4109-
Intrinsic<[llvm_v2f64_ty],
4110-
[llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
4111-
[IntrNoMem]>;
4112-
4113-
def int_x86_avx512_mask_cvtdq2pd_256 :
4114-
GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">,
4115-
Intrinsic<[llvm_v4f64_ty],
4116-
[llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
4117-
[IntrNoMem]>;
4118-
4119-
def int_x86_avx512_mask_cvtdq2pd_512 :
4120-
GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
4121-
Intrinsic<[llvm_v8f64_ty],
4122-
[llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
4123-
[IntrNoMem]>;
4124-
41254107
def int_x86_avx512_mask_cvtdq2ps_128 :
41264108
GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">,
41274109
Intrinsic<[llvm_v4f32_ty],
@@ -4512,24 +4494,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
45124494
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
45134495
[IntrNoMem]>;
45144496

4515-
def int_x86_avx512_mask_cvtudq2pd_128 :
4516-
GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">,
4517-
Intrinsic<[llvm_v2f64_ty],
4518-
[llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
4519-
[IntrNoMem]>;
4520-
4521-
def int_x86_avx512_mask_cvtudq2pd_256 :
4522-
GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">,
4523-
Intrinsic<[llvm_v4f64_ty],
4524-
[llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
4525-
[IntrNoMem]>;
4526-
4527-
def int_x86_avx512_mask_cvtudq2pd_512 :
4528-
GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
4529-
Intrinsic<[llvm_v8f64_ty],
4530-
[llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
4531-
[IntrNoMem]>;
4532-
45334497
def int_x86_avx512_mask_cvtudq2ps_128 :
45344498
GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">,
45354499
Intrinsic<[llvm_v4f32_ty],

‎llvm/lib/IR/AutoUpgrade.cpp

+14-3
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
295295
Name.startswith("avx512.mask.padd.") || // Added in 4.0
296296
Name.startswith("avx512.mask.psub.") || // Added in 4.0
297297
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
298+
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
299+
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
298300
Name == "avx512.mask.add.pd.128" || // Added in 4.0
299301
Name == "avx512.mask.add.pd.256" || // Added in 4.0
300302
Name == "avx512.mask.add.ps.128" || // Added in 4.0
@@ -821,7 +823,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
821823
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
822824
Name == "sse2.cvtps2pd" ||
823825
Name == "avx.cvtdq2.pd.256" ||
824-
Name == "avx.cvt.ps2.pd.256")) {
826+
Name == "avx.cvt.ps2.pd.256" ||
827+
Name.startswith("avx512.mask.cvtdq2pd.") ||
828+
Name.startswith("avx512.mask.cvtudq2pd."))) {
825829
// Lossless i32/float to double conversion.
826830
// Extract the bottom elements if necessary and convert to double vector.
827831
Value *Src = CI->getArgOperand(0);
@@ -837,11 +841,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
837841
ShuffleMask);
838842
}
839843

840-
bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
841-
if (Int2Double)
844+
bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
845+
bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
846+
if (SInt2Double)
842847
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
848+
else if (UInt2Double)
849+
Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
843850
else
844851
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
852+
853+
if (CI->getNumArgOperands() == 3)
854+
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
855+
CI->getArgOperand(1));
845856
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
846857
Module *M = F->getParent();
847858
SmallVector<Metadata *, 1> Elts;

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+15-6
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
12691269
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
12701270
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
12711271
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1272+
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
12721273
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
12731274
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
12741275
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@@ -14028,26 +14029,34 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
1402814029
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
1402914030
SelectionDAG &DAG) const {
1403014031
SDValue N0 = Op.getOperand(0);
14031-
MVT SVT = N0.getSimpleValueType();
14032+
MVT VT = Op.getSimpleValueType();
14033+
MVT SrcVT = N0.getSimpleValueType();
1403214034
SDLoc dl(Op);
1403314035

14034-
if (SVT.getVectorElementType() == MVT::i1) {
14035-
if (SVT == MVT::v2i1)
14036+
if (SrcVT.getVectorElementType() == MVT::i1) {
14037+
if (SrcVT == MVT::v2i1)
1403614038
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
1403714039
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
14038-
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
14040+
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
1403914041
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
1404014042
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
1404114043
}
1404214044

14043-
switch (SVT.SimpleTy) {
14045+
switch (SrcVT.SimpleTy) {
1404414046
default:
1404514047
llvm_unreachable("Custom UINT_TO_FP is not supported!");
14048+
case MVT::v2i32: {
14049+
if (VT == MVT::v2f64)
14050+
return DAG.getNode(X86ISD::CVTUDQ2PD, dl, VT,
14051+
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, N0,
14052+
DAG.getUNDEF(SrcVT)));
14053+
return SDValue();
14054+
}
1404614055
case MVT::v4i8:
1404714056
case MVT::v4i16:
1404814057
case MVT::v8i8:
1404914058
case MVT::v8i16: {
14050-
MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
14059+
MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
1405114060
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
1405214061
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
1405314062
}

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

-12
Original file line numberDiff line numberDiff line change
@@ -488,12 +488,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
488488
X86ISD::CONFLICT, 0),
489489
X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
490490
X86ISD::CONFLICT, 0),
491-
X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
492-
X86ISD::CVTDQ2PD, 0),
493-
X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
494-
ISD::SINT_TO_FP, 0),
495-
X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
496-
ISD::SINT_TO_FP, 0), // no rm
497491
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
498492
ISD::SINT_TO_FP, 0),
499493
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
@@ -624,12 +618,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
624618
ISD::FP_TO_UINT, 0),
625619
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
626620
ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
627-
X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
628-
X86ISD::CVTUDQ2PD, 0),
629-
X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
630-
ISD::UINT_TO_FP, 0),
631-
X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
632-
ISD::UINT_TO_FP, 0), // no rm
633621
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
634622
ISD::UINT_TO_FP, 0),
635623
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,

‎llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll

+31
Original file line numberDiff line numberDiff line change
@@ -2530,3 +2530,34 @@ define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr)
25302530
ret <8 x i64> %res
25312531
}
25322532

2533+
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
2534+
2535+
define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
2536+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
2537+
; CHECK: ## BB#0:
2538+
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm2
2539+
; CHECK-NEXT: kmovw %edi, %k1
2540+
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
2541+
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0
2542+
; CHECK-NEXT: retq
2543+
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
2544+
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
2545+
%res2 = fadd <8 x double> %res, %res1
2546+
ret <8 x double> %res2
2547+
}
2548+
2549+
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
2550+
2551+
define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
2552+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
2553+
; CHECK: ## BB#0:
2554+
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2
2555+
; CHECK-NEXT: kmovw %edi, %k1
2556+
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
2557+
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0
2558+
; CHECK-NEXT: retq
2559+
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
2560+
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
2561+
%res2 = fadd <8 x double> %res, %res1
2562+
ret <8 x double> %res2
2563+
}

‎llvm/test/CodeGen/X86/avx512-intrinsics.ll

-33
Original file line numberDiff line numberDiff line change
@@ -3397,22 +3397,6 @@ define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1
33973397
ret void
33983398
}
33993399

3400-
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3401-
3402-
define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3403-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3404-
; CHECK: ## BB#0:
3405-
; CHECK-NEXT: kmovw %edi, %k1
3406-
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3407-
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3408-
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3409-
; CHECK-NEXT: retq
3410-
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3411-
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3412-
%res2 = fadd <8 x double> %res, %res1
3413-
ret <8 x double> %res2
3414-
}
3415-
34163400
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
34173401

34183402
define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
@@ -3541,23 +3525,6 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x
35413525
ret <8 x i32> %res2
35423526
}
35433527

3544-
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3545-
3546-
define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3547-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3548-
; CHECK: ## BB#0:
3549-
; CHECK-NEXT: kmovw %edi, %k1
3550-
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3551-
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3552-
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3553-
; CHECK-NEXT: retq
3554-
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3555-
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3556-
%res2 = fadd <8 x double> %res, %res1
3557-
ret <8 x double> %res2
3558-
}
3559-
3560-
35613528
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
35623529

35633530
define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {

‎llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll

+63
Original file line numberDiff line numberDiff line change
@@ -4552,3 +4552,66 @@ define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %
45524552
ret <4 x i64> %res4
45534553
}
45544554

4555+
declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
4556+
4557+
define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
4558+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
4559+
; CHECK: ## BB#0:
4560+
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xd0]
4561+
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4562+
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
4563+
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2]
4564+
; CHECK-NEXT: retq ## encoding: [0xc3]
4565+
%res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
4566+
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
4567+
%res2 = fadd <2 x double> %res, %res1
4568+
ret <2 x double> %res2
4569+
}
4570+
4571+
declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
4572+
4573+
define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
4574+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
4575+
; CHECK: ## BB#0:
4576+
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xd0]
4577+
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4578+
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
4579+
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2]
4580+
; CHECK-NEXT: retq ## encoding: [0xc3]
4581+
%res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
4582+
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
4583+
%res2 = fadd <4 x double> %res, %res1
4584+
ret <4 x double> %res2
4585+
}
4586+
4587+
declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
4588+
4589+
define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
4590+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
4591+
; CHECK: ## BB#0:
4592+
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
4593+
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4594+
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
4595+
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2]
4596+
; CHECK-NEXT: retq ## encoding: [0xc3]
4597+
%res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
4598+
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
4599+
%res2 = fadd <2 x double> %res, %res1
4600+
ret <2 x double> %res2
4601+
}
4602+
4603+
declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
4604+
4605+
define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
4606+
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
4607+
; CHECK: ## BB#0:
4608+
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
4609+
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4610+
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
4611+
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2]
4612+
; CHECK-NEXT: retq ## encoding: [0xc3]
4613+
%res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
4614+
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
4615+
%res2 = fadd <4 x double> %res, %res1
4616+
ret <4 x double> %res2
4617+
}

‎llvm/test/CodeGen/X86/avx512vl-intrinsics.ll

-64
Original file line numberDiff line numberDiff line change
@@ -3001,38 +3001,6 @@ define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1,
30013001
ret void
30023002
}
30033003

3004-
declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
3005-
3006-
define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
3007-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
3008-
; CHECK: ## BB#0:
3009-
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3010-
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
3011-
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xc0]
3012-
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
3013-
; CHECK-NEXT: retq ## encoding: [0xc3]
3014-
%res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
3015-
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
3016-
%res2 = fadd <2 x double> %res, %res1
3017-
ret <2 x double> %res2
3018-
}
3019-
3020-
declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
3021-
3022-
define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
3023-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
3024-
; CHECK: ## BB#0:
3025-
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3026-
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
3027-
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xc0]
3028-
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
3029-
; CHECK-NEXT: retq ## encoding: [0xc3]
3030-
%res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
3031-
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
3032-
%res2 = fadd <4 x double> %res, %res1
3033-
ret <4 x double> %res2
3034-
}
3035-
30363004
declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
30373005

30383006
define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
@@ -3385,38 +3353,6 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x
33853353
ret <8 x i32> %res2
33863354
}
33873355

3388-
declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
3389-
3390-
define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
3391-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
3392-
; CHECK: ## BB#0:
3393-
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3394-
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
3395-
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0]
3396-
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
3397-
; CHECK-NEXT: retq ## encoding: [0xc3]
3398-
%res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
3399-
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
3400-
%res2 = fadd <2 x double> %res, %res1
3401-
ret <2 x double> %res2
3402-
}
3403-
3404-
declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
3405-
3406-
define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
3407-
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
3408-
; CHECK: ## BB#0:
3409-
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3410-
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
3411-
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0]
3412-
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
3413-
; CHECK-NEXT: retq ## encoding: [0xc3]
3414-
%res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
3415-
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
3416-
%res2 = fadd <4 x double> %res, %res1
3417-
ret <4 x double> %res2
3418-
}
3419-
34203356
declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
34213357

34223358
define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {

0 commit comments

Comments
 (0)
Please sign in to comment.