Skip to content

Commit f334ac1

Browse files
committed Nov 9, 2016
[AVX-512] Add lowering to cvttpd2udq/cvttps2udq for fptoui v2f64/v2f32 to v2i32
This patch adds support for fptoui to v2i32 from both v2f64 and v2f32, building on Simon's change for the signed version in r284459 and using AVX-512 instructions. If we don't have VLX support, we need to use a 512-bit operation for v2f64->v2i32 and extract the low 128 bits of the result. It also recognises that cvttpd2udq zeroes the upper 64 bits of the xmm result.

Differential Revision: https://reviews.llvm.org/D26331

llvm-svn: 286345
1 parent 731bf9c commit f334ac1

File tree

6 files changed

+53
-51
lines changed

6 files changed

+53
-51
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+10-3
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
11931193
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
11941194
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
11951195
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1196+
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
11961197
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
11971198
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
11981199
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
@@ -22358,12 +22359,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2235822359
case ISD::FP_TO_UINT: {
2235922360
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
2236022361

22361-
if (IsSigned && N->getValueType(0) == MVT::v2i32) {
22362+
if (N->getValueType(0) == MVT::v2i32) {
22363+
assert((IsSigned || Subtarget.hasAVX512()) &&
22364+
"Can only handle signed conversion without AVX512");
2236222365
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
2236322366
SDValue Src = N->getOperand(0);
2236422367
if (Src.getValueType() == MVT::v2f64) {
2236522368
SDValue Idx = DAG.getIntPtrConstant(0, dl);
22366-
SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src);
22369+
SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTPD2DQ
22370+
: X86ISD::CVTTPD2UDQ,
22371+
dl, MVT::v4i32, Src);
2236722372
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
2236822373
Results.push_back(Res);
2236922374
return;
@@ -22372,7 +22377,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2237222377
SDValue Idx = DAG.getIntPtrConstant(0, dl);
2237322378
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
2237422379
DAG.getUNDEF(MVT::v2f32));
22375-
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res);
22380+
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
22381+
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
2237622382
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
2237722383
Results.push_back(Res);
2237822384
return;
@@ -22700,6 +22706,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2270022706
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
2270122707
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
2270222708
case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ";
22709+
case X86ISD::CVTTPD2UDQ: return "X86ISD::CVTTPD2UDQ";
2270322710
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
2270422711
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
2270522712
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";

‎llvm/lib/Target/X86/X86ISelLowering.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ namespace llvm {
302302
// Vector FP round.
303303
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
304304

305-
// Vector double to signed integer (truncated).
306-
CVTTPD2DQ,
305+
// Vector double to signed/unsigned integer (truncated).
306+
CVTTPD2DQ, CVTTPD2UDQ,
307307

308308
// Vector signed/unsigned integer to double.
309309
CVTDQ2PD, CVTUDQ2PD,

‎llvm/lib/Target/X86/X86InstrAVX512.td

+10-2
Original file line numberDiff line numberDiff line change
@@ -6314,8 +6314,8 @@ defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
63146314
X86cvttp2uiRnd>, PS,
63156315
EVEX_CD8<32, CD8VF>;
63166316

6317-
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint,
6318-
X86cvttp2uiRnd>, PS, VEX_W,
6317+
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
6318+
X86cvttpd2udq, X86cvttp2uiRnd>, PS, VEX_W,
63196319
EVEX_CD8<64, CD8VF>;
63206320

63216321
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
@@ -6395,6 +6395,11 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
63956395
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
63966396
VR256X:$src1, sub_ymm)))), sub_xmm)>;
63976397

6398+
def : Pat<(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src))),
6399+
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
6400+
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
6401+
VR128X:$src, sub_xmm)))), sub_xmm)>;
6402+
63986403
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
63996404
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
64006405
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -6416,6 +6421,9 @@ let Predicates = [HasAVX512, HasVLX] in {
64166421
def : Pat<(X86vzmovl (v2i64 (bitconvert
64176422
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
64186423
(VCVTTPD2DQZ128rr VR128:$src)>;
6424+
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
6425+
(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))),
6426+
(VCVTTPD2UDQZ128rr VR128:$src)>;
64196427
}
64206428

64216429
let Predicates = [HasAVX512] in {

‎llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

+3
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
7070
def X86cvttpd2dq: SDNode<"X86ISD::CVTTPD2DQ",
7171
SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
7272
SDTCisVT<1, v2f64>]>>;
73+
def X86cvttpd2udq: SDNode<"X86ISD::CVTTPD2UDQ",
74+
SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
75+
SDTCisVT<1, v2f64>]>>;
7376
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
7477
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
7578
SDTCisVT<1, v4i32>]>>;

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
586586
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
587587
ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
588588
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
589-
ISD::FP_TO_UINT, 0),
589+
X86ISD::CVTTPD2UDQ, 0),
590590
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
591591
ISD::FP_TO_UINT, 0),
592592
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,

‎llvm/test/CodeGen/X86/vec_fp_to_int.ll

+27-43
Original file line numberDiff line numberDiff line change
@@ -407,33 +407,19 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
407407
;
408408
; AVX512F-LABEL: fptoui_2f64_to_4i32:
409409
; AVX512F: # BB#0:
410-
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
411-
; AVX512F-NEXT: vmovq %rax, %xmm1
412-
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
413-
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
414-
; AVX512F-NEXT: vmovq %rax, %xmm0
415-
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
416-
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
410+
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
411+
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
417412
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
418413
; AVX512F-NEXT: retq
419414
;
420415
; AVX512VL-LABEL: fptoui_2f64_to_4i32:
421416
; AVX512VL: # BB#0:
422-
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
423-
; AVX512VL-NEXT: vmovq %rax, %xmm1
424-
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
425-
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
426-
; AVX512VL-NEXT: vmovq %rax, %xmm0
427-
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
428-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
429-
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
417+
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
430418
; AVX512VL-NEXT: retq
431419
;
432420
; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
433421
; AVX512VLDQ: # BB#0:
434-
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
435-
; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
436-
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
422+
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
437423
; AVX512VLDQ-NEXT: retq
438424
%cvt = fptoui <2 x double> %a to <2 x i32>
439425
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -491,30 +477,19 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
491477
;
492478
; AVX512F-LABEL: fptoui_2f64_to_2i32:
493479
; AVX512F: # BB#0:
494-
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
495-
; AVX512F-NEXT: vmovq %rax, %xmm1
496-
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
497-
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
498-
; AVX512F-NEXT: vmovq %rax, %xmm0
499-
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
500-
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
480+
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
481+
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
482+
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
501483
; AVX512F-NEXT: retq
502484
;
503485
; AVX512VL-LABEL: fptoui_2f64_to_2i32:
504486
; AVX512VL: # BB#0:
505-
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
506-
; AVX512VL-NEXT: vmovq %rax, %xmm1
507-
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
508-
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
509-
; AVX512VL-NEXT: vmovq %rax, %xmm0
510-
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
511-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
487+
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
512488
; AVX512VL-NEXT: retq
513489
;
514490
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
515491
; AVX512VLDQ: # BB#0:
516-
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
517-
; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
492+
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
518493
; AVX512VLDQ-NEXT: retq
519494
%cvt = fptoui <2 x double> %a to <2 x i32>
520495
%ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1250,15 +1225,24 @@ define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
12501225
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
12511226
; AVX-NEXT: retq
12521227
;
1253-
; AVX512-LABEL: fptoui_2f32_to_2i32:
1254-
; AVX512: # BB#0:
1255-
; AVX512-NEXT: vcvttss2usi %xmm0, %rax
1256-
; AVX512-NEXT: vmovq %rax, %xmm1
1257-
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1258-
; AVX512-NEXT: vcvttss2usi %xmm0, %rax
1259-
; AVX512-NEXT: vmovq %rax, %xmm0
1260-
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1261-
; AVX512-NEXT: retq
1228+
; AVX512F-LABEL: fptoui_2f32_to_2i32:
1229+
; AVX512F: # BB#0:
1230+
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1231+
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1232+
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1233+
; AVX512F-NEXT: retq
1234+
;
1235+
; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1236+
; AVX512VL: # BB#0:
1237+
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1238+
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1239+
; AVX512VL-NEXT: retq
1240+
;
1241+
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1242+
; AVX512VLDQ: # BB#0:
1243+
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1244+
; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1245+
; AVX512VLDQ-NEXT: retq
12621246
%cvt = fptoui <2 x float> %a to <2 x i32>
12631247
ret <2 x i32> %cvt
12641248
}

0 commit comments

Comments
 (0)