Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -7561,8 +7561,7 @@ // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd, - X86SchedWriteWidths sched> { + SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, @@ -7575,7 +7574,7 @@ // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; + OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; @@ -7692,8 +7691,7 @@ // Convert Float to Signed/Unsigned Quardword with truncation multiclass avx512_cvttps2qq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd, - X86SchedWriteWidths sched> { + SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; @@ -7747,20 +7745,20 @@ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si, +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPD2DQ>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, - X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, + X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, @@ -7803,19 +7801,19 @@ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; -defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si, +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; -defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui, +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; @@ -7835,6 +7833,114 @@ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, EVEX_CD8<64, CD8VF>; +let Predicates = [HasAVX512] in { + def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))), + (VCVTTPS2DQZrr VR512:$src)>; + def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))), + (VCVTTPS2DQZrm addr:$src)>; + + def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))), + (VCVTTPS2UDQZrr VR512:$src)>; + def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))), + (VCVTTPS2UDQZrm addr:$src)>; + + def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))), + (VCVTTPD2DQZrr VR512:$src)>; + def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))), + (VCVTTPD2DQZrm addr:$src)>; + + def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))), + (VCVTTPD2UDQZrr VR512:$src)>; + def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))), + (VCVTTPD2UDQZrm addr:$src)>; +} + +let Predicates = [HasVLX] in { + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))), + (VCVTTPS2DQZ128rr VR128X:$src)>; + def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), + (VCVTTPS2DQZ128rm addr:$src)>; + + def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))), + (VCVTTPS2UDQZ128rr VR128X:$src)>; + def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))), + (VCVTTPS2UDQZ128rm addr:$src)>; + + def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))), + (VCVTTPS2DQZ256rr VR256X:$src)>; + def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), + (VCVTTPS2DQZ256rm addr:$src)>; + + def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))), + (VCVTTPS2UDQZ256rr VR256X:$src)>; + def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))), + (VCVTTPS2UDQZ256rm addr:$src)>; + + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))), + (VCVTTPD2DQZ256rr VR256X:$src)>; + def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), + (VCVTTPD2DQZ256rm addr:$src)>; + + def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))), + (VCVTTPD2UDQZ256rr VR256X:$src)>; + def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))), + (VCVTTPD2UDQZ256rm addr:$src)>; +} + +let Predicates = [HasDQI] in { + def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))), + (VCVTTPS2QQZrr VR256X:$src)>; + def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))), + (VCVTTPS2QQZrm addr:$src)>; + + def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))), + (VCVTTPS2UQQZrr VR256X:$src)>; + def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))), + (VCVTTPS2UQQZrm addr:$src)>; + + def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))), + (VCVTTPD2QQZrr VR512:$src)>; + def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))), + (VCVTTPD2QQZrm addr:$src)>; + + def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))), + (VCVTTPD2UQQZrr VR512:$src)>; + def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))), + (VCVTTPD2UQQZrm addr:$src)>; +} + +let Predicates = [HasDQI, HasVLX] in { + def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))), + (VCVTTPS2QQZ256rr VR128X:$src)>; + def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))), + (VCVTTPS2QQZ256rm addr:$src)>; + + def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))), + (VCVTTPS2UQQZ256rr VR128X:$src)>; + def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))), + (VCVTTPS2UQQZ256rm addr:$src)>; + + def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))), + (VCVTTPD2QQZ128rr VR128X:$src)>; + def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))), + (VCVTTPD2QQZ128rm addr:$src)>; + + def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))), + (VCVTTPD2UQQZ128rr VR128X:$src)>; + def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))), + (VCVTTPD2UQQZ128rm addr:$src)>; + + def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))), + (VCVTTPD2QQZ256rr VR256X:$src)>; + def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))), + (VCVTTPD2QQZ256rm addr:$src)>; + + def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))), + (VCVTTPD2UQQZ256rr VR256X:$src)>; + def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))), + (VCVTTPD2UQQZ256rm addr:$src)>; +} + let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr Index: llvm/trunk/lib/Target/X86/X86InstrMMX.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrMMX.td +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td @@ -603,6 +603,9 @@ (bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))), (MMX_CVTPS2PIirr VR128:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q + (bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))), + (MMX_CVTTPS2PIirr VR128:$src)>; +def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))), (MMX_CVTTPS2PIirr VR128:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -1574,37 +1574,55 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>, + (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (loadv4f32 addr:$src))))]>, + (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>, VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (v8i32 (fp_to_sint (v8f32 VR256:$src))))]>, + (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (v8i32 (fp_to_sint (loadv8f32 addr:$src))))]>, + (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; } +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (VCVTTPS2DQrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), + (VCVTTPS2DQrm addr:$src)>; + def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), + (VCVTTPS2DQYrr VR256:$src)>; + def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), + (VCVTTPS2DQYrm addr:$src)>; +} + def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>, + (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, Sched<[WriteCvtPS2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (memopv4f32 addr:$src))))]>, + (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; +let Predicates = [UseSSE2] in { + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (CVTTPS2DQrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), + (CVTTPS2DQrm addr:$src)>; +} + let Predicates = [HasAVX, NoVLX] in def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -1619,6 +1637,7 @@ // XMM only def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>; + let Predicates = [HasAVX, NoVLX] in def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", @@ -1633,12 +1652,12 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (v4f64 VR256:$src))))]>, + (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (fp_to_sint (loadv4f64 addr:$src))))]>, + (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; } def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", @@ -1647,6 +1666,13 @@ (VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">; let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), + (VCVTTPD2DQYrm addr:$src)>; +} + +let Predicates = [HasAVX, NoVLX] in { let AddedComplexity = 15 in { def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))), Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -302,8 +302,8 @@ X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), - X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), - X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), + X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), + X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), @@ -533,45 +533,45 @@ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND), + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, 0), + X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, 0), + X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND), + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND), + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, 0), + X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND), + X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), + X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND), X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK, @@ -1307,7 +1307,7 @@ X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), - X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), + X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0), X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0), Index: llvm/trunk/test/CodeGen/X86/avx-cvttp2si.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-cvttp2si.ll +++ llvm/trunk/test/CodeGen/X86/avx-cvttp2si.ll @@ -11,7 +11,8 @@ define <8 x float> @float_to_int_to_float_mem_v8f32(<8 x float>* %p) { ; AVX-LABEL: float_to_int_to_float_mem_v8f32: ; AVX: # %bb.0: -; AVX-NEXT: vroundps $11, (%rdi), %ymm0 +; AVX-NEXT: vcvttps2dq (%rdi), %ymm0 +; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX-NEXT: retq %x = load <8 x float>, <8 x float>* %p, align 16 %fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x) @@ -22,7 +23,8 @@ define <8 x float> @float_to_int_to_float_reg_v8f32(<8 x float> %x) { ; AVX-LABEL: float_to_int_to_float_reg_v8f32: ; AVX: # %bb.0: -; AVX-NEXT: vroundps $11, %ymm0, %ymm0 +; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX-NEXT: retq %fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x) %sitofp = sitofp <8 x i32> %fptosi to <8 x float> @@ -32,7 +34,8 @@ define <4 x double> @float_to_int_to_float_mem_v4f64(<4 x double>* %p) { ; AVX-LABEL: float_to_int_to_float_mem_v4f64: ; AVX: # %bb.0: -; AVX-NEXT: vroundpd $11, (%rdi), %ymm0 +; AVX-NEXT: vcvttpd2dqy (%rdi), %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX-NEXT: retq %x = load <4 x double>, <4 x double>* %p, align 16 %fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x) @@ -43,7 +46,8 @@ define <4 x double> @float_to_int_to_float_reg_v4f64(<4 x double> %x) { ; AVX-LABEL: float_to_int_to_float_reg_v4f64: ; AVX: # %bb.0: -; AVX-NEXT: vroundpd $11, %ymm0, %ymm0 +; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX-NEXT: retq %fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x) %sitofp = sitofp <4 x i32> %fptosi to <4 x double> Index: llvm/trunk/test/CodeGen/X86/avx512-cvttp2i.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-cvttp2i.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cvttp2i.ll @@ -25,7 +25,8 @@ define <16 x float> @float_to_sint_to_float_mem_v16f32(<16 x float>* %p) { ; CHECK-LABEL: float_to_sint_to_float_mem_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttps2dq (%rdi), %zmm0 +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 ; CHECK-NEXT: retq %x = load <16 x float>, <16 x float>* %p %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4) @@ -36,7 +37,8 @@ define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) { ; CHECK-LABEL: float_to_sint_to_float_reg_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 ; CHECK-NEXT: retq %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4) %sitofp = sitofp <16 x i32> %fptosi to <16 x float> @@ -46,7 +48,8 @@ define <16 x float> @float_to_uint_to_float_mem_v16f32(<16 x float>* %p) { ; CHECK-LABEL: float_to_uint_to_float_mem_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttps2udq (%rdi), %zmm0 +; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 ; CHECK-NEXT: retq %x = load <16 x float>, <16 x float>* %p %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4) @@ -57,7 +60,8 @@ define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) { ; CHECK-LABEL: float_to_uint_to_float_reg_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 +; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 ; CHECK-NEXT: retq %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4) %uitofp = uitofp <16 x i32> %fptoui to <16 x float> @@ -67,7 +71,8 @@ define <4 x float> @float_to_uint_to_float_mem_v4f32(<4 x float>* %p) { ; CHECK-LABEL: float_to_uint_to_float_mem_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %xmm0 +; CHECK-NEXT: vcvttps2udq (%rdi), %xmm0 +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load <4 x float>, <4 x float>* %p %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1) @@ -78,7 +83,8 @@ define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) { ; CHECK-LABEL: float_to_uint_to_float_reg_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ; CHECK-NEXT: retq %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1) %uitofp = uitofp <4 x i32> %fptoui to <4 x float> @@ -88,7 +94,8 @@ define <8 x float> @float_to_uint_to_float_mem_v8f32(<8 x float>* %p) { ; CHECK-LABEL: float_to_uint_to_float_mem_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttps2udq (%rdi), %ymm0 +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ; CHECK-NEXT: retq %x = load <8 x float>, <8 x float>* %p %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1) @@ -99,7 +106,8 @@ define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) { ; CHECK-LABEL: float_to_uint_to_float_reg_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ; CHECK-NEXT: retq %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1) %uitofp = uitofp <8 x i32> %fptoui to <8 x float> @@ -109,7 +117,8 @@ define <4 x double> @double_to_uint_to_double_mem_v4f64(<4 x double>* %p) { ; CHECK-LABEL: double_to_uint_to_double_mem_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttpd2udqy (%rdi), %xmm0 +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ; CHECK-NEXT: retq %x = load <4 x double>, <4 x double>* %p %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1) @@ -120,7 +129,8 @@ define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) { ; CHECK-LABEL: double_to_uint_to_double_reg_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ; CHECK-NEXT: retq %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1) %uitofp = uitofp <4 x i32> %fptoui to <4 x double> @@ -130,7 +140,8 @@ define <8 x double> @double_to_sint_to_double_mem_v8f64(<8 x double>* %p) { ; CHECK-LABEL: double_to_sint_to_double_mem_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttpd2dq (%rdi), %ymm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 ; CHECK-NEXT: retq %x = load <8 x double>, <8 x double>* %p %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4) @@ -141,7 +152,8 @@ define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) { ; CHECK-LABEL: double_to_sint_to_double_reg_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 ; CHECK-NEXT: retq %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4) %sitofp = sitofp <8 x i32> %fptosi to <8 x double> @@ -151,7 +163,8 @@ define <8 x double> @double_to_uint_to_double_mem_v8f64(<8 x double>* %p) { ; CHECK-LABEL: double_to_uint_to_double_mem_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttpd2udq (%rdi), %ymm0 +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 ; CHECK-NEXT: retq %x = load <8 x double>, <8 x double>* %p %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4) @@ -162,7 +175,8 @@ define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) { ; CHECK-LABEL: double_to_uint_to_double_reg_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0 +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 ; CHECK-NEXT: retq %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4) %uitofp = uitofp <8 x i32> %fptoui to <8 x double> @@ -172,7 +186,9 @@ define <4 x float> @float_to_sint64_to_float_mem_v4f32(<4 x float>* %p) { ; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %xmm0 +; CHECK-NEXT: vcvttps2qq (%rdi), %ymm0 +; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %x = load <4 x float>, <4 x float>* %p %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1) @@ -183,7 +199,9 @@ define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) { ; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 +; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1) %sitofp = sitofp <4 x i64> %fptosi to <4 x float> @@ -193,7 +211,9 @@ define <4 x float> @float_to_uint64_to_float_mem_v4f32(<4 x float>* %p) { ; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %xmm0 +; CHECK-NEXT: vcvttps2uqq (%rdi), %ymm0 +; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %x = load <4 x float>, <4 x float>* %p %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1) @@ -204,7 +224,9 @@ define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) { ; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0 +; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1) %uitofp = uitofp <4 x i64> %fptoui to <4 x float> @@ -214,7 +236,8 @@ define <8 x float> @float_to_sint64_to_float_mem_v8f32(<8 x float>* %p) { ; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttps2qq (%rdi), %zmm0 +; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 ; CHECK-NEXT: retq %x = load <8 x float>, <8 x float>* %p %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4) @@ -225,7 +248,8 @@ define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) { ; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttps2qq %ymm0, %zmm0 +; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 ; CHECK-NEXT: retq %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4) %sitofp = sitofp <8 x i64> %fptosi to <8 x float> @@ -235,7 +259,8 @@ define <8 x float> @float_to_uint64_to_float_mem_v8f32(<8 x float>* %p) { ; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttps2uqq (%rdi), %zmm0 +; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; CHECK-NEXT: retq %x = load <8 x float>, <8 x float>* %p %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4) @@ -246,7 +271,8 @@ define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) { ; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundps $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm0 +; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; CHECK-NEXT: retq %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4) %uitofp = uitofp <8 x i64> %fptoui to <8 x float> @@ -256,7 +282,8 @@ define <2 x double> @double_to_sint64_to_double_mem_v2f64(<2 x double>* %p) { ; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0 +; CHECK-NEXT: vcvttpd2qq (%rdi), %xmm0 +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load <2 x double>, <2 x double>* %p %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1) @@ -267,7 +294,8 @@ define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) { ; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0 +; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0 +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ; CHECK-NEXT: retq %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1) %sitofp = sitofp <2 x i64> %fptosi to <2 x double> @@ -277,7 +305,8 @@ define <2 x double> @double_to_uint64_to_double_mem_v2f64(<2 x double>* %p) { ; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0 +; CHECK-NEXT: vcvttpd2uqq (%rdi), %xmm0 +; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load <2 x double>, <2 x double>* %p %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1) @@ -288,7 +317,8 @@ define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) { ; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0 +; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0 +; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ; CHECK-NEXT: retq %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1) %uitofp = uitofp <2 x i64> %fptoui to <2 x double> @@ -298,7 +328,8 @@ define <4 x double> @double_to_sint64_to_double_mem_v4f64(<4 x double>* %p) { ; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttpd2qq (%rdi), %ymm0 +; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ; CHECK-NEXT: retq %x = load <4 x double>, <4 x double>* %p %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1) @@ -309,7 +340,8 @@ define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) { ; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 +; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ; CHECK-NEXT: retq %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1) %sitofp = sitofp <4 x i64> %fptosi to <4 x double> @@ -319,7 +351,8 @@ define <4 x double> @double_to_uint64_to_double_mem_v4f64(<4 x double>* %p) { ; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0 +; CHECK-NEXT: vcvttpd2uqq (%rdi), %ymm0 +; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ; CHECK-NEXT: retq %x = load <4 x double>, <4 x double>* %p %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1) @@ -330,7 +363,8 @@ define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) { ; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0 +; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0 +; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ; CHECK-NEXT: retq %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1) %uitofp = uitofp <4 x i64> %fptoui to <4 x double> @@ -340,7 +374,8 @@ define <8 x double> @double_to_sint64_to_double_mem_v8f64(<8 x double>* %p) { ; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttpd2qq (%rdi), %zmm0 +; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 ; CHECK-NEXT: retq %x = load <8 x double>, <8 x double>* %p %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4) @@ -351,7 +386,8 @@ define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) { ; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm0 +; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 ; CHECK-NEXT: retq %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4) %sitofp = sitofp <8 x i64> %fptosi to <8 x double> @@ -361,7 +397,8 @@ define <8 x double> @double_to_uint64_to_double_mem_v8f64(<8 x double>* %p) { ; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 +; CHECK-NEXT: vcvttpd2uqq (%rdi), %zmm0 +; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; CHECK-NEXT: retq %x = load <8 x double>, <8 x double>* %p %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4) @@ -372,7 +409,8 @@ define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) { ; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; CHECK-NEXT: retq %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4) %uitofp = uitofp <8 x i64> %fptoui to <8 x double> Index: llvm/trunk/test/CodeGen/X86/sse-cvttp2si.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-cvttp2si.ll +++ llvm/trunk/test/CodeGen/X86/sse-cvttp2si.ll @@ -160,12 +160,14 @@ define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) { ; SSE-LABEL: float_to_int_to_float_mem_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: roundps $11, (%rdi), %xmm0 +; SSE-NEXT: cvttps2dq (%rdi), %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: float_to_int_to_float_mem_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vroundps $11, (%rdi), %xmm0 +; AVX-NEXT: vcvttps2dq (%rdi), %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX-NEXT: retq %x = load <4 x float>, <4 x float>* %p, align 16 %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x) @@ -176,12 +178,14 @@ define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) { ; SSE-LABEL: float_to_int_to_float_reg_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: roundps $11, %xmm0, %xmm0 +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: float_to_int_to_float_reg_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vroundps $11, %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX-NEXT: retq %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x) %sitofp = sitofp <4 x i32> %fptosi to <4 x float>