Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -7559,8 +7559,7 @@
 // Convert Double to Signed/Unsigned Doubleword with truncation
 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            SDNode OpNode128, SDNode OpNodeRnd,
-                            X86SchedWriteWidths sched> {
+                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
 let Predicates = [HasAVX512] in {
   defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, sched.ZMM>,
@@ -7573,7 +7572,7 @@
   // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
   // due to the same reason.
   defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
-                             OpNode128, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+                             OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
   defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                              OpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7690,8 +7689,7 @@
 // Convert Float to Signed/Unsigned Quardword with truncation
 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            SDNode OpNode128, SDNode OpNodeRnd,
-                            X86SchedWriteWidths sched> {
+                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
 let Predicates = [HasDQI] in {
   defm Z : avx512_vcvt_fp,
            avx512_vcvt_fp_sae, EVEX_V128;
   defm Z256 : avx512_vcvt_fp, EVEX_V256;
@@ -7745,20 +7743,20 @@
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;
 
-defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
+defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                 XS, EVEX_CD8<32, CD8VF>;
 
-defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
-defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
+defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;
 
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
-                                X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
+                                X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
@@ -7801,19 +7799,19 @@
                                X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                PD, EVEX_CD8<32, CD8VH>;
 
-defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
+defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;
 
-defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;
 
-defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
+defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;
 
-defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;
@@ -7833,6 +7831,114 @@
                                X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
                                VEX_W, XD, EVEX_CD8<64, CD8VF>;
 
+let Predicates = [HasAVX512] in {
+  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
+            (VCVTTPS2DQZrr VR512:$src)>;
+  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
+            (VCVTTPS2DQZrm addr:$src)>;
+
+  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
+            (VCVTTPS2UDQZrr VR512:$src)>;
+  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
+            (VCVTTPS2UDQZrm addr:$src)>;
+
+  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
+            (VCVTTPD2DQZrr VR512:$src)>;
+  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
+            (VCVTTPD2DQZrm addr:$src)>;
+
+  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
+            (VCVTTPD2UDQZrr VR512:$src)>;
+  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
+            (VCVTTPD2UDQZrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
+            (VCVTTPS2DQZ128rr VR128X:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
+            (VCVTTPS2DQZ128rm addr:$src)>;
+
+  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
+            (VCVTTPS2UDQZ128rr VR128X:$src)>;
+  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
+            (VCVTTPS2UDQZ128rm addr:$src)>;
+
+  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
+            (VCVTTPS2DQZ256rr VR256X:$src)>;
+  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
+            (VCVTTPS2DQZ256rm addr:$src)>;
+
+  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
+            (VCVTTPS2UDQZ256rr VR256X:$src)>;
+  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
+            (VCVTTPS2UDQZ256rm addr:$src)>;
+
+  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
+            (VCVTTPD2DQZ256rr VR256X:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
+            (VCVTTPD2DQZ256rm addr:$src)>;
+
+  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
+            (VCVTTPD2UDQZ256rr VR256X:$src)>;
+  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
+            (VCVTTPD2UDQZ256rm addr:$src)>;
+}
+
+let Predicates = [HasDQI] in {
+  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
+            (VCVTTPS2QQZrr VR256X:$src)>;
+  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
+            (VCVTTPS2QQZrm addr:$src)>;
+
+  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
+            (VCVTTPS2UQQZrr VR256X:$src)>;
+  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
+            (VCVTTPS2UQQZrm addr:$src)>;
+
+  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
+            (VCVTTPD2QQZrr VR512:$src)>;
+  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
+            (VCVTTPD2QQZrm addr:$src)>;
+
+  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
+            (VCVTTPD2UQQZrr VR512:$src)>;
+  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
+            (VCVTTPD2UQQZrm addr:$src)>;
+}
+
+let Predicates = [HasDQI, HasVLX] in {
+  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
+            (VCVTTPS2QQZ256rr VR128X:$src)>;
+  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
+            (VCVTTPS2QQZ256rm addr:$src)>;
+
+  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
+            (VCVTTPS2UQQZ256rr VR128X:$src)>;
+  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
+            (VCVTTPS2UQQZ256rm addr:$src)>;
+
+  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
+            (VCVTTPD2QQZ128rr VR128X:$src)>;
+  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
+            (VCVTTPD2QQZ128rm addr:$src)>;
+
+  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
+            (VCVTTPD2UQQZ128rr VR128X:$src)>;
+  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
+            (VCVTTPD2UQQZ128rm addr:$src)>;
+
+  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
+            (VCVTTPD2QQZ256rr VR256X:$src)>;
+  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
+            (VCVTTPD2QQZ256rm addr:$src)>;
+
+  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
+            (VCVTTPD2UQQZ256rr VR256X:$src)>;
+  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
+            (VCVTTPD2UQQZ256rm addr:$src)>;
+}
+
 let Predicates = [HasAVX512, NoVLX] in {
   def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
             (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
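Note: the fp_to_sint/fp_to_uint patterns added above are needed because the instruction definitions in this file now match the X86-specific truncating nodes; the explicit Pat<> records are what keep plain IR conversions selecting the same EVEX instructions. A minimal sketch of the expected behavior (hypothetical test, not part of the patch; function names are made up), e.g. compiled with llc -mtriple=x86_64-- -mattr=+avx512f:

; Expected to select vcvttps2dq %zmm0, %zmm0 through the new
; (v16i32 (fp_to_sint (v16f32 ...))) pattern.
define <16 x i32> @fptosi_v16f32(<16 x float> %x) {
  %r = fptosi <16 x float> %x to <16 x i32>
  ret <16 x i32> %r
}

; Expected to select vcvttps2udq through the matching fp_to_uint pattern.
define <16 x i32> @fptoui_v16f32(<16 x float> %x) {
  %r = fptoui <16 x float> %x to <16 x i32>
  ret <16 x i32> %r
}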
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -1574,37 +1574,55 @@
 def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
-                           (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>,
+                           (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
                          VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
 def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
-                           (v4i32 (fp_to_sint (loadv4f32 addr:$src))))]>,
+                           (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
                          VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
 def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst,
-                            (v8i32 (fp_to_sint (v8f32 VR256:$src))))]>,
+                            (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
                           VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
 def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst,
-                            (v8i32 (fp_to_sint (loadv8f32 addr:$src))))]>,
+                            (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
                           VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
 }
 
+let Predicates = [HasAVX, NoVLX] in {
+  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+            (VCVTTPS2DQrr VR128:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
+            (VCVTTPS2DQrm addr:$src)>;
+  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+            (VCVTTPS2DQYrr VR256:$src)>;
+  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
+            (VCVTTPS2DQYrm addr:$src)>;
+}
+
 def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>,
+                         (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
                        Sched<[WriteCvtPS2I]>;
 def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v4i32 (fp_to_sint (memopv4f32 addr:$src))))]>,
+                         (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
                        Sched<[WriteCvtPS2ILd]>;
 
+let Predicates = [UseSSE2] in {
+  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+            (CVTTPS2DQrr VR128:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+            (CVTTPS2DQrm addr:$src)>;
+}
+
 let Predicates = [HasAVX, NoVLX] in
 def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -1619,6 +1637,7 @@
 // XMM only
 def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                 (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
+
 let Predicates = [HasAVX, NoVLX] in
 def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
@@ -1633,12 +1652,12 @@
 def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
-                           (v4i32 (fp_to_sint (v4f64 VR256:$src))))]>,
+                           (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>,
                          VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
-                           (v4i32 (fp_to_sint (loadv4f64 addr:$src))))]>,
+                           (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
                          VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
 }
 def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
@@ -1647,6 +1666,13 @@
                 (VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
 
 let Predicates = [HasAVX, NoVLX] in {
+  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+            (VCVTTPD2DQYrr VR256:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
+            (VCVTTPD2DQYrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
   let AddedComplexity = 15 in {
     def : Pat<(X86vzmovl (v2i64 (bitconvert
                                  (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
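Note: the motivation for moving the intrinsics off the generic nodes (both in the .td files above and in the X86IntrinsicsInfo.h table below) is presumably the difference in out-of-range semantics: the hardware cvtt* instructions, and therefore the intrinsics, return the integer indefinite value (0x80000000 for doublewords) for NaN or out-of-range lanes, while ISD::FP_TO_SINT/FP_TO_UINT have no defined result there, which allowed DAG combines to rewrite the intrinsic calls in ways that change that defined result. A small illustration (hypothetical IR, not part of the patch):

declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)

; The third lane does not fit in i32, so cvttps2dq - and therefore the
; intrinsic - returns the integer indefinite value 0x80000000 for it; a plain
; fptosi of that lane would have no defined result.
define <4 x i32> @defined_out_of_range() {
  %r = call <4 x i32> @llvm.x86.sse2.cvttps2dq(
           <4 x float> <float 1.5, float -2.5, float 3.0e+09, float 0.0>)
  ret <4 x i32> %r
}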
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -302,8 +302,8 @@
   X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
   X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
-  X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
-  X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+  X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx_hadd_pd_256,   INTR_TYPE_2OP, X86ISD::FHADD, 0),
   X86_INTRINSIC_DATA(avx_hadd_ps_256,   INTR_TYPE_2OP, X86ISD::FHADD, 0),
   X86_INTRINSIC_DATA(avx_hsub_pd_256,   INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -533,45 +533,45 @@
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, 0),
+                     X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, 0),
+                     X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, 0),
+                     X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, 0),
+                     X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
                      ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
@@ -1307,7 +1307,7 @@
   X86_INTRINSIC_DATA(sse2_cvtpd2ps,     INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
   X86_INTRINSIC_DATA(sse2_cvtps2dq,     INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttpd2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
-  X86_INTRINSIC_DATA(sse2_cvttps2dq,    INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+  X86_INTRINSIC_DATA(sse2_cvttps2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_max_pd,       INTR_TYPE_2OP, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(sse2_max_sd,       INTR_TYPE_2OP, X86ISD::FMAXS, 0),
   X86_INTRINSIC_DATA(sse2_min_pd,       INTR_TYPE_2OP, X86ISD::FMIN, 0),
Index: test/CodeGen/X86/avx-cvttp2si.ll
===================================================================
--- test/CodeGen/X86/avx-cvttp2si.ll
+++ test/CodeGen/X86/avx-cvttp2si.ll
@@ -11,7 +11,8 @@
 define <8 x float> @float_to_int_to_float_mem_v8f32(<8 x float>* %p) {
 ; AVX-LABEL: float_to_int_to_float_mem_v8f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $11, (%rdi), %ymm0
+; AVX-NEXT:    vcvttps2dq (%rdi), %ymm0
+; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %x = load <8 x float>, <8 x float>* %p, align 16
   %fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x)
@@ -22,7 +23,8 @@
 define <8 x float> @float_to_int_to_float_reg_v8f32(<8 x float> %x) {
 ; AVX-LABEL: float_to_int_to_float_reg_v8f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
+; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
+; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x)
   %sitofp = sitofp <8 x i32> %fptosi to <8 x float>
@@ -32,7 +34,8 @@
 define <4 x double> @float_to_int_to_float_mem_v4f64(<4 x double>* %p) {
 ; AVX-LABEL: float_to_int_to_float_mem_v4f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundpd $11, (%rdi), %ymm0
+; AVX-NEXT:    vcvttpd2dqy (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX-NEXT:    retq
   %x = load <4 x double>, <4 x double>* %p, align 16
   %fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x)
@@ -43,7 +46,8 @@
 define <4 x double> @float_to_int_to_float_reg_v4f64(<4 x double> %x) {
 ; AVX-LABEL: float_to_int_to_float_reg_v4f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
+; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX-NEXT:    retq
   %fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x)
   %sitofp = sitofp <4 x i32> %fptosi to <4 x double>
Index: test/CodeGen/X86/avx512-cvttp2i.ll
===================================================================
--- test/CodeGen/X86/avx512-cvttp2i.ll
+++ test/CodeGen/X86/avx512-cvttp2i.ll
@@ -24,7 +24,8 @@
 define <16 x float> @float_to_sint_to_float_mem_v16f32(<16 x float>* %p) {
 ; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0
+; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %x = load <16 x float>, <16 x float>* %p
   %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
@@ -35,7 +36,8 @@
 define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
 ; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
+; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
   %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
@@ -45,7 +47,8 @@
 define <16 x float> @float_to_uint_to_float_mem_v16f32(<16 x float>* %p) {
 ; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0
+; CHECK-NEXT:    vcvttps2udq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %x = load <16 x float>, <16 x float>* %p
   %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
@@ -56,7 +59,8 @@
 define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
 ; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
+; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
   %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
@@ -66,7 +70,8 @@
 define <4 x float> @float_to_uint_to_float_mem_v4f32(<4 x float>* %p) {
 ; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT:    vcvttps2udq (%rdi), %xmm0
+; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %p
   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
@@ -77,7 +82,8 @@
 define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0
+; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
   %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
@@ -87,7 +93,8 @@
 define <8 x float> @float_to_uint_to_float_mem_v8f32(<8 x float>* %p) {
 ; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttps2udq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <8 x float>, <8 x float>* %p
   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
@@ -98,7 +105,8 @@
 define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
 ; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
+; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
   %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
@@ -108,7 +116,8 @@
 define <4 x double> @double_to_uint_to_double_mem_v4f64(<4 x double>* %p) {
 ; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttpd2udqy (%rdi), %xmm0
+; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <4 x double>, <4 x double>* %p
   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
@@ -119,7 +128,8 @@
 define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0
+; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
   %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
@@ -129,7 +139,8 @@
 define <8 x double> @double_to_uint_to_double_mem_v8f64(<8 x double>* %p) {
 ; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT:    vcvttpd2udq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
 ; CHECK-NEXT:    retq
   %x = load <8 x double>, <8 x double>* %p
   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
@@ -140,7 +151,8 @@
 define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
 ; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
   %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
@@ -150,7 +162,9 @@
 define <4 x float> @float_to_sint64_to_float_mem_v4f32(<4 x float>* %p) {
 ; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT:    vcvttps2qq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %p
   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
@@ -161,7 +175,9 @@
 define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0
+; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
   %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
@@ -171,7 +187,9 @@
 define <4 x float> @float_to_uint64_to_float_mem_v4f32(<4 x float>* %p) {
 ; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT:    vcvttps2uqq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %p
   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
@@ -182,7 +200,9 @@
 define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0
+; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
   %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
@@ -192,7 +212,8 @@
 define <8 x float> @float_to_sint64_to_float_mem_v8f32(<8 x float>* %p) {
 ; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttps2qq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <8 x float>, <8 x float>* %p
   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
@@ -203,7 +224,8 @@
 define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
 ; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm0
+; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
   %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
@@ -213,7 +235,8 @@
 define <8 x float> @float_to_uint64_to_float_mem_v8f32(<8 x float>* %p) {
 ; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttps2uqq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <8 x float>, <8 x float>* %p
   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
@@ -224,7 +247,8 @@
 define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
 ; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm0
+; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
   %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
@@ -234,7 +258,8 @@
 define <2 x double> @double_to_sint64_to_double_mem_v2f64(<2 x double>* %p) {
 ; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, (%rdi), %xmm0
+; CHECK-NEXT:    vcvttpd2qq (%rdi), %xmm0
+; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load <2 x double>, <2 x double>* %p
   %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
@@ -245,7 +270,8 @@
 define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, %xmm0, %xmm0
+; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0
+; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
   %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
@@ -255,7 +281,8 @@
 define <2 x double> @double_to_uint64_to_double_mem_v2f64(<2 x double>* %p) {
 ; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, (%rdi), %xmm0
+; CHECK-NEXT:    vcvttpd2uqq (%rdi), %xmm0
+; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load <2 x double>, <2 x double>* %p
   %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
@@ -266,7 +293,8 @@
 define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, %xmm0, %xmm0
+; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0
+; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
   %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
@@ -276,7 +304,8 @@
 define <4 x double> @double_to_sint64_to_double_mem_v4f64(<4 x double>* %p) {
 ; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttpd2qq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <4 x double>, <4 x double>* %p
   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
@@ -287,7 +316,8 @@
 define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0
+; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
   %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
@@ -297,7 +327,8 @@
 define <4 x double> @double_to_uint64_to_double_mem_v4f64(<4 x double>* %p) {
 ; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT:    vcvttpd2uqq (%rdi), %ymm0
+; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %x = load <4 x double>, <4 x double>* %p
   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
@@ -308,7 +339,8 @@
 define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0
+; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
   %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
@@ -318,7 +350,8 @@
 define <8 x double> @double_to_sint64_to_double_mem_v8f64(<8 x double>* %p) {
 ; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT:    vcvttpd2qq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %x = load <8 x double>, <8 x double>* %p
   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
@@ -329,7 +362,8 @@
 define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
 ; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm0
+; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
   %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
@@ -339,7 +373,8 @@
 define <8 x double> @double_to_uint64_to_double_mem_v8f64(<8 x double>* %p) {
 ; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT:    vcvttpd2uqq (%rdi), %zmm0
+; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %x = load <8 x double>, <8 x double>* %p
   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
@@ -350,7 +385,8 @@
 define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
 ; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm0
+; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
   %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
Index: test/CodeGen/X86/mmx-cvt.ll
===================================================================
--- test/CodeGen/X86/mmx-cvt.ll
+++ test/CodeGen/X86/mmx-cvt.ll
@@ -155,7 +155,8 @@
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
 ; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    cvttps2pi %xmm0, %mm0
+; X86-NEXT:    cvttps2dq %xmm0, %xmm0
+; X86-NEXT:    movdq2q %xmm0, %mm0
 ; X86-NEXT:    paddd %mm0, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %ecx
@@ -168,7 +169,8 @@
 ;
 ; X64-LABEL: cvtt_v2f32_v2i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    cvttps2pi %xmm0, %mm0
+; X64-NEXT:    cvttps2dq %xmm0, %xmm0
+; X64-NEXT:    movdq2q %xmm0, %mm0
 ; X64-NEXT:    paddd %mm0, %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/sse-cvttp2si.ll
===================================================================
--- test/CodeGen/X86/sse-cvttp2si.ll
+++ test/CodeGen/X86/sse-cvttp2si.ll
@@ -160,12 +160,14 @@
 define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) {
 ; SSE-LABEL: float_to_int_to_float_mem_v4f32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    roundps $11, (%rdi), %xmm0
+; SSE-NEXT:    cvttps2dq (%rdi), %xmm0
+; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: float_to_int_to_float_mem_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $11, (%rdi), %xmm0
+; AVX-NEXT:    vcvttps2dq (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %p, align 16
   %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
@@ -176,12 +178,14 @@
 define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) {
 ; SSE-LABEL: float_to_int_to_float_reg_v4f32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    roundps $11, %xmm0, %xmm0
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: float_to_int_to_float_reg_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
   %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
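Note: all of the test updates above fall out of the same change. Once the cvtt* intrinsics lower to X86ISD::CVTTP2SI/CVTTP2UI, the combine that folded a truncating convert followed by sitofp/uitofp into a single round-toward-zero instruction (roundps/vroundps/vrndscaleps $11 and the pd equivalents) no longer recognizes them, so the two-instruction convert/convert-back sequence survives. A sketch of the shape being pinned down (hypothetical file, same pattern as float_to_int_to_float_reg_v4f32 above):

declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)

; Previously folded to 'roundps $11, %xmm0, %xmm0'; now expected to stay a
; cvttps2dq + cvtdq2ps pair. For a lane whose value does not fit in i32 the two
; forms differ: the convert pair yields sitofp(0x80000000) = -2147483648.0,
; while the round would keep the (merely truncated) original value.
define <4 x float> @trunc_roundtrip(<4 x float> %x) {
  %i = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %f = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %f
}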