diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1631,13 +1631,16 @@ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { // These operations are handled on non-VLX by artificially widening in // isel patterns. - // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, + Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, + Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Subtarget.hasVLX() ? Legal : Custom); @@ -1679,10 +1682,14 @@ Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Legal); - setOperationAction(ISD::FP_TO_UINT, VT, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::FP_TO_UINT, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, + Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::MUL, VT, Legal); } } @@ -19919,7 +19926,7 @@ bool IsStrict = Op->isStrictFPOpcode(); bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; - MVT VT = Op.getSimpleValueType(); + MVT VT = Op->getSimpleValueType(0); SDValue Src = Op.getOperand(IsStrict ? 1 : 0); MVT SrcVT = Src.getSimpleValueType(); SDLoc dl(Op); @@ -19935,13 +19942,11 @@ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (!IsSigned && !Subtarget.hasVLX()) { + assert(Subtarget.useAVX512Regs() && "Unexpected features!"); // Widen to 512-bits. ResVT = MVT::v8i32; TruncVT = MVT::v8i1; - if (IsStrict) - Opc = IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT; - else - Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; + Opc = Op.getOpcode(); // Need to concat with zero vector for strict fp to avoid spurious // exceptions. // TODO: Should we just do this for non-strict as well? @@ -19967,8 +19972,79 @@ return Res; } - assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); + // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32. + if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) { + assert(!IsSigned && "Expected unsigned conversion!"); + assert(Subtarget.useAVX512Regs() && "Requires avx512f"); + return Op; + } + + // Widen vXi32 fp_to_uint with avx512f to 512-bit source. + if ((VT == MVT::v4i32 || VT == MVT::v8i32) && + (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32)) { + assert(!IsSigned && "Expected unsigned conversion!"); + assert(Subtarget.useAVX512Regs() && !Subtarget.hasVLX() && + "Unexpected features!"); + MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; + MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; + // Need to concat with zero vector for strict fp to avoid spurious + // exceptions. + // TODO: Should we just do this for non-strict as well? + SDValue Tmp = + IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src, + DAG.getIntPtrConstant(0, dl)); + + SDValue Res, Chain; + if (IsStrict) { + Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other}, + {Op->getOperand(0), Src}); + Chain = Res.getValue(1); + } else { + Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src); + } + + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getIntPtrConstant(0, dl)); + + if (IsStrict) + return DAG.getMergeValues({Res, Chain}, dl); + return Res; + } + + // Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source. + if ((VT == MVT::v2i64 || VT == MVT::v4i64) && + (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32)) { + assert(Subtarget.useAVX512Regs() && Subtarget.hasDQI() && + !Subtarget.hasVLX() && "Unexpected features!"); + MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64; + // Need to concat with zero vector for strict fp to avoid spurious + // exceptions. + // TODO: Should we just do this for non-strict as well? + SDValue Tmp = + IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src, + DAG.getIntPtrConstant(0, dl)); + + SDValue Res, Chain; + if (IsStrict) { + Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other}, + {Op->getOperand(0), Src}); + Chain = Res.getValue(1); + } else { + Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src); + } + + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getIntPtrConstant(0, dl)); + + if (IsStrict) + return DAG.getMergeValues({Res, Chain}, dl); + return Res; + } + if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { + assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL"); SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); if (IsStrict) { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8367,23 +8367,6 @@ (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; } -let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i32 (X86any_cvttp2ui (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; - -def : Pat<(v4i32 (X86any_cvttp2ui (v4f32 VR128X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4i32 (X86any_cvttp2ui (v4f64 VR256X:$src1))), - (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_xmm)>; -} - let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDZ128rm addr:$src)>; @@ -8468,38 +8451,6 @@ (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } -let Predicates = [HasDQI, NoVLX] in { -def : Pat<(v2i64 (X86any_cvttp2si (v2f64 VR128X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4i64 (X86any_cvttp2si (v4f32 VR128X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr - (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v4i64 (X86any_cvttp2si (v4f64 VR256X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; - -def : Pat<(v2i64 (X86any_cvttp2ui (v2f64 VR128X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4i64 (X86any_cvttp2ui (v4f32 VR128X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr - (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v4i64 (X86any_cvttp2ui (v4f64 VR256X:$src1))), - (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; -} - //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1398,6 +1398,7 @@ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 }, { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 }, { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -190,10 +190,9 @@ ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -501,10 +500,9 @@ ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -2662,10 +2660,9 @@ ; AVX-64-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 ; AVX-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovaps %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper @@ -2676,10 +2673,9 @@ ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll @@ -197,10 +197,9 @@ ; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -569,10 +568,9 @@ ; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -746,10 +744,9 @@ ; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -1118,10 +1115,9 @@ ; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -1195,10 +1191,9 @@ ; AVX-64-NEXT: vzeroupper ; AVX-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovaps %ymm0, %ymm0 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper @@ -1210,10 +1205,9 @@ ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper @@ -1472,10 +1466,9 @@ ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovaps %ymm0, %ymm0 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: ret{{[l|q]}} @@ -1485,10 +1478,9 @@ ; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}}