diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1680,16 +1680,20 @@ setOperationAction(ISD::FCOPYSIGN, VT, Custom); } - for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) { + for (MVT VT : { MVT::v16i1, MVT::v16i8 }) { setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom); + + for (MVT VT : { MVT::v16i16, MVT::v16i32 }) { + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); + } + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom); @@ -22829,19 +22833,21 @@ return Res; } - if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) { + // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first. + if (VT.getVectorElementType() == MVT::i16) { + MVT NVT = VT.changeVectorElementType(MVT::i32); if (IsStrict) { Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT, - dl, {MVT::v8i32, MVT::Other}, {Chain, Src}); + dl, {NVT, MVT::Other}, {Chain, Src}); Chain = Res.getValue(1); } else { Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, - MVT::v8i32, Src); + NVT, Src); } // TODO: Need to add exception check code for strict FP. 
- Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res); + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); if (IsStrict) return DAG.getMergeValues({Res, Chain}, dl); diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -424,7 +424,7 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; ALL-LABEL: f32to16us: ; ALL: # %bb.0: -; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 +; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 ; ALL-NEXT: vpmovdw %zmm0, %ymm0 ; ALL-NEXT: retq %res = fptoui <16 x float> %f to <16 x i16> diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll @@ -771,8 +771,7 @@ define <16 x i16> @test_s16tof16(<16 x half> %a) { ; CHECK-LABEL: test_s16tof16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 ; CHECK-NEXT: retq %res = fptosi <16 x half> %a to <16 x i16> ret <16 x i16> %res @@ -781,8 +780,7 @@ define <16 x i16> @test_u16tof16(<16 x half> %a) { ; CHECK-LABEL: test_u16tof16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 ; CHECK-NEXT: retq %res = fptoui <16 x half> %a to <16 x i16> ret <16 x i16> %res diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll @@ -59,8 +59,7 @@ define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 { ; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; 
CHECK-NEXT: vcvttph2w %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a, metadata !"fpexcept.strict") #0 @@ -70,8 +69,7 @@ define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 { ; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a, metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll @@ -798,7 +798,7 @@ define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 { ; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,