diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1627,6 +1627,9 @@ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -934,11 +934,7 @@ ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2usi %xmm1, %eax -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %ecx -; AVX512VL-NEXT: vmovd %ecx, %xmm0 -; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32: @@ -1758,21 +1754,11 @@ ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VL-NEXT: ret{{[l|q]}} ; +; FIXME: This is unsafe for strict FP: the conversion is widened to zmm, so the undefined upper elements are converted too and may raise spurious FP exceptions ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm1 -; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2usi %xmm2, %ecx -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %edx -; AVX512DQ-NEXT: vmovd %edx, %xmm0 -; 
AVX512DQ-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 @@ -2281,32 +2267,16 @@ ; ; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2usi %xmm1, %eax -; AVX512VL-NEXT: vcvttss2usi %xmm0, %ecx -; AVX512VL-NEXT: vmovd %ecx, %xmm1 -; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2usi %xmm2, %eax -; AVX512VL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2usi %xmm0, %eax -; AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; +; FIXME: This is unsafe for strict FP: the conversion is widened to zmm, so the undefined upper elements are converted too and may raise spurious FP exceptions ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm1, %eax -; AVX512DQ-NEXT: vcvttss2usi %xmm0, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm1 -; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2usi %xmm2, %eax -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm0, %eax -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: ret{{[l|q]}} %ret = call <4 
x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a, metadata !"fpexcept.strict") diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll @@ -856,33 +856,16 @@ ; ; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2usi %xmm1, %eax -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %ecx -; AVX512VL-NEXT: vmovd %ecx, %xmm1 -; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512VL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: ret{{[l|q]}} ; +; FIXME: This is unsafe for strict FP: the conversion is widened to zmm, so the undefined upper elements are converted too and may raise spurious FP exceptions ; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2usi %xmm1, %eax -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm1 -; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: ret{{[l|q]}} %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a, @@ -1099,58 +1082,15 @@ 
; ; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] -; AVX512VL-NEXT: vcvttss2usi %xmm2, %eax -; AVX512VL-NEXT: vcvttss2usi %xmm1, %ecx -; AVX512VL-NEXT: vmovd %ecx, %xmm2 -; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] -; AVX512VL-NEXT: vcvttss2usi %xmm3, %eax -; AVX512VL-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX512VL-NEXT: vcvttss2usi %xmm1, %eax -; AVX512VL-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2usi %xmm2, %eax -; AVX512VL-NEXT: vcvttss2usi %xmm0, %ecx -; AVX512VL-NEXT: vmovd %ecx, %xmm2 -; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2usi %xmm3, %eax -; AVX512VL-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2usi %xmm0, %eax -; AVX512VL-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; +; FIXME: This is unsafe for strict FP: the conversion is widened to zmm, so the undefined upper elements are converted too and may raise spurious FP exceptions ; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm2, %eax -; AVX512DQ-NEXT: vcvttss2usi %xmm1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm2 -; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] -; AVX512DQ-NEXT: vcvttss2usi %xmm3, %eax -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm1, %eax -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 -; 
AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm2, %eax -; AVX512DQ-NEXT: vcvttss2usi %xmm0, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm2 -; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2usi %xmm3, %eax -; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2usi %xmm0, %eax -; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a, metadata !"fpexcept.strict") diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll @@ -651,30 +651,7 @@ define <8 x i32> @strict_vector_fptoui_v8f64_to_v8i32(<8 x double> %a) #0 { ; CHECK-LABEL: strict_vector_fptoui_v8f64_to_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] -; CHECK-NEXT: vcvttsd2usi %xmm2, %eax -; CHECK-NEXT: vcvttsd2usi %xmm1, %ecx -; CHECK-NEXT: vmovd %ecx, %xmm1 -; CHECK-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm2 -; CHECK-NEXT: vcvttsd2usi %xmm2, %eax -; CHECK-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; CHECK-NEXT: vcvttsd2usi %xmm2, %eax -; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; CHECK-NEXT: vcvttsd2usi %xmm2, %eax -; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx -; CHECK-NEXT: vmovd %ecx, %xmm2 -; CHECK-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vcvttsd2usi %xmm0, %eax -; CHECK-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vcvttsd2usi %xmm0, %eax -; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %a, metadata !"fpexcept.strict") diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4445,14 +4445,10 @@ ; ; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] +; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32( @@ -4990,14 +4986,10 @@ ; ; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax -; AVX512-NEXT: vpinsrd $3, %eax, %xmm0, 
%xmm0 +; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] +; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(