Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -583,7 +583,8 @@
     // FIXME: Temporary hack to prevent strict floating point nodes from
     // folding into masked operations illegally.
     if (U == Root && Root->getOpcode() == ISD::VSELECT &&
-        N.getOpcode() != ISD::LOAD && N.getOpcode() != X86ISD::VBROADCAST_LOAD)
+        N.getOpcode() != ISD::LOAD &&
+        N.getOpcode() != X86ISD::VBROADCAST_LOAD && N->getFlags().hasFPExcept())
       return false;
 
     if (N.getOpcode() != ISD::LOAD)
@@ -833,10 +834,12 @@
     case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
     }
     SDValue Res;
-    if (N->isStrictFPOpcode())
+    if (N->isStrictFPOpcode()) {
+      SDNodeFlags Flags = N->getFlags();
       Res =
           CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
-                          {N->getOperand(0), N->getOperand(1)});
+                          {N->getOperand(0), N->getOperand(1)}, Flags);
+    }
     else
       Res =
           CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
@@ -5246,13 +5249,6 @@
       SelectCode(Res.getNode());
       return;
     }
-  case ISD::STRICT_FP_TO_SINT:
-  case ISD::STRICT_FP_TO_UINT:
-    // FIXME: Remove when we have isel patterns for strict versions of these
-    // nodes.
-    if (!TLI->isStrictFPEnabled())
-      CurDAG->mutateStrictFPToFP(Node);
-    break;
   }
 
   SelectCode(Node);
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19771,8 +19771,9 @@
                       Src, DAG.getIntPtrConstant(0, dl));
     SDValue Res, Chain;
     if (IsStrict) {
+      SDNodeFlags Flags = Op->getFlags();
       Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other},
-                        {Op.getOperand(0), Src});
+                        {Op.getOperand(0), Src}, Flags);
       Chain = Res.getValue(1);
     } else
       Res = DAG.getNode(Opc, dl, ResVT, Src);
@@ -19786,8 +19787,10 @@
     SDValue Res, Chain;
     unsigned Opc = IsSigned ?
X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
     if (IsStrict) {
+      SDNodeFlags Flags = Op->getFlags();
       Res =
-          DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+          DAG.getNode(Opc, dl, {ResVT, MVT::Other},
+                      {Op->getOperand(0), Src}, Flags);
       Chain = Res.getValue(1);
     } else
       Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other},
@@ -19807,7 +19810,8 @@
   unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
   SDValue Res, Chain;
   if (IsStrict) {
-    Res = DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
+    SDNodeFlags Flags = Op->getFlags();
+    Res = DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp}, Flags);
     Chain = Res.getValue(1);
     return DAG.getMergeValues({Res, Chain}, dl);
   }
@@ -28671,6 +28675,7 @@
     SDValue Res;
     SDValue Chain;
     if (IsStrict) {
+      SDNodeFlags Flags = N->getFlags();
       Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
-                        {N->getOperand(0), Src});
+                        {N->getOperand(0), Src}, Flags);
       Chain = Res.getValue(1);
Index: llvm/test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4322,21 +4322,17 @@
 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vcvttpd2dq %zmm0, %ymm2
-; X64-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
+; X64-NEXT:    vcvttpd2dq %zmm0, %ymm1 {%k1}
 ; X64-NEXT:    vcvttpd2dq {sae}, %zmm0, %ymm0
 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
 ; X86:       # %bb.0:
-; X86-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vcvttpd2dq %zmm0, %ymm2
-; X86-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
+; X86-NEXT:    vcvttpd2dq %zmm0, %ymm1 {%k1}
 ; X86-NEXT:    vcvttpd2dq {sae}, %zmm0, %ymm0
 ;
X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl @@ -4377,21 +4373,17 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: ; X64: # %bb.0: -; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttpd2udq %zmm0, %ymm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} ; X64-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: ; X86: # %bb.0: -; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttpd2udq %zmm0, %ymm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} ; X86-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl @@ -4407,8 +4399,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2dq %zmm0, %zmm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} ; X64-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X64-NEXT: retq @@ -4416,8 +4407,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vcvttps2dq %zmm0, %zmm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} ; X86-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X86-NEXT: retl @@ -4433,8 +4423,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %zmm0, %zmm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} ; X64-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 
; X64-NEXT: retq @@ -4442,8 +4431,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vcvttps2udq %zmm0, %zmm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} ; X86-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X86-NEXT: retl Index: llvm/test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -286,8 +286,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8] ; X86-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -295,8 +294,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8] ; X64-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -312,8 +310,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8] ; X86-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -321,8 +318,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8] ; X64-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -338,8 +334,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8] ; X86-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -347,8 +342,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0] -; X64-NEXT: 
vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8] ; X64-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -364,8 +358,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8] ; X86-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -373,8 +366,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8] ; X64-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] Index: llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll =================================================================== --- llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -735,16 +735,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -756,16 +754,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = 
shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -795,16 +791,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -816,16 +810,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -853,16 +845,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <4 x float>, <4 x float>* %p %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) @@ -873,16 +863,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: 
test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <4 x float>, <4 x float>* %p %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) @@ -1035,16 +1023,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -1056,16 +1042,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -1095,16 +1079,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -1116,16 +1098,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2: ; 
X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -1153,16 +1133,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07] ; X64-NEXT: retq # 
encoding: [0xc3] %x0 = load <4 x float>, <4 x float>* %p %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) @@ -1173,16 +1151,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <4 x float>, <4 x float>* %p %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) Index: llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll =================================================================== --- llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -682,17 +682,15 @@ ; X86-LABEL: test_mm256_mask_cvttpd_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: vcvttpd2dq %ymm1, %xmm1 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_cvttpd_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttpd2dq %ymm1, %xmm1 
; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X64-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1} ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -709,17 +707,15 @@ ; X86-LABEL: test_mm256_maskz_cvttpd_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_cvttpd_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z} ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -798,16 +794,14 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttpd2udq %ymm1, %xmm1 -; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_cvttpd_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttpd2udq %ymm1, %xmm1 -; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X64-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1} ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -822,16 +816,14 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 -; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_cvttpd_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 -; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z} ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -844,16 +836,14 @@ ; X86-LABEL: test_mm_mask_cvttps_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), 
%al -; X86-NEXT: vcvttps2dq %xmm1, %xmm1 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_cvttps_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttps2dq %xmm1, %xmm1 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X64-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8 @@ -869,16 +859,14 @@ ; X86-LABEL: test_mm_maskz_cvttps_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: vcvttps2dq %xmm0, %xmm0 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskz_cvttps_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttps2dq %xmm0, %xmm0 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8 @@ -893,16 +881,14 @@ ; X86-LABEL: test_mm256_mask_cvttps_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: vcvttps2dq %ymm1, %ymm1 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_cvttps_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttps2dq %ymm1, %ymm1 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; X64-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8 @@ -917,16 +903,14 @@ ; X86-LABEL: test_mm256_maskz_cvttps_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: vcvttps2dq %ymm0, %ymm0 ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-NEXT: vcvttps2dq %ymm0, %ymm0 
{%k1} {z} ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_cvttps_epi32: ; X64: # %bb.0: # %entry -; X64-NEXT: vcvttps2dq %ymm0, %ymm0 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8 @@ -952,15 +936,13 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttps2udq %xmm1, %xmm1 -; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_cvttps_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %xmm1, %xmm1 -; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} +; X64-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__W to <4 x i32> @@ -974,15 +956,13 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttps2udq %xmm0, %xmm0 -; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskz_cvttps_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %xmm0, %xmm0 -; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #8 @@ -1006,15 +986,13 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttps2udq %ymm1, %ymm1 -; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_cvttps_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %ymm1, %ymm1 -; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} +; X64-NEXT: vcvttps2udq %ymm1, %ymm0 
{%k1} ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__W to <8 x i32> @@ -1028,15 +1006,13 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttps2udq %ymm0, %ymm0 -; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_cvttps_epu32: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %ymm0, %ymm0 -; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #8 Index: llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll =================================================================== --- llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -2394,4 +2394,187 @@ ret <4 x i1> %ret } +; This testcase is mainly to make sure SDNode preserve the FPexcept information +define <4 x i32> @fun(<4 x i1> %__U, <4 x float> %__A) +; SSE-32-LABEL: fun: +; SSE-32: # %bb.0: # %entry +; SSE-32-NEXT: pslld $31, %xmm0 +; SSE-32-NEXT: psrad $31, %xmm0 +; SSE-32-NEXT: movaps %xmm1, %xmm3 +; SSE-32-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm1[2,3] +; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-32-NEXT: xorl %eax, %eax +; SSE-32-NEXT: ucomiss %xmm2, %xmm3 +; SSE-32-NEXT: setae %al +; SSE-32-NEXT: shll $31, %eax +; SSE-32-NEXT: movaps %xmm3, %xmm4 +; SSE-32-NEXT: cmpltss %xmm2, %xmm4 +; SSE-32-NEXT: andnps %xmm2, %xmm4 +; SSE-32-NEXT: subss %xmm4, %xmm3 +; SSE-32-NEXT: cvttss2si %xmm3, %ecx +; SSE-32-NEXT: xorl %eax, %ecx +; SSE-32-NEXT: movd %ecx, %xmm4 +; SSE-32-NEXT: movaps %xmm1, %xmm3 +; SSE-32-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] +; SSE-32-NEXT: xorl %eax, %eax +; SSE-32-NEXT: ucomiss %xmm2, %xmm3 +; SSE-32-NEXT: setae %al +; SSE-32-NEXT: shll $31, %eax +; SSE-32-NEXT: 
movaps %xmm3, %xmm5 +; SSE-32-NEXT: cmpltss %xmm2, %xmm5 +; SSE-32-NEXT: andnps %xmm2, %xmm5 +; SSE-32-NEXT: subss %xmm5, %xmm3 +; SSE-32-NEXT: cvttss2si %xmm3, %ecx +; SSE-32-NEXT: xorl %eax, %ecx +; SSE-32-NEXT: movd %ecx, %xmm3 +; SSE-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE-32-NEXT: xorl %eax, %eax +; SSE-32-NEXT: ucomiss %xmm2, %xmm1 +; SSE-32-NEXT: setae %al +; SSE-32-NEXT: shll $31, %eax +; SSE-32-NEXT: movaps %xmm1, %xmm4 +; SSE-32-NEXT: cmpltss %xmm2, %xmm4 +; SSE-32-NEXT: andnps %xmm2, %xmm4 +; SSE-32-NEXT: movaps %xmm1, %xmm5 +; SSE-32-NEXT: subss %xmm4, %xmm5 +; SSE-32-NEXT: cvttss2si %xmm5, %ecx +; SSE-32-NEXT: xorl %eax, %ecx +; SSE-32-NEXT: movd %ecx, %xmm4 +; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-32-NEXT: xorl %eax, %eax +; SSE-32-NEXT: ucomiss %xmm2, %xmm1 +; SSE-32-NEXT: setae %al +; SSE-32-NEXT: shll $31, %eax +; SSE-32-NEXT: movaps %xmm1, %xmm5 +; SSE-32-NEXT: cmpltss %xmm2, %xmm5 +; SSE-32-NEXT: andnps %xmm2, %xmm5 +; SSE-32-NEXT: subss %xmm5, %xmm1 +; SSE-32-NEXT: cvttss2si %xmm1, %ecx +; SSE-32-NEXT: xorl %eax, %ecx +; SSE-32-NEXT: movd %ecx, %xmm1 +; SSE-32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0] +; SSE-32-NEXT: pand %xmm4, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: fun: +; SSE-64: # %bb.0: # %entry +; SSE-64-NEXT: pslld $31, %xmm0 +; SSE-64-NEXT: psrad $31, %xmm0 +; SSE-64-NEXT: movaps %xmm1, %xmm2 +; SSE-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] +; SSE-64-NEXT: cvttss2si %xmm2, %rax +; SSE-64-NEXT: movd %eax, %xmm2 +; SSE-64-NEXT: movaps %xmm1, %xmm3 +; SSE-64-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] +; SSE-64-NEXT: cvttss2si %xmm3, %rax +; SSE-64-NEXT: movd %eax, %xmm3 +; SSE-64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; SSE-64-NEXT: cvttss2si %xmm1, %rax +; SSE-64-NEXT: movd %eax, %xmm2 +; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-64-NEXT: 
cvttss2si %xmm1, %rax +; SSE-64-NEXT: movd %eax, %xmm1 +; SSE-64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE-64-NEXT: pand %xmm2, %xmm0 +; SSE-64-NEXT: retq +; +; AVX-32-LABEL: fun: +; AVX-32: # %bb.0: # %entry +; AVX-32-NEXT: pushl %ebp +; AVX-32-NEXT: .cfi_def_cfa_offset 8 +; AVX-32-NEXT: .cfi_offset %ebp, -8 +; AVX-32-NEXT: movl %esp, %ebp +; AVX-32-NEXT: .cfi_def_cfa_register %ebp +; AVX-32-NEXT: andl $-8, %esp +; AVX-32-NEXT: subl $32, %esp +; AVX-32-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-32-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vextractps $1, %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vextractps $3, %xmm1, (%esp) +; AVX-32-NEXT: flds {{[0-9]+}}(%esp) +; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: flds {{[0-9]+}}(%esp) +; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: flds {{[0-9]+}}(%esp) +; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: flds (%esp) +; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 +; AVX-32-NEXT: vpinsrd $3, (%esp), %xmm1, %xmm1 +; AVX-32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-32-NEXT: movl %ebp, %esp +; AVX-32-NEXT: popl %ebp +; AVX-32-NEXT: .cfi_def_cfa %esp, 4 +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: fun: +; AVX-64: # %bb.0: # %entry +; AVX-64-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-64-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; AVX-64-NEXT: vcvttss2si %xmm2, %rax +; AVX-64-NEXT: vcvttss2si %xmm1, %rcx +; AVX-64-NEXT: vmovd %ecx, %xmm2 +; AVX-64-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] +; AVX-64-NEXT: vcvttss2si %xmm3, %rax +; AVX-64-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; 
AVX-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; AVX-64-NEXT: vcvttss2si %xmm1, %rax +; AVX-64-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-64-NEXT: retq +; +; AVX512VL-LABEL: fun: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] +; AVX512VL-NEXT: vcvttss2usi %xmm0, %eax +; AVX512VL-NEXT: vcvttss2usi %xmm1, %ecx +; AVX512VL-NEXT: vmovd %ecx, %xmm0 +; AVX512VL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512VL-NEXT: vcvttss2usi %xmm2, %eax +; AVX512VL-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; AVX512VL-NEXT: vcvttss2usi %xmm1, %eax +; AVX512VL-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512VL-NEXT: ret{{[l|q]}} +; +; AVX512DQ-LABEL: fun: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 +; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] +; AVX512DQ-NEXT: vcvttss2usi %xmm0, %eax +; AVX512DQ-NEXT: vcvttss2usi %xmm1, %ecx +; AVX512DQ-NEXT: vmovd %ecx, %xmm0 +; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512DQ-NEXT: vcvttss2usi %xmm2, %eax +; AVX512DQ-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; AVX512DQ-NEXT: vcvttss2usi %xmm1, %eax +; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: ret{{[l|q]}} +{ + entry: + %0 = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %__A, + metadata !"fpexcept.strict") + %1 = select <4 x i1> %__U, <4 x i32> %0, <4 x i32> zeroinitializer + ret <4 x 
i32> %1 +} + + attributes #0 = { strictfp }