diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -296,10 +296,10 @@
     VMTRUNC, VMTRUNCUS, VMTRUNCS,

     // Vector FP extend.
-    VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
+    VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, STRICT_VFPEXT,

     // Vector FP round.
-    VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
+    VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, STRICT_VFPROUND,

     // Masked version of above. Used for v2f64->v4f32.
     // SRC, PASSTHRU, MASK
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1004,7 +1004,9 @@
     setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
     setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
     setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);

     // We want to legalize this to an f64 load rather than an i64 load on
     // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
@@ -20080,12 +20082,13 @@
   }

   assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");

-  // FIXME: Strict fp.
-  assert(!IsStrict && "Strict FP not supported yet!");
-  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
-                     DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
-                                 In, DAG.getUNDEF(SVT)));
+  SDValue Res =
+      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT));
+  if (IsStrict)
+    return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+                       {Op->getOperand(0), Res});
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
 }

 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -28938,11 +28941,21 @@
     Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
     return;
   }
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_ROUND: {
-    if (!isTypeLegal(N->getOperand(0).getValueType()))
-      return;
-    SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+    bool IsStrict = N->isStrictFPOpcode();
+    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+    if (!isTypeLegal(Src.getValueType()))
+      return;
+    SDValue V;
+    if (IsStrict)
+      V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
+                      {N->getOperand(0), N->getOperand(1)});
+    else
+      V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
     Results.push_back(V);
+    if (IsStrict)
+      Results.push_back(V.getValue(1));
     return;
   }
   case ISD::FP_EXTEND: {
@@ -29380,10 +29393,12 @@
   case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
   case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
   case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+  case X86ISD::STRICT_VFPEXT: return "X86ISD::STRICT_VFPEXT";
   case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE";
   case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS";
   case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE";
   case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+  case X86ISD::STRICT_VFPROUND: return "X86ISD::STRICT_VFPROUND";
   case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
   case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
   case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS";
@@ -34983,6 +34998,7 @@
   case X86ISD::STRICT_CVTTP2UI:
   case X86ISD::STRICT_CVTSI2P:
   case X86ISD::STRICT_CVTUI2P:
+  case X86ISD::STRICT_VFPROUND:
     if (In.getOperand(1).getValueType() == MVT::v2f64 ||
         In.getOperand(1).getValueType() == MVT::v2i64)
       return N->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7589,7 +7589,7 @@
   }
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
-                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+                               X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64_info, v4f32x_info,
                                      fpextend, sched.YMM>, EVEX_V256;
   }
@@ -7719,7 +7719,7 @@

 // Special patterns to allow use of X86vmfpround for masking. Instruction
 // patterns have been disabled with null_frag.
-  def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
             (VCVTPD2PSZ128rr VR128X:$src)>;
   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
@@ -7728,7 +7728,7 @@
                           VK2WM:$mask),
             (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

-  def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
             (VCVTPD2PSZ128rm addr:$src)>;
   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
@@ -7737,7 +7737,7 @@
                           VK2WM:$mask),
             (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

-  def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
+  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
             (VCVTPD2PSZ128rmb addr:$src)>;
   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -127,11 +127,32 @@
                         SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
                                              SDTCVecEltisVT<1, f32>,
                                              SDTCisSameSizeAs<0, 1>]>>;
+
+def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
+                              SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
+                                                   SDTCVecEltisVT<1, f32>,
+                                                   SDTCisSameSizeAs<0, 1>]>,
+                              [SDNPHasChain]>;
+
+def X86any_vfpext : PatFrags<(ops node:$src),
+                             [(X86vfpext node:$src),
+                              (X86strict_vfpext node:$src)]>;
+
 def X86vfpround: SDNode<"X86ISD::VFPROUND",
                         SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
                                              SDTCVecEltisVT<1, f64>,
                                              SDTCisOpSmallerThanOp<0, 1>]>>;

+def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
+                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+                                             SDTCVecEltisVT<1, f64>,
+                                             SDTCisOpSmallerThanOp<0, 1>]>,
+                        [SDNPHasChain]>;
+
+def X86any_vfpround : PatFrags<(ops node:$src),
+                              [(X86vfpround node:$src),
+                               (X86strict_vfpround node:$src)]>;
+
 def X86frounds : SDNode<"X86ISD::VFPROUNDS",
                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
                                              SDTCisSameAs<0, 1>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1617,7 +1617,7 @@
 // SSE2 instructions without OpSize prefix
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+                    [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                     PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1636,7 +1636,7 @@
 let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+                   [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                    PS, Sched<[WriteCvtPS2PD]>;
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1708,11 +1708,11 @@
 // XMM only
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
+                       [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                        VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
 def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
+                       [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
                        VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;

 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
@@ -1732,11 +1732,11 @@

 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
+                     [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                      Sched<[WriteCvtPD2PS]>, SIMD_EXC;
 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
+                     [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
                      Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;

 let Predicates = [HasAVX, NoVLX] in {
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -17,7 +17,9 @@
 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
 declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
 declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
@@ -399,4 +401,38 @@
   ret <2 x double> %res
 }

+define <2 x double> @f15(<2 x float> %a) #0 {
+; SSE-LABEL: f15:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtps2pd %xmm0, %xmm0
+; SSE-NEXT:    ret{{[l|q]}}
+;
+; AVX-LABEL: f15:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0
+; AVX-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                             <2 x float> %a,
+                             metadata !"fpexcept.strict") #0
+  ret <2 x double> %ret
+}
+
+define <2 x float> @f16(<2 x double> %a) #0 {
+; SSE-LABEL: f16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
+; SSE-NEXT:    ret{{[l|q]}}
+;
+; AVX-LABEL: f16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; AVX-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                             <2 x double> %a,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict") #0
+  ret <2 x float> %ret
+}
+
+
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -5433,20 +5433,12 @@
 define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
 ; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm1
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fptrunc_v2f64:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT:    vcvtpd2psx {{.*}}(%rip), %xmm0
 ; AVX-NEXT:    retq
 entry:
   %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
@@ -5492,17 +5484,9 @@
 define <4 x float> @constrained_vector_fptrunc_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm1
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm2
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fptrunc_v4f64:
 ; AVX:       # %bb.0: # %entry
@@ -5540,20 +5524,12 @@
 define <2 x double> @constrained_vector_fpext_v2f32() #0 {
 ; CHECK-LABEL: constrained_vector_fpext_v2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm1
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fpext_v2f32:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvtps2pd {{.*}}(%rip), %xmm0
 ; AVX-NEXT:    retq
 entry:
   %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
@@ -5597,16 +5573,8 @@
 define <4 x double> @constrained_vector_fpext_v4f32() #0 {
 ; CHECK-LABEL: constrained_vector_fpext_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm1
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm1, %xmm2
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cvtss2sd %xmm1, %xmm1
-; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm1
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fpext_v4f32: