diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5219,14 +5219,6 @@
     SelectCode(Res.getNode());
     return;
   }
-  case ISD::STRICT_FP_ROUND: {
-    // X87 instructions has enabled this strict fp operation.
-    bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 ||
-                     Node->getOperand(1).getSimpleValueType() == MVT::f80;
-    if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87()))
-      break;
-    LLVM_FALLTHROUGH;
-  }
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
     // FIXME: Remove when we have isel patterns for strict versions of these
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -591,13 +591,6 @@
       setOperationAction(ISD::FSIN , VT, Expand);
       setOperationAction(ISD::FCOS , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-
-      // Handle constrained floating-point operations of scalar.
-      setOperationAction(ISD::STRICT_FSQRT , VT, Legal);
-      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
-      // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
-      // as Custom.
-      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
     }
   }
@@ -622,14 +615,19 @@
       addLegalFPImmediate(APFloat(+0.0)); // xorpd
     }
 
     // Handle constrained floating-point operations of scalar.
-    setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
 
     // We don't support FMA.
     setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -857,17 +855,11 @@
     setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
     setOperationAction(ISD::STORE, MVT::v2f32, Custom);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend they're Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1042,6 +1034,12 @@
     // With AVX512, expanding (and promoting the shifts) is better.
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1157,9 +1155,7 @@
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend they're Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
     setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
@@ -1168,6 +1164,9 @@
     setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
 
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1430,17 +1429,18 @@
     setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend their Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
 
     setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
     setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7464,28 +7464,28 @@
                                  X86fpextsSAE, WriteCvtSS2SD, f32x_info, f64x_info>;
 
-def : Pat<(f64 (fpextend FR32X:$src)),
+def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
-def : Pat<(f64 (fpextend (loadf32 addr:$src))),
+def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;
-def : Pat<(f32 (fpround FR64X:$src)),
+def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;
 def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
-                    (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
+                    (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
 def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
-                    (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
+                    (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
@@ -7583,14 +7583,14 @@
                                  X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
     defm Z : avx512_vcvt_fpextend,
+                            any_fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fpextend, EVEX_V128;
-    defm Z256 : avx512_vcvt_fpextend, EVEX_V256;
   }
 }
@@ -7657,63 +7657,63 @@
                    PS, EVEX_CD8<32, CD8VH>;
 let Predicates = [HasAVX512] in {
-  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(v8f32 (any_fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
            VR256X:$src0),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
            v8f32x_info.ImmAllZerosV),
           (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
-  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(v8f32 (any_fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
            VR256X:$src0),
           (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
            v8f32x_info.ImmAllZerosV),
           (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
-  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
+  def : Pat<(v8f32 (any_fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PSZrmb addr:$src)>;
   def : Pat<(vselect VK8WM:$mask,
-            (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+            (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
             (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
   def : Pat<(vselect VK8WM:$mask,
-            (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+            (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
             v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
 }
 let Predicates = [HasVLX] in {
-  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(v4f32 (any_fpround (v4f64 VR256X:$src))),
            (VCVTPD2PSZ256rr VR256X:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
            VR128X:$src0),
           (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
            v4f32x_info.ImmAllZerosV),
           (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
-  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSZ256rm addr:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
            VR128X:$src0),
          (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
            v4f32x_info.ImmAllZerosV),
          (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
-  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+  def : Pat<(v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PSZ256rmb addr:$src)>;
   def : Pat<(vselect VK4WM:$mask,
-            (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+            (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
             VR128X:$src0),
           (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
   def : Pat<(vselect VK4WM:$mask,
-            (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+            (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
             v4f32x_info.ImmAllZerosV),
           (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
@@ -8963,17 +8963,17 @@
   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r: AVX512_maskable, EVEX,
+                         (_.VT (any_fsqrt _.RC:$src))>, EVEX,
          Sched<[sched]>;
   defm m: AVX512_maskable, EVEX,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
   defm mb: AVX512_maskable, EVEX, EVEX_B,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
@@ -9054,13 +9054,13 @@
 }
 let Predicates = [HasAVX512] in {
-  def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
+  def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
            (!cast(Name#Zr) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
 }
 let Predicates = [HasAVX512, OptForSize] in {
-  def : Pat<(_.EltVT (fsqrt (load addr:$src))),
+  def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
            (!cast(Name#Zm) (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
 }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1219,18 +1219,18 @@
                   Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
 }
-def : Pat<(f32 (fpround FR64:$src)),
+def : Pat<(f32 (any_fpround FR64:$src)),
          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
          Requires<[UseAVX]>;
 let isCodeGenOnly = 1 in {
 def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (fpround FR64:$src))]>,
+                     [(set FR32:$dst, (any_fpround FR64:$src))]>,
                      Sched<[WriteCvtSD2SS]>, SIMD_EXC;
 def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
-                   [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
+                   [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
                    XD, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
 }
@@ -1284,19 +1284,19 @@
                 Requires<[UseAVX, OptForSize]>, SIMD_EXC;
 } // isCodeGenOnly = 1, hasSideEffects = 0
-def : Pat<(f64 (fpextend FR32:$src)),
+def : Pat<(f64 (any_fpextend FR32:$src)),
          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>,
          Requires<[UseAVX]>;
-def : Pat<(fpextend (loadf32 addr:$src)),
+def : Pat<(any_fpextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
 let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (fpextend FR32:$src))]>,
+                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
                    XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
+                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
 } // isCodeGenOnly = 1
@@ -1335,13 +1335,13 @@
 def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector
-                    (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+                    (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
           (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
 def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector
-                    (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+                    (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 def : Pat<(v4f32 (X86Movss
@@ -1389,13 +1389,13 @@
 def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector
-                    (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+                    (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
           (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
 def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector
-                    (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+                    (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 def : Pat<(v2f64 (X86Movsd
@@ -1625,7 +1625,7 @@
                      PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>, + [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>, PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", @@ -1740,9 +1740,9 @@ Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), + def : Pat<(v4f32 (any_fpround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; - def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), + def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))), (VCVTPD2PSYrm addr:$src)>; } @@ -3007,10 +3007,10 @@ } // Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXC; +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>, + sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, + sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>, + sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. @@ -3039,8 +3039,8 @@ } } -defm : scalar_unary_math_patterns; -defm : scalar_unary_math_patterns; +defm : scalar_unary_math_patterns; +defm : scalar_unary_math_patterns; multiclass scalar_unary_math_intr_patterns @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { ; SSE-LABEL: f1: @@ -143,4 +147,74 @@ ret <4 x float> %ret } +define <2 x double> @f9(<2 x double> %a) #0 { +; SSE-LABEL: f9: +; SSE: # %bb.0: +; SSE-NEXT: sqrtpd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f9: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtpd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( + <2 x double> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %sqrt +} + +define <4 x float> @f10(<4 x float> %a) #0 { +; SSE-LABEL: f10: +; SSE: # %bb.0: +; SSE-NEXT: sqrtps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f10: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %sqrt = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32( + <4 x float> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float > %sqrt +} + +define <4 x float> @f11(<2 x double> %a0, <4 x float> %a1) #0 { +; SSE-LABEL: f11: +; SSE: # %bb.0: +; SSE-NEXT: cvtsd2ss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f11: +; AVX: # %bb.0: +; AVX-NEXT: vcvtsd2ss %xmm0, 
%xmm1, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ext = extractelement <2 x double> %a0, i32 0 + %cvt = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %ext, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %res = insertelement <4 x float> %a1, float %cvt, i32 0 + ret <4 x float> %res +} + +define <2 x double> @f12(<2 x double> %a0, <4 x float> %a1) #0 { +; SSE-LABEL: f12: +; SSE: # %bb.0: +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f12: +; AVX: # %bb.0: +; AVX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ext = extractelement <4 x float> %a1, i32 0 + %cvt = call double @llvm.experimental.constrained.fpext.f64.f32(float %ext, + metadata !"fpexcept.strict") #0 + %res = insertelement <2 x double> %a0, double %cvt, i32 0 + ret <2 x double> %res +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -12,6 +12,10 @@ declare <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float>, <8 x float>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.sqrt.v8f32(<8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 { ; CHECK-LABEL: f1: @@ -101,4 +105,53 @@ ret <8 x float> %ret } +define <4 x double> @f9(<4 x double> %a) #0 { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64( + <4 x double> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + + +define <8 x float> @f10(<8 x float> %a) #0 { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.sqrt.v8f32( + <8 x float> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float > %ret +} + +define <4 x double> @f11(<4 x float> %a) #0 { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32( + <4 x float> %a, + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <4 x float> @f12(<4 x double> %a) #0 { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( + <4 x double> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -10,6 +10,10 @@ declare <16 x float> 
@llvm.experimental.constrained.fmul.v16f32(<16 x float>, <16 x float>, metadata, metadata) declare <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double>, <8 x double>, metadata, metadata) declare <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sqrt.v8f64(<8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.sqrt.v16f32(<16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float>, metadata) +declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata) define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 { ; CHECK-LABEL: f1: @@ -99,4 +103,52 @@ ret <16 x float> %ret } +define <8 x double> @f9(<8 x double> %a) #0 { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.sqrt.v8f64( + <8 x double> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + + +define <16 x float> @f10(<16 x float> %a) #0 { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.sqrt.v16f32( + <16 x float> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float > %ret +} + +define <8 x double> @f11(<8 x float> %a) #0 { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32( + <8 x float> %a, + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <8 x float> @f12(<8 x double> %a) #0 { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64( + <8 x double> %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + attributes #0 = { strictfp }
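Note (illustrative sketch, not part of the patch): the X86ISelLowering.cpp changes above also mark the scalar STRICT_FSQRT and STRICT_FP_ROUND nodes Legal when SSE2 is available, so strict scalar IR like the following should now be selected directly (e.g. to sqrtsd followed by cvtsd2ss) instead of first being mutated to the non-strict nodes in X86ISelDAGToDAG::Select. The function name below is made up for the example; the intrinsics and signatures match the constrained-FP intrinsics used in the tests above.

; Illustrative only -- not from the patch's test files. Assumes an SSE2-capable
; x86-64 target; the expected selection is sqrtsd followed by cvtsd2ss, with the
; strict-FP chain preserved.
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)

define float @scalar_sqrt_then_trunc(double %a) #0 {
  ; strict square root: may not be speculated or reordered across FP-environment accesses
  %s = call double @llvm.experimental.constrained.sqrt.f64(double %a,
                      metadata !"round.dynamic",
                      metadata !"fpexcept.strict") #0
  ; strict double-to-float rounding of the result
  %t = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %s,
                      metadata !"round.dynamic",
                      metadata !"fpexcept.strict") #0
  ret float %t
}

attributes #0 = { strictfp }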