diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -404,6 +404,8 @@ setOperationAction(ISD::SELECT, MVT::f128, Custom); setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + // FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently + // aren't handled. // Lowering for many of the conversions is actually specified by the non-f128 // type. The LowerXXX function will be trivial when f128 isn't involved. @@ -647,37 +649,35 @@ } // AArch64 has implementations of a lot of rounding-like FP operations. - for (MVT Ty : {MVT::f32, MVT::f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - setOperationAction(ISD::FMINNUM, Ty, Legal); - setOperationAction(ISD::FMAXNUM, Ty, Legal); - setOperationAction(ISD::FMINIMUM, Ty, Legal); - setOperationAction(ISD::FMAXIMUM, Ty, Legal); - setOperationAction(ISD::LROUND, Ty, Legal); - setOperationAction(ISD::LLROUND, Ty, Legal); - setOperationAction(ISD::LRINT, Ty, Legal); - setOperationAction(ISD::LLRINT, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); - setOperationAction(ISD::FFLOOR, MVT::f16, Legal); - setOperationAction(ISD::FCEIL, MVT::f16, Legal); - setOperationAction(ISD::FRINT, MVT::f16, Legal); - setOperationAction(ISD::FTRUNC, MVT::f16, Legal); - setOperationAction(ISD::FROUND, MVT::f16, Legal); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); - setOperationAction(ISD::FMINNUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f16, 
Legal); - setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, + ISD::FRINT, ISD::FTRUNC, ISD::FROUND, + ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND, + ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND, + ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Basic strict FP operations are legal + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Strict conversion to a larger type is legal + for (auto VT : {MVT::f32, MVT::f64}) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -938,43 +938,29 @@ if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - 
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand); - - setOperationAction(ISD::MUL, MVT::v1i64, Expand); + for (auto Op : + {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC, + ISD::BR_CC, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FDIV, ISD::FMA, + ISD::FNEG, ISD::FABS, ISD::FCEIL, + ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, + ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, + ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, 
ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, + ISD::STRICT_FMAXIMUM}) + setOperationAction(Op, MVT::v1f64, Expand); + + for (auto Op : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP, + ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT, + ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND}) + setOperationAction(Op, MVT::v1i64, Expand); // AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. @@ -982,14 +968,12 @@ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); // Or, direct i32 -> f16 vector conversion. Set it so custom, so the // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP}) + for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32}) + setOperationAction(Op, VT, Custom); if (Subtarget->hasFullFP16()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); @@ -1103,26 +1087,16 @@ } // AArch64 has implementations of a lot of rounding-like FP operations. 
- for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR, + ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, + ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) { + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + for (MVT Ty : {MVT::v4f16, MVT::v8f16}) + setOperationAction(Op, Ty, Legal); } setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); @@ -1481,10 +1455,10 @@ setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); + for (unsigned Opcode : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Opcode, VT, Custom); if (!VT.isFloatingPoint()) setOperationAction(ISD::ABS, VT, Legal); @@ -1494,14 +1468,39 @@ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, 
ISD::UMAX}) setOperationAction(Opcode, VT, Legal); - // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. + // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP + // NEON types. if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 && (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) for (unsigned Opcode : - {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) + {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM, + ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, + ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA, + ISD::STRICT_FSQRT}) setOperationAction(Opcode, VT, Legal); + // Strict fp extend and trunc are legal + if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64) + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + + // FIXME: We could potentially make use of the vector comparison instructions + // for STRICT_FSETCC and STRICT_FSETCCS, but there's a number of + // complications: + // * FCMPEQ/NE are quiet comparisons, the rest are signalling comparisons, + // so we would need to expand when the condition code doesn't match the + // kind of comparison. + // * Some kinds of comparison require more than one FCMXY instruction so + // would need to be expanded instead. + // * The lowering of the non-strict versions involves target-specific ISD + // nodes so we would likely need to add strict versions of all of them and + // handle them appropriately. 
+ setOperationAction(ISD::STRICT_FSETCC, VT, Expand); + setOperationAction(ISD::STRICT_FSETCCS, VT, Expand); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4963,15 +4963,15 @@ // Half-precision to Double-precision def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, - [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR64:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Half-precision to Single-precision def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, - [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR32:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Single-precision to Double-precision def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + [(set FPR64:$Rd, (any_fpextend FPR32:$Rn))]>; // Single-precision to Half-precision def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, @@ -5075,7 +5075,8 @@ } } -multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { +multiclass TwoOperandFPDataNeg opcode, string asm, + SDPatternOperator node> { def Hrr : BaseTwoOperandFPData { let Inst{23-22} = 0b11; // 16-bit size flag diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3890,24 +3890,24 @@ let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lround f16:$Rn)), + def : Pat<(i32 (any_lround f16:$Rn)), (!cast(FCVTASUWHr) f16:$Rn)>; - def : Pat<(i64 (lround f16:$Rn)), + def : Pat<(i64 (any_lround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; - def : Pat<(i64 (llround f16:$Rn)), + def : Pat<(i64 (any_llround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; } 
-def : Pat<(i32 (lround f32:$Rn)), +def : Pat<(i32 (any_lround f32:$Rn)), (!cast(FCVTASUWSr) f32:$Rn)>; -def : Pat<(i32 (lround f64:$Rn)), +def : Pat<(i32 (any_lround f64:$Rn)), (!cast(FCVTASUWDr) f64:$Rn)>; -def : Pat<(i64 (lround f32:$Rn)), +def : Pat<(i64 (any_lround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (lround f64:$Rn)), +def : Pat<(i64 (any_lround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; -def : Pat<(i64 (llround f32:$Rn)), +def : Pat<(i64 (any_llround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (llround f64:$Rn)), +def : Pat<(i64 (any_llround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; //===----------------------------------------------------------------------===// @@ -3951,17 +3951,17 @@ defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPData<0b0000, "fmov">; defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; +defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; } let Predicates = [HasFRInt3264] in { @@ 
-3971,44 +3971,48 @@ defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; } // HasFRInt3264 +// Emitting strict_lrint as two instructions is valid as any exceptions that +// occur will happen in exactly one of the instructions (e.g. if the input is +// not an integer the inexact exception will happen in the FRINTX but not then +// in the FCVTZS as the output of FRINTX is an integer). let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lrint f16:$Rn)), + def : Pat<(i32 (any_lrint f16:$Rn)), (FCVTZSUWHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (lrint f16:$Rn)), + def : Pat<(i64 (any_lrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (llrint f16:$Rn)), + def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; } -def : Pat<(i32 (lrint f32:$Rn)), +def : Pat<(i32 (any_lrint f32:$Rn)), (FCVTZSUWSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i32 (lrint f64:$Rn)), +def : Pat<(i32 (any_lrint f64:$Rn)), (FCVTZSUWDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (lrint f32:$Rn)), +def : Pat<(i64 (any_lrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (lrint f64:$Rn)), +def : Pat<(i64 (any_lrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (llrint f32:$Rn)), +def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (llrint f64:$Rn)), +def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>; //===----------------------------------------------------------------------===// // Floating point two operand instructions. 
//===----------------------------------------------------------------------===// -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; } -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; +defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; @@ -4023,13 +4027,13 @@ // Floating point three operand instructions. 
//===----------------------------------------------------------------------===// -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; + TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; // The following def pats catch the case where the LHS of an FMA is negated. // The TriOpFrag above catches the case where the middle operand is negated. @@ -4218,9 +4222,9 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; @@ -4232,16 +4236,16 @@ def : Pat<(concat_vectors V64:$Rd, (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), +def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 
(FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", int_aarch64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats { @@ -4273,13 +4277,13 @@ defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; let Predicates = [HasFRInt3264] in { defm FRINT32Z : 
FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; @@ -4289,7 +4293,7 @@ } // HasFRInt3264 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; @@ -4313,7 +4317,7 @@ defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; @@ -4323,7 +4327,7 @@ defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; @@ -4447,32 +4451,32 @@ defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; 
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; // MLA and MLS are generated in MachineCombine defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; @@ -6371,7 +6375,7 @@ TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; +defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, diff --git a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll @@ -109,6 +109,114 @@ ret double %0 } +define float @fma32_strict(float %a, float %b, float %c) nounwind readnone ssp { +; CHECK-LABEL: fma32_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %0 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %0 +} + +define float @fnma32_strict(float %a, float %b, float %c) nounwind readnone 
ssp { +; CHECK-LABEL: fnma32_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fnmadd s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %0 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %neg = fneg float %0 + ret float %neg +} + +define float @fms32_strict(float %a, float %b, float %c) nounwind readnone ssp { +; CHECK-LABEL: fms32_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %neg = fneg float %b + %0 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %neg, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %0 +} + +define float @fms32_com_strict(float %a, float %b, float %c) nounwind readnone ssp { +; CHECK-LABEL: fms32_com_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %neg = fneg float %b + %0 = tail call float @llvm.experimental.constrained.fma.f32(float %neg, float %a, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %0 +} + +define float @fnms32_strict(float %a, float %b, float %c) nounwind readnone ssp { +; CHECK-LABEL: fnms32_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fnmsub s0, s0, s1, s2 +; CHECK-NEXT: ret +entry: + %neg = fneg float %c + %0 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %neg, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %0 +} + +define double @fma64_strict(double %a, double %b, double %c) nounwind readnone ssp { +; CHECK-LABEL: fma64_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %0 = tail call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %0 +} + +define double @fnma64_strict(double %a, double %b, double %c) 
nounwind readnone ssp { +; CHECK-LABEL: fnma64_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fnmadd d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %0 = tail call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %neg = fneg double %0 + ret double %neg +} + +define double @fms64_strict(double %a, double %b, double %c) nounwind readnone ssp { +; CHECK-LABEL: fms64_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %neg = fneg double %b + %0 = tail call double @llvm.experimental.constrained.fma.f64(double %a, double %neg, double %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %0 +} + +define double @fms64_com_strict(double %a, double %b, double %c) nounwind readnone ssp { +; CHECK-LABEL: fms64_com_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %neg = fneg double %b + %0 = tail call double @llvm.experimental.constrained.fma.f64(double %neg, double %a, double %c, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %0 +} + +define double @fnms64_strict(double %a, double %b, double %c) nounwind readnone ssp { +; CHECK-LABEL: fnms64_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fnmsub d0, d0, d1, d2 +; CHECK-NEXT: ret +entry: + %neg = fneg double %c + %0 = tail call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %neg, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %0 +} + ; This would crash while trying getNegatedExpression(). 
define float @negated_constant(float %x) { @@ -127,5 +235,9 @@ ret float %nfma } +attributes #0 = { strictfp } + declare float @llvm.fma.f32(float, float, float) nounwind readnone declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) +declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll --- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s ; Check that constrained fp intrinsics are correctly lowered. 
@@ -231,6 +232,20 @@ ret float %val } +; CHECK-LABEL: maximum_f32: +; CHECK: fmax s0, s0, s1 +define float @maximum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.maximum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: minimum_f32: +; CHECK: fmin s0, s0, s1 +define float @minimum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.minimum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: ceil_f32: ; CHECK: frintp s0, s0 define float @ceil_f32(float %x) #0 { @@ -701,6 +716,20 @@ ret double %val } +; CHECK-LABEL: maximum_f64: +; CHECK: fmax d0, d0, d1 +define double @maximum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.maximum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: minimum_f64: +; CHECK: fmin d0, d0, d1 +define double @minimum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.minimum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: ceil_f64: ; CHECK: frintp d0, d0 define double @ceil_f64(double %x) #0 { @@ -1483,6 +1512,8 @@ declare i64 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata) declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata) +declare float @llvm.experimental.constrained.maximum.f32(float, float, metadata) +declare float @llvm.experimental.constrained.minimum.f32(float, float, metadata) declare float @llvm.experimental.constrained.ceil.f32(float, metadata) declare float @llvm.experimental.constrained.floor.f32(float, metadata) declare i32 @llvm.experimental.constrained.lround.f32(float, metadata) @@ -1525,6 +1556,8 @@ declare i64 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata) declare 
double @llvm.experimental.constrained.maxnum.f64(double, double, metadata) declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata) +declare double @llvm.experimental.constrained.maximum.f64(double, double, metadata) +declare double @llvm.experimental.constrained.minimum.f64(double, double, metadata) declare double @llvm.experimental.constrained.ceil.f64(double, metadata) declare double @llvm.experimental.constrained.floor.f64(double, metadata) declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)