Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -690,37 +690,33 @@ } // AArch64 has implementations of a lot of rounding-like FP operations. - for (MVT Ty : {MVT::f32, MVT::f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - setOperationAction(ISD::FMINNUM, Ty, Legal); - setOperationAction(ISD::FMAXNUM, Ty, Legal); - setOperationAction(ISD::FMINIMUM, Ty, Legal); - setOperationAction(ISD::FMAXIMUM, Ty, Legal); - setOperationAction(ISD::LROUND, Ty, Legal); - setOperationAction(ISD::LLROUND, Ty, Legal); - setOperationAction(ISD::LRINT, Ty, Legal); - setOperationAction(ISD::LLRINT, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); - setOperationAction(ISD::FFLOOR, MVT::f16, Legal); - setOperationAction(ISD::FCEIL, MVT::f16, Legal); - setOperationAction(ISD::FRINT, MVT::f16, Legal); - setOperationAction(ISD::FTRUNC, MVT::f16, Legal); - setOperationAction(ISD::FROUND, MVT::f16, Legal); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); - setOperationAction(ISD::FMINNUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); - setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::FROUND, ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUM, + ISD::FMAXIMUM, ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, + 
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, + ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND, + ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Basic strict FP operations are legal + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) { + for (MVT Ty : {MVT::f32, MVT::f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + setOperationAction(Op, MVT::f16, Legal); + } + + // Strict conversion to a larger type is legal + for (auto VT : {MVT::f32, MVT::f64}) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -978,43 +974,24 @@ if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, 
MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand); - - setOperationAction(ISD::MUL, MVT::v1i64, Expand); + for (auto Op : + {ISD::FABS, ISD::FADD, ISD::FCEIL, ISD::FCOPYSIGN, ISD::FCOS, + ISD::FDIV, ISD::FFLOOR, ISD::FMA, ISD::FMUL, ISD::FNEARBYINT, + ISD::FNEG, ISD::FPOW, ISD::FREM, ISD::FROUND, ISD::FROUNDEVEN, + ISD::FRINT, ISD::FSIN, ISD::FSINCOS, ISD::FSQRT, ISD::FSUB, + ISD::FTRUNC, ISD::SETCC, ISD::BR_CC, ISD::SELECT, ISD::SELECT_CC, + ISD::FP_EXTEND, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM, + ISD::STRICT_FMAXNUM}) + setOperationAction(Op, MVT::v1f64, Expand); + + for (auto Op : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP, + ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT, + ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND}) + setOperationAction(Op, MVT::v1i64, Expand); // AArch64 doesn't have a direct vector ->f32 conversion instructions 
for // elements smaller than i32, so promote the input to i32 first. @@ -1022,14 +999,12 @@ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); // Or, direct i32 -> f16 vector conversion. Set it so custom, so the // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP}) + for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32}) + setOperationAction(Op, VT, Custom); if (Subtarget->hasFullFP16()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); @@ -1139,26 +1114,16 @@ } // AArch64 has implementations of a lot of rounding-like FP operations. 
- for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } - - if (Subtarget->hasFullFP16()) { - for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FROUNDEVEN, Ty, Legal); - } + for (auto Op : + {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR, + ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, + ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) { + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) + setOperationAction(Op, Ty, Legal); + if (Subtarget->hasFullFP16()) + for (MVT Ty : {MVT::v4f16, MVT::v8f16}) + setOperationAction(Op, Ty, Legal); } if (Subtarget->hasSVE()) @@ -1483,10 +1448,10 @@ setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); - setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); + for (unsigned Opcode : + {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Opcode, VT, Custom); if (!VT.isFloatingPoint()) setOperationAction(ISD::ABS, VT, Legal); @@ -1496,14 +1461,25 @@ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) 
setOperationAction(Opcode, VT, Legal); - // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. + // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP + // NEON types. if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 && (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) for (unsigned Opcode : - {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) + {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM, + ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, + ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA, + ISD::STRICT_FSQRT}) setOperationAction(Opcode, VT, Legal); + // Strict fp extend and trunc are legal + if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64) + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4963,15 +4963,15 @@ // Half-precision to Double-precision def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, - [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR64:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Half-precision to Single-precision def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, - [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR32:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Single-precision to Double-precision def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + [(set FPR64:$Rd, (any_fpextend FPR32:$Rn))]>; // Single-precision to 
Half-precision def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, @@ -5075,7 +5075,8 @@ } } -multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> { +multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, + SDPatternOperator node> { def Hrr : BaseTwoOperandFPData { let Inst{23-22} = 0b11; // 16-bit size flag Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3885,24 +3885,24 @@ let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lround f16:$Rn)), + def : Pat<(i32 (any_lround f16:$Rn)), (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>; - def : Pat<(i64 (lround f16:$Rn)), + def : Pat<(i64 (any_lround f16:$Rn)), (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; - def : Pat<(i64 (llround f16:$Rn)), + def : Pat<(i64 (any_llround f16:$Rn)), (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; } -def : Pat<(i32 (lround f32:$Rn)), +def : Pat<(i32 (any_lround f32:$Rn)), (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>; -def : Pat<(i32 (lround f64:$Rn)), +def : Pat<(i32 (any_lround f64:$Rn)), (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>; -def : Pat<(i64 (lround f32:$Rn)), +def : Pat<(i64 (any_lround f32:$Rn)), (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (lround f64:$Rn)), +def : Pat<(i64 (any_lround f64:$Rn)), (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; -def : Pat<(i64 (llround f32:$Rn)), +def : Pat<(i64 (any_llround f32:$Rn)), (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (llround f64:$Rn)), +def : Pat<(i64 (any_llround f64:$Rn)), (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; //===----------------------------------------------------------------------===// @@ -3946,17 +3946,17 @@ defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPData<0b0000, "fmov">; defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", 
ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; +defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; } let Predicates = [HasFRInt3264] in { @@ -3966,44 +3966,48 @@ defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; } // HasFRInt3264 +// Emitting strict_lrint as two instructions is valid as any exceptions that +// occur will happen in exactly one of the instructions (e.g. if the input is +// not an integer the inexact exception will happen in the FRINTX but not then +// in the FCVTZS as the output of FRINTX is an integer). 
let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lrint f16:$Rn)), + def : Pat<(i32 (any_lrint f16:$Rn)), (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (lrint f16:$Rn)), + def : Pat<(i64 (any_lrint f16:$Rn)), (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (llrint f16:$Rn)), + def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; } -def : Pat<(i32 (lrint f32:$Rn)), +def : Pat<(i32 (any_lrint f32:$Rn)), (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; -def : Pat<(i32 (lrint f64:$Rn)), +def : Pat<(i32 (any_lrint f64:$Rn)), (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (lrint f32:$Rn)), +def : Pat<(i64 (any_lrint f32:$Rn)), (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (lrint f64:$Rn)), +def : Pat<(i64 (any_lrint f64:$Rn)), (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (llrint f32:$Rn)), +def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (llrint f64:$Rn)), +def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; //===----------------------------------------------------------------------===// // Floating point two operand instructions. 
//===----------------------------------------------------------------------===// -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; } -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; +defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; @@ -4018,13 +4022,13 @@ // Floating point three operand instructions. 
//===----------------------------------------------------------------------===// -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; + TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; // The following def pats catch the case where the LHS of an FMA is negated. // The TriOpFrag above catches the case where the middle operand is negated. @@ -4213,9 +4217,9 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; @@ -4227,16 +4231,16 @@ def : Pat<(concat_vectors V64:$Rd, (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), +def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 
(FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", int_aarch64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats { @@ -4268,13 +4272,13 @@ defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; let Predicates = [HasFRInt3264] in { defm FRINT32Z : 
FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; @@ -4284,7 +4288,7 @@ } // HasFRInt3264 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; @@ -4308,7 +4312,7 @@ defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; @@ -4318,7 +4322,7 @@ defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; @@ -4442,32 +4446,32 @@ defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FADD : 
SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; // MLA and MLS are generated in MachineCombine defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; @@ -6271,18 +6275,18 @@ // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). 
defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit Index: llvm/test/CodeGen/AArch64/fp-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s ; Check that constrained fp intrinsics are correctly lowered.