Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4693,6 +4693,12 @@ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FMAXNUM: case ISD::STRICT_FREM: case ISD::STRICT_FPOW: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, @@ -4717,6 +4723,22 @@ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FMA: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(3)}); + Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1), Tmp3.getValue(1)); + Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp4, Tmp1, Tmp2, Tmp3}); + Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp4); + Results.push_back(Tmp4.getValue(1)); + break; case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); @@ -4733,6 +4755,16 @@ Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } + case ISD::STRICT_FPOWI: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, 
MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: @@ -4757,12 +4789,19 @@ break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: + case ISD::STRICT_FROUNDEVEN: + case ISD::STRICT_FTRUNC: + case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1420,6 +1420,67 @@ } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); + + // Strict operations that correspond to a single instruction are legal, though + // for fp16 they need to be promoted/expanded when we don't have those + // instructions. + for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT, + ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FCEIL, ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, + ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM}) { + setOperationAction(Op, MVT::f16, + Subtarget->hasFullFP16() ? Legal : Promote); + setOperationAction(Op, MVT::v4f16, + Subtarget->hasFullFP16() ? Legal : Expand); + setOperationAction(Op, MVT::v8f16, + Subtarget->hasFullFP16() ? 
Legal : Expand); + for (auto VT : {MVT::f32, MVT::v2f32, MVT::v4f32, + MVT::f64, MVT::v1f64, MVT::v2f64}) + setOperationAction(Op, VT, Legal); + } + + // Round-to-integer need custom lowering for fp16, as Promote doesn't work + // because the result type is integer. + for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT, + ISD::STRICT_LLRINT}) { + setOperationAction(Op, MVT::f16, + Subtarget->hasFullFP16() ? Legal : Custom); + setOperationAction(Op, MVT::v4f16, + Subtarget->hasFullFP16() ? Legal : Expand); + setOperationAction(Op, MVT::v8f16, + Subtarget->hasFullFP16() ? Legal : Expand); + for (auto VT : {MVT::f32, MVT::v2f32, MVT::v4f32, + MVT::f64, MVT::v1f64, MVT::v2f64}) + setOperationAction(Op, VT, Legal); + } + + // Library functions become calls, though f16 is promoted as there's no f16 + // functions and vector types are expanded out. + for (auto Op : {ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FSIN, + ISD::STRICT_FCOS, ISD::STRICT_FEXP, ISD::STRICT_FEXP2, + ISD::STRICT_FLOG, ISD::STRICT_FLOG10, ISD::STRICT_FLOG2}) { + setOperationAction(Op, MVT::f16, Promote); + setOperationAction(Op, MVT::f32, LibCall); + setOperationAction(Op, MVT::f64, LibCall); + for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, + MVT::v1f64, MVT::v2f64}) + setOperationAction(Op, VT, Expand); + } + + // Conversion to a larger type is always legal + for (auto VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + + // Strict frem is expanded like non-strict is + setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote); + for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::f32, MVT::v2f32, MVT::v4f32, + MVT::f64, MVT::v1f64, MVT::v2f64}) + setOperationAction(ISD::STRICT_FREM, VT, Expand); + + IsStrictFPEnabled = true; } void AArch64TargetLowering::addTypeForNEON(MVT VT) { @@ -2552,7 +2613,18 @@ bool IsSignaling) { EVT VT = LHS.getValueType(); assert(VT != MVT::f128); - assert(VT != MVT::f16 && 
"Lowering of strict fp16 not yet implemented"); + + const bool FullFP16 = + static_cast(DAG.getSubtarget()).hasFullFP16(); + + if (VT == MVT::f16 && !FullFP16) { + LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other}, + {Chain, LHS}); + RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other}, + {LHS.getValue(1), RHS}); + Chain = RHS.getValue(1); + VT = MVT::f32; + } unsigned Opcode = IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP; return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS}); @@ -3431,8 +3503,14 @@ // f16 conversions are promoted to f32 when full fp16 is not supported. if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { - assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); SDLoc dl(Op); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {MVT::f32, MVT::Other}, + {Op.getOperand(0), SrcVal}); + return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); @@ -3634,8 +3712,15 @@ // f16 conversions are promoted to f32 when full fp16 is not supported. 
if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { - assert(!IsStrict && "Lowering of strict fp16 not yet implemented"); SDLoc dl(Op); + if (IsStrict) { + SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other}, + {Op.getOperand(0), SrcVal}); + return DAG.getNode(ISD::STRICT_FP_ROUND, dl, + {MVT::f16, MVT::Other}, + {Val.getValue(1), Val.getValue(0), + DAG.getIntPtrConstant(0, dl)}); + } return DAG.getNode( ISD::FP_ROUND, dl, MVT::f16, DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), @@ -5173,6 +5258,18 @@ return LowerCTTZ(Op, DAG); case ISD::VECTOR_SPLICE: return LowerVECTOR_SPLICE(Op, DAG); + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: { + assert(Op.getOperand(1).getValueType() == MVT::f16 && + "Expected custom lowering of rounding operations only for f16"); + SDLoc DL(Op); + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } } } Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4963,15 +4963,15 @@ // Half-precision to Double-precision def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, - [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR64:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Half-precision to Single-precision def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, - [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>; + [(set FPR32:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>; // Single-precision to Double-precision def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + [(set FPR64:$Rd, (any_fpextend FPR32:$Rn))]>; // Single-precision to Half-precision def 
HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, @@ -5075,7 +5075,8 @@ } } -multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { +multiclass TwoOperandFPDataNeg opcode, string asm, + SDPatternOperator node> { def Hrr : BaseTwoOperandFPData { let Inst{23-22} = 0b11; // 16-bit size flag Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3874,24 +3874,24 @@ let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lround f16:$Rn)), + def : Pat<(i32 (any_lround f16:$Rn)), (!cast(FCVTASUWHr) f16:$Rn)>; - def : Pat<(i64 (lround f16:$Rn)), + def : Pat<(i64 (any_lround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; - def : Pat<(i64 (llround f16:$Rn)), + def : Pat<(i64 (any_llround f16:$Rn)), (!cast(FCVTASUXHr) f16:$Rn)>; } -def : Pat<(i32 (lround f32:$Rn)), +def : Pat<(i32 (any_lround f32:$Rn)), (!cast(FCVTASUWSr) f32:$Rn)>; -def : Pat<(i32 (lround f64:$Rn)), +def : Pat<(i32 (any_lround f64:$Rn)), (!cast(FCVTASUWDr) f64:$Rn)>; -def : Pat<(i64 (lround f32:$Rn)), +def : Pat<(i64 (any_lround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (lround f64:$Rn)), +def : Pat<(i64 (any_lround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; -def : Pat<(i64 (llround f32:$Rn)), +def : Pat<(i64 (any_llround f32:$Rn)), (!cast(FCVTASUXSr) f32:$Rn)>; -def : Pat<(i64 (llround f64:$Rn)), +def : Pat<(i64 (any_llround f64:$Rn)), (!cast(FCVTASUXDr) f64:$Rn)>; //===----------------------------------------------------------------------===// @@ -3935,17 +3935,17 @@ defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPData<0b0000, "fmov">; defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : 
SingleOperandFPData<0b1000, "frintn", froundeven>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; +defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; } let Predicates = [HasFRInt3264] in { @@ -3956,43 +3956,43 @@ } // HasFRInt3264 let Predicates = [HasFullFP16] in { - def : Pat<(i32 (lrint f16:$Rn)), + def : Pat<(i32 (any_lrint f16:$Rn)), (FCVTZSUWHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (lrint f16:$Rn)), + def : Pat<(i64 (any_lrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; - def : Pat<(i64 (llrint f16:$Rn)), + def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (!cast(FRINTXHr) f16:$Rn))>; } -def : Pat<(i32 (lrint f32:$Rn)), +def : Pat<(i32 (any_lrint f32:$Rn)), (FCVTZSUWSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i32 (lrint f64:$Rn)), +def : Pat<(i32 (any_lrint f64:$Rn)), (FCVTZSUWDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (lrint f32:$Rn)), +def : Pat<(i64 (any_lrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (lrint f64:$Rn)), +def : Pat<(i64 (any_lrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) f64:$Rn))>; -def : Pat<(i64 (llrint f32:$Rn)), +def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (!cast(FRINTXSr) f32:$Rn))>; -def : Pat<(i64 (llrint f64:$Rn)), +def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (!cast(FRINTXDr) 
f64:$Rn))>; //===----------------------------------------------------------------------===// // Floating point two operand instructions. //===----------------------------------------------------------------------===// -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; } -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; +defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; @@ -4007,13 +4007,13 @@ // Floating point three operand instructions. 
//===----------------------------------------------------------------------===// -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; + TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; // The following def pats catch the case where the LHS of an FMA is negated. // The TriOpFrag above catches the case where the middle operand is negated. @@ -4187,9 +4187,9 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; @@ -4201,16 +4201,16 @@ def : Pat<(concat_vectors V64:$Rd, (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), +def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 
(FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", int_aarch64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats { @@ -4242,13 +4242,13 @@ defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; let Predicates = [HasFRInt3264] in { defm FRINT32Z : 
FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; @@ -4258,7 +4258,7 @@ } // HasFRInt3264 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; @@ -4282,7 +4282,7 @@ defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; @@ -4292,7 +4292,7 @@ defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; @@ -4390,32 +4390,32 @@ defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FADD : 
SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; // MLA and MLS are generated in MachineCombine defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; @@ -4923,19 +4923,19 @@ // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. 
let Predicates = [HasNEON] in { -def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))), +def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; -def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))), +def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; -def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))), +def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; -def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))), +def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; let Predicates = [HasFullFP16] in { -def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))), +def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; -def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))), +def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; } } @@ -4948,14 +4948,14 @@ SDPatternOperator loadop, Instruction UCVTF, ROAddrMode ro, Instruction LDRW, Instruction LDRX, SubRegIndex sub> { - def : Pat<(DstTy (uint_to_fp (SrcTy + def : Pat<(DstTy (any_uint_to_fp (SrcTy (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))))), (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), sub))>; - def : Pat<(DstTy (uint_to_fp (SrcTy + def : Pat<(DstTy (any_uint_to_fp (SrcTy (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Wext:$extend))))), (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), @@ -4965,22 +4965,22 @@ defm : UIntToFPROLoadPat; -def : Pat <(f32 (uint_to_fp (i32 +def : Pat <(f32 (any_uint_to_fp (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 +def : Pat <(f32 (any_uint_to_fp (i32 
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> float. defm : UIntToFPROLoadPat; -def : Pat <(f32 (uint_to_fp (i32 +def : Pat <(f32 (any_uint_to_fp (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 +def : Pat <(f32 (any_uint_to_fp (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; @@ -4994,33 +4994,33 @@ // 8-bits -> double. defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> double. defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; // 32-bits -> double. 
defm : UIntToFPROLoadPat; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 +def : Pat <(f64 (any_uint_to_fp (i32 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; @@ -6206,18 +6206,18 @@ // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit @@ -6296,22 +6296,22 @@ } defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; + TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, 
node:$LHS)> >; + TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; +defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; -def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), +def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), +def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv4i32_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), +def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), (FMULv2i64_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), (i64 0))>; @@ -6663,7 +6663,7 @@ // However, this is not good for code size. // 8-bits -> float. 2 sizes step-up. class SExtLoadi8CVTf32Pat - : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), + : Pat<(f32 (any_sint_to_fp (i32 (sextloadi8 addrmode)))), (SCVTFv1i32 (f32 (EXTRACT_SUBREG (SSHLLv4i16_shift (f64 @@ -6689,7 +6689,7 @@ // 16-bits -> float. 1 size step-up. class SExtLoadi16CVTf32Pat - : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), + : Pat<(f32 (any_sint_to_fp (i32 (sextloadi16 addrmode)))), (SCVTFv1i32 (f32 (EXTRACT_SUBREG (SSHLLv4i16_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), @@ -6717,7 +6717,7 @@ // 8-bits -> double. 3 size step-up: give up. // 16-bits -> double. 2 size step. class SExtLoadi16CVTf64Pat - : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), + : Pat <(f64 (any_sint_to_fp (i32 (sextloadi16 addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (f64 @@ -6742,7 +6742,7 @@ (LDURHi GPR64sp:$Rn, simm9:$offset)>; // 32-bits -> double. 1 size step-up. 
class SExtLoadi32CVTf64Pat - : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), + : Pat <(f64 (any_sint_to_fp (i32 (load addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), @@ -7979,17 +7979,17 @@ def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), +def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, // so we match on v4f32 here, not v2f32. This will also catch adding // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), +def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; -def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), - (vector_extract (v8f16 FPR128:$Rn), (i64 1))), +def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), + (vector_extract (v8f16 FPR128:$Rn), (i64 1))), (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. 
Index: llvm/test/CodeGen/AArch64/fp-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -1,8 +1,656 @@ -; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 ; Check that constrained fp intrinsics are correctly lowered. +; Half-precision intrinsics + +; CHECK-LABEL: add_f16: +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16: fadd s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fadd h0, h0, h1 +define half @add_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: sub_f16: +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16: fsub s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fsub h0, h0, h1 +define half @sub_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: mul_f16: +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16: fmul s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fmul h0, h0, h1 +define half @mul_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", 
metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: div_f16: +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16: fdiv s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fdiv h0, h0, h1 +define half @div_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: frem_f16: +; CHECK-DAG: fcvt s1, h1 +; CHECK-DAG: fcvt s0, h0 +; CHECK: bl fmodf +; CHECK: fcvt h0, s0 +define half @frem_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: fma_f16: +; CHECK-NOFP16-DAG: fcvt s2, h2 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16: fmadd s0, s0, s1, s2 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fmadd h0, h0, h1, h2 +define half @fma_f16(half %x, half %y, half %z) #0 { + %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: fptosi_i32_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fcvtzs w0, s0 +; CHECK-FP16: fcvtzs w0, h0 +define i32 @fptosi_i32_f16(half %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: fptoui_i32_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fcvtzu w0, s0 +; CHECK-FP16: fcvtzu w0, h0 +define i32 @fptoui_i32_f16(half %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: fptosi_i64_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fcvtzs x0, s0 +; CHECK-FP16: fcvtzs x0, h0 +define i64 @fptosi_i64_f16(half %x) #0 { + %val = call i64 
@llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +; CHECK-LABEL: fptoui_i64_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fcvtzu x0, s0 +; CHECK-FP16: fcvtzu x0, h0 +define i64 @fptoui_i64_f16(half %x) #0 { + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +; CHECK-LABEL: sitofp_f16_i32: +; CHECK-NOFP16: scvtf s0, w0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: scvtf h0, w0 +define half @sitofp_f16_i32(i32 %x) #0 { + %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: uitofp_f16_i32: +; CHECK-NOFP16: ucvtf s0, w0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: ucvtf h0, w0 +define half @uitofp_f16_i32(i32 %x) #0 { + %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: sitofp_f16_i64: +; CHECK-NOFP16: scvtf s0, x0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: scvtf h0, x0 +define half @sitofp_f16_i64(i64 %x) #0 { + %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: uitofp_f16_i64: +; CHECK-NOFP16: ucvtf s0, x0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: ucvtf h0, x0 +define half @uitofp_f16_i64(i64 %x) #0 { + %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: sitofp_f16_i128: +; CHECK-NOFP16: bl __floattisf +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: bl __floattihf +define half @sitofp_f16_i128(i128 %x) #0 { + %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: 
uitofp_f16_i128: +; CHECK-NOFP16: bl __floatuntisf +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: bl __floatuntihf +define half @uitofp_f16_i128(i128 %x) #0 { + %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: sqrt_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fsqrt s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fsqrt h0, h0 +define half @sqrt_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: powi_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl __powisf2 +; CHECK: fcvt h0, s0 +define half @powi_f16(half %x, i32 %y) #0 { + %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: sin_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl sinf +; CHECK: fcvt h0, s0 +define half @sin_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: cos_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl cosf +; CHECK: fcvt h0, s0 +define half @cos_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: pow_f16: +; CHECK-DAG: fcvt s1, h1 +; CHECK-DAG: fcvt s0, h0 +; CHECK: bl powf +; CHECK: fcvt h0, s0 +define half @pow_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: log_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl logf +; CHECK: fcvt h0, s0 +define half @log_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.log.f16(half %x, 
metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: log10_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl log10f +; CHECK: fcvt h0, s0 +define half @log10_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: log2_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl log2f +; CHECK: fcvt h0, s0 +define half @log2_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: exp_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl expf +; CHECK: fcvt h0, s0 +define half @exp_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: exp2_f16: +; CHECK: fcvt s0, h0 +; CHECK: bl exp2f +; CHECK: fcvt h0, s0 +define half @exp2_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: rint_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintx s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frintx h0, h0 +define half @rint_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: nearbyint_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frinti s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frinti h0, h0 +define half @nearbyint_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: lrint_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintx [[REG:s[0-9]+]], s0 +; CHECK-FP16: frintx 
[[REG:h[0-9]+]], h0 +; CHECK: fcvtzs w0, [[REG]] +define i32 @lrint_f16(half %x) #0 { + %val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llrint_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintx [[REG:s[0-9]+]], s0 +; CHECK-FP16: frintx [[REG:h[0-9]+]], h0 +; CHECK: fcvtzs x0, [[REG]] +define i64 @llrint_f16(half %x) #0 { + %val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i64 %val +} + +; CHECK-LABEL: maxnum_f16: +; CHECK-NOFP16: fcvt s1, h1 +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fmaxnm s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fmaxnm h0, h0, h1 +define half @maxnum_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: minnum_f16: +; CHECK-NOFP16: fcvt s1, h1 +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fminnm s0, s0, s1 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: fminnm h0, h0, h1 +define half @minnum_f16(half %x, half %y) #0 { + %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: ceil_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintp s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frintp h0, h0 +define half @ceil_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: floor_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintm s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frintm h0, h0 +define half @floor_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: lround_f16: +; CHECK-NOFP16: fcvt s0, h0 +; 
CHECK-NOFP16: fcvtas w0, s0 +; CHECK-FP16: fcvtas w0, h0 +define i32 @lround_f16(half %x) #0 { + %val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llround_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: fcvtas x0, s0 +; CHECK-FP16: fcvtas x0, h0 +define i64 @llround_f16(half %x) #0 { + %val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +; CHECK-LABEL: round_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frinta s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frinta h0, h0 +define half @round_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: roundeven_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintn s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frintn h0, h0 +define half @roundeven_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: trunc_f16: +; CHECK-NOFP16: fcvt s0, h0 +; CHECK-NOFP16: frintz s0, s0 +; CHECK-NOFP16: fcvt h0, s0 +; CHECK-FP16: frintz h0, h0 +define half @trunc_f16(half %x) #0 { + %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +; CHECK-LABEL: fcmp_olt_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_olt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ole_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ole_f16(half %a, half %b) #0 { 
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ogt_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ogt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_oge_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_oge_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_oeq_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_oeq_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_one_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_one_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ult_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ult_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata 
!"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ule_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ule_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ugt_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ugt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_uge_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_uge_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_ueq_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_ueq_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmp_une_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmp s0, s1 +; CHECK-FP16: fcmp h0, h1 +define i32 @fcmp_une_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_olt_f16: +; 
CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_olt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ole_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ole_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ogt_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ogt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_oge_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_oge_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_oeq_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_oeq_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_one_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 
+; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_one_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ult_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ult_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ule_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ule_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ugt_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ugt_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_uge_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_uge_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_ueq_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_ueq_f16(half %a, half %b) #0 { + %cmp = call i1 
@llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: fcmps_une_f16: +; CHECK-NOFP16-DAG: fcvt s0, h0 +; CHECK-NOFP16-DAG: fcvt s1, h1 +; CHECK-NOFP16: fcmpe s0, s1 +; CHECK-FP16: fcmpe h0, h1 +define i32 @fcmps_une_f16(half %a, half %b) #0 { + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + ; Single-precision intrinsics ; CHECK-LABEL: add_f32: @@ -1406,6 +2054,13 @@ ; Intrinsics to convert between floating-point types +; CHECK-LABEL: fptrunc_f16_f32: +; CHECK: fcvt h0, s0 +define half @fptrunc_f16_f32(float %x) #0 { + %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + ; CHECK-LABEL: fptrunc_f32_f64: ; CHECK: fcvt s0, d0 define float @fptrunc_f32_f64(double %x) #0 { @@ -1427,6 +2082,13 @@ ret double %val } +; CHECK-LABEL: fpext_f32_f16: +; CHECK: fcvt s0, h0 +define float @fpext_f32_f16(half %x) #0 { + %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: fpext_f64_f32: ; CHECK: fcvt d0, s0 define double @fpext_f64_f32(float %x) #0 { @@ -1451,6 +2113,48 @@ attributes #0 = { strictfp } +declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata) +declare i32 
@llvm.experimental.constrained.fptosi.i32.f16(half, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata) +declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.maxnum.f16(half, 
half, metadata) +declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata) +declare half @llvm.experimental.constrained.ceil.f16(half, metadata) +declare half @llvm.experimental.constrained.floor.f16(half, metadata) +declare i32 @llvm.experimental.constrained.lround.f16(half, metadata) +declare i64 @llvm.experimental.constrained.llround.f16(half, metadata) +declare half @llvm.experimental.constrained.round.f16(half, metadata) +declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) +declare half @llvm.experimental.constrained.trunc.f16(half, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata) + declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) @@ -1576,9 +2280,11 @@ declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata) declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)