Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -91,6 +91,9 @@ SETCC_MERGE_ZERO, + // Unary floating-point operations. + FRINTP_PRED, + // Arithmetic instructions which write flags. ADDS, SUBS, @@ -866,7 +869,7 @@ SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, - unsigned NewOp) const; + unsigned NewOp, bool Merge = false) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -956,6 +956,7 @@ setOperationAction(ISD::FMA, VT, Custom); setOperationAction(ISD::FMUL, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); } } @@ -1398,6 +1399,7 @@ MAKE_CASE(AArch64ISD::ADD_PRED) MAKE_CASE(AArch64ISD::SDIV_PRED) MAKE_CASE(AArch64ISD::UDIV_PRED) + MAKE_CASE(AArch64ISD::FRINTP_PRED) MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1) MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1) MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1) @@ -3539,6 +3541,8 @@ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED); case ISD::UDIV: return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); + case ISD::FCEIL: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINTP_PRED, true); case ISD::SMIN: return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1); case ISD::UMIN: @@ -15184,7 +15188,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, - 
unsigned NewOp) const { + unsigned NewOp, + bool Merge) const { EVT VT = Op.getValueType(); SDLoc DL(Op); auto Pg = getPredicateForVector(DAG, DL, VT); @@ -15212,6 +15217,8 @@ assert(VT.isScalableVector() && "Only expect to lower scalable vector op!"); SmallVector Operands = {Pg}; + if(Merge) + Operands.insert(Operands.begin(), DAG.getUNDEF(VT)); for (const SDValue &V : Op->op_values()) { assert((isa(V) || V.getValueType().isScalableVector()) && "Only scalable vectors are supported!"); Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -162,6 +162,9 @@ def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; +def SDT_AArch64UnaryOp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def AArch64frintp_p : SDNode<"AArch64ISD::FRINTP_PRED", SDT_AArch64UnaryOp>; + def SDT_AArch64Arith : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> @@ -1381,7 +1384,7 @@ defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; - defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_p>; defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>; defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>; defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>; Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- 
llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2267,7 +2267,8 @@ def : SVE_3_Op_Pat(NAME)>; } -multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { +multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op, + SDPatternOperator op2 = null_frag> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; @@ -2275,6 +2276,13 @@ def : SVE_3_Op_Pat(NAME # _H)>; def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; + + def : Pat<(nxv8f16 (op2 nxv8f16:$Op1)), + (!cast(NAME # _H) (IMPLICIT_DEF), (PTRUE_H 31), $Op1)>; + def : Pat<(nxv4f32 (op2 nxv4f32:$Op1)), + (!cast(NAME # _S) (IMPLICIT_DEF), (PTRUE_S 31), $Op1)>; + def : Pat<(nxv2f64 (op2 nxv2f64:$Op1)), + (!cast(NAME # _D) (IMPLICIT_DEF), (PTRUE_D 31), $Op1)>; } multiclass sve2_fp_flogb { Index: llvm/test/CodeGen/AArch64/sve-fp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fp.ll +++ llvm/test/CodeGen/AArch64/sve-fp.ll @@ -408,6 +408,35 @@ ret void } +; FCEIL + +define @frintp_nxv8f16( %a) { +; CHECK-LABEL: frintp_nxv8f16: +; CHECK: ptrue p0.h +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv8f16( %a) + ret %res +} + +define @frintp_nxv4f32( %a) { +; CHECK-LABEL: frintp_nxv4f32: +; CHECK: ptrue p0.s +; CHECK-NEXT: frintp z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv4f32( %a) + ret %res +} + +define @frintp_nxv2f64( %a) { +; CHECK-LABEL: frintp_nxv2f64: +; CHECK: ptrue p0.d +; CHECK-NEXT: frintp z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f64( %a) + ret %res +} + declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare @llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -423,5 +452,9 @@ declare 
@llvm.fma.nxv4f16(, , ) declare @llvm.fma.nxv2f16(, , ) +declare @llvm.ceil.nxv8f16( ) +declare @llvm.ceil.nxv4f32() +declare @llvm.ceil.nxv2f64() + ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2