diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10620,17 +10620,23 @@
   }
   case NEON::BI__builtin_neon_vrndnh_f16: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
-    Int = Intrinsic::aarch64_neon_frintn;
+    Int = Builder.getIsFPConstrained()
+              ? Intrinsic::experimental_constrained_roundeven
+              : Intrinsic::roundeven;
     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndn_v:
   case NEON::BI__builtin_neon_vrndnq_v: {
-    Int = Intrinsic::aarch64_neon_frintn;
+    Int = Builder.getIsFPConstrained()
+              ? Intrinsic::experimental_constrained_roundeven
+              : Intrinsic::roundeven;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndns_f32: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
-    Int = Intrinsic::aarch64_neon_frintn;
+    Int = Builder.getIsFPConstrained()
+              ? Intrinsic::experimental_constrained_roundeven
+              : Intrinsic::roundeven;
     return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndph_f16: {
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -18155,7 +18155,7 @@

 // CHECK-LABEL: @test_vrndn_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
 // CHECK: ret <1 x double> [[VRNDN1_I]]
 float64x1_t test_vrndn_f64(float64x1_t a) {
   return vrndn_f64(a);
diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -2287,7 +2287,7 @@

 // CHECK-LABEL: @test_vrndnq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a)
+// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
 // CHECK: ret <2 x double> [[VRNDN1_I]]
 float64x2_t test_vrndnq_f64(float64x2_t a) {
   return vrndnq_f64(a);
diff --git a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
--- a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -366,7 +366,7 @@
 }

 // CHECK-LABEL: test_vrndnh_f16
-// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a)
+// CHECK: [[RND:%.*]] = call half @llvm.roundeven.f16(half %a)
 // CHECK: ret half [[RND]]
 float16_t test_vrndnh_f16(float16_t a) {
   return vrndnh_f16(a);
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -348,14 +348,14 @@
 }

 // CHECK-LABEL: test_vrndn_f16
-// CHECK: [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
+// CHECK: [[RND:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
 // CHECK: ret <4 x half> [[RND]]
 float16x4_t test_vrndn_f16(float16x4_t a) {
   return vrndn_f16(a);
 }

 // CHECK-LABEL: test_vrndnq_f16
-// CHECK: [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
+// CHECK: [[RND:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
 // CHECK: ret <8 x half> [[RND]]
 float16x8_t test_vrndnq_f16(float16x8_t a) {
   return vrndnq_f16(a);
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -41,7 +41,7 @@

 // CHECK-LABEL: define{{.*}} <2 x float> @test_vrndn_f32(<2 x float> %a)
 // CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a)
+// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
 // CHECK: ret <2 x float> [[VRNDN_V1_I]]
 float32x2_t test_vrndn_f32(float32x2_t a) {
   return vrndn_f32(a);
@@ -49,7 +49,7 @@

 // CHECK-LABEL: define{{.*}} <4 x float> @test_vrndnq_f32(<4 x float> %a)
 // CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a)
+// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
 // CHECK: ret <4 x float> [[VRNDNQ_V1_I]]
 float32x4_t test_vrndnq_f32(float32x4_t a) {
   return vrndnq_f32(a);
@@ -105,7 +105,7 @@

 // CHECK-LABEL: define{{.*}} float @test_vrndns_f32(float %a)
 // CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a)
-// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.aarch64.neon.frintn.f32(float %a)
+// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float %a)
 // CHECK: ret float [[VRNDN_I]]
 float32_t test_vrndns_f32(float32_t a) {
   return vrndns_f32(a);
diff --git a/clang/test/CodeGen/arm64-vrnd.c b/clang/test/CodeGen/arm64-vrnd.c
--- a/clang/test/CodeGen/arm64-vrnd.c
+++ b/clang/test/CodeGen/arm64-vrnd.c
@@ -6,7 +6,7 @@
 // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double>

 float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); }
-// CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>
+// CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double>

 float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
 // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double>
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -471,10 +471,6 @@
   def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
   def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;

-  // Vector FP Rounding: only ties to even is unrepresented by a normal
-  // intrinsic.
-  def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
-
   // v8.5-A Vector FP Rounding
   def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
   def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -152,10 +152,10 @@
 def SDTFPUnaryOp  : SDTypeProfile<1, 1, [  // fneg, fsqrt, etc
   SDTCisSameAs<0, 1>, SDTCisFP<0>
 ]>;
-def SDTFPRoundOp  : SDTypeProfile<1, 1, [  // fpround
+def SDTFPRoundOp  : SDTypeProfile<1, 1, [  // fpround
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
 ]>;
-def SDTFPExtendOp : SDTypeProfile<1, 1, [  // fpextend
+def SDTFPExtendOp : SDTypeProfile<1, 1, [  // fpextend
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
 ]>;
 def SDTIntToFPOp : SDTypeProfile<1, 1, [  // [su]int_to_fp
@@ -486,6 +486,7 @@
 def ffloor     : SDNode<"ISD::FFLOOR"     , SDTFPUnaryOp>;
 def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
 def fround     : SDNode<"ISD::FROUND"     , SDTFPUnaryOp>;
+def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>;

 def lround     : SDNode<"ISD::LROUND"     , SDTFPToIntOp>;
 def llround    : SDNode<"ISD::LLROUND"    , SDTFPToIntOp>;
@@ -547,6 +548,8 @@
                              SDTFPToIntOp, [SDNPHasChain]>;
 def strict_fround      : SDNode<"ISD::STRICT_FROUND",
                                 SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_froundeven  : SDNode<"ISD::STRICT_FROUNDEVEN",
+                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_ftrunc      : SDNode<"ISD::STRICT_FTRUNC",
                                 SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fminnum     : SDNode<"ISD::STRICT_FMINNUM",
@@ -1414,6 +1417,9 @@
 def any_fround : PatFrags<(ops node:$src),
                           [(strict_fround node:$src),
                            (fround node:$src)]>;
+def any_froundeven : PatFrags<(ops node:$src),
+                              [(strict_froundeven node:$src),
+                               (froundeven node:$src)]>;
 def any_ftrunc : PatFrags<(ops node:$src),
                           [(strict_ftrunc node:$src),
                            (ftrunc node:$src)]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -548,6 +548,11 @@
         F->arg_begin()->getType());
       return true;
     }
+    if (Name.startswith("aarch64.neon.frintn")) {
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
+                                        F->arg_begin()->getType());
+      return true;
+    }
     if (Name.startswith("arm.neon.vclz")) {
       Type* args[2] = {
         F->arg_begin()->getType(),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -605,6 +605,7 @@
     setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
     setOperationAction(ISD::FRINT, MVT::f16, Promote);
     setOperationAction(ISD::FROUND, MVT::f16, Promote);
+    setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
     setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
     setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
     setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
@@ -624,6 +625,7 @@
     setOperationAction(ISD::FABS, MVT::v4f16, Expand);
     setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
     setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
     setOperationAction(ISD::FMA, MVT::v4f16, Expand);
     setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
     setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
@@ -648,6 +650,7 @@
     setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
     setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
     setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
     setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
     setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
     setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
@@ -667,6 +670,7 @@
       setOperationAction(ISD::FRINT, Ty, Legal);
       setOperationAction(ISD::FTRUNC, Ty, Legal);
       setOperationAction(ISD::FROUND, Ty, Legal);
+      setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
       setOperationAction(ISD::FMINNUM, Ty, Legal);
       setOperationAction(ISD::FMAXNUM, Ty, Legal);
       setOperationAction(ISD::FMINIMUM, Ty, Legal);
@@ -684,6 +688,7 @@
       setOperationAction(ISD::FRINT, MVT::f16, Legal);
       setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
       setOperationAction(ISD::FROUND, MVT::f16, Legal);
+      setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
       setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
       setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
       setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
@@ -943,6 +948,7 @@
     setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
     setOperationAction(ISD::FREM, MVT::v1f64, Expand);
     setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
     setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
     setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
     setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
@@ -1069,6 +1075,7 @@
      setOperationAction(ISD::FRINT, Ty, Legal);
      setOperationAction(ISD::FTRUNC, Ty, Legal);
      setOperationAction(ISD::FROUND, Ty, Legal);
+      setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
     }

     if (Subtarget->hasFullFP16()) {
@@ -1079,6 +1086,7 @@
        setOperationAction(ISD::FRINT, Ty, Legal);
        setOperationAction(ISD::FTRUNC, Ty, Legal);
        setOperationAction(ISD::FROUND, Ty, Legal);
+        setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
       }
     }
@@ -1403,6 +1411,7 @@
     setOperationAction(ISD::FNEG, VT, Custom);
     setOperationAction(ISD::FRINT, VT, Custom);
     setOperationAction(ISD::FROUND, VT, Custom);
+    setOperationAction(ISD::FROUNDEVEN, VT, Custom);
     setOperationAction(ISD::FSQRT, VT, Custom);
     setOperationAction(ISD::FSUB, VT, Custom);
     setOperationAction(ISD::FTRUNC, VT, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3796,12 +3796,9 @@
 defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
 defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
 defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
 defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;

-def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
-          (FRINTNDr FPR64:$Rn)>;
-
 defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
 defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
@@ -4090,7 +4087,7 @@
 defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
 defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
 defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
-defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
 defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
 defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
 defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -590,7 +590,7 @@
 ;CHECK-NOT: ld1
 ;CHECK: frintn.2s v0, v0
 ;CHECK-NEXT: ret
-  %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
+  %tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
   ret <2 x float> %tmp3
 }

@@ -599,7 +599,7 @@
 ;CHECK-NOT: ld1
 ;CHECK: frintn.4s v0, v0
 ;CHECK-NEXT: ret
-  %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
+  %tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
   ret <4 x float> %tmp3
 }

@@ -608,13 +608,13 @@
 ;CHECK-NOT: ld1
 ;CHECK: frintn.2d v0, v0
 ;CHECK-NEXT: ret
-  %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
+  %tmp3 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
   ret <2 x double> %tmp3
 }

-declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone

 ; FALLBACK-NOT: remark{{.*}}frintp_2s
 define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -245,6 +245,20 @@
   %1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
   ret %v4f16 %1
 }
+define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
+  ; CHECK-LABEL: test_v4f16.roundeven:
+  ; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT: fcvt
+  ; CHECK-FP16: frintn.4h
+  ; CHECK-FP16-NEXT: ret
+  ; GISEL-LABEL: test_v4f16.roundeven:
+  ; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
+  ; GISEL-FP16-NOT: fcvt
+  ; GISEL-FP16: frintn.4h
+  ; GISEL-FP16-NEXT: ret
+  %1 = call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}

 declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
 declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
@@ -264,6 +278,7 @@
 declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
 declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
 declare %v4f16 @llvm.round.v4f16(%v4f16) #0
+declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0

 ;;;

@@ -502,6 +517,20 @@
   %1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
   ret %v8f16 %1
 }
+define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
+  ; CHECK-LABEL: test_v8f16.roundeven:
+  ; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT: fcvt
+  ; CHECK-FP16: frintn.8h
+  ; CHECK-FP16-NEXT: ret
+  ; GISEL-LABEL: test_v8f16.roundeven:
+  ; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
+  ; GISEL-FP16-NOT: fcvt
+  ; GISEL-FP16: frintn.8h
+  ; GISEL-FP16-NEXT: ret
+  %1 = call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}

 declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
 declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
@@ -521,6 +550,7 @@
 declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
 declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
 declare %v8f16 @llvm.round.v8f16(%v8f16) #0
+declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0

 ;;; Float vectors

diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -796,6 +796,7 @@
 declare half @llvm.rint.f16(half %a) #0
 declare half @llvm.nearbyint.f16(half %a) #0
 declare half @llvm.round.f16(half %a) #0
+declare half @llvm.roundeven.f16(half %a) #0
 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
 declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0
 declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0
@@ -1313,6 +1314,32 @@
   ret half %r
 }

+; CHECK-CVT-LABEL: test_roundeven:
+; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
+; CHECK-CVT-NEXT: ret
+
+; GISEL-CVT-LABEL: test_roundeven:
+; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
+; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
+; GISEL-CVT-NEXT: ret
+
+
+; CHECK-FP16-LABEL: test_roundeven:
+; CHECK-FP16-NEXT: frintn h0, h0
+; CHECK-FP16-NEXT: ret
+
+; GISEL-FP16-LABEL: test_roundeven:
+; GISEL-FP16-NEXT: frintn h0, h0
+; GISEL-FP16-NEXT: ret
+
+define half @test_roundeven(half %a) #0 {
+  %r = call half @llvm.roundeven.f16(half %a)
+  ret half %r
+}
+
 ; CHECK-CVT-LABEL: test_fmuladd:
 ; CHECK-CVT-NEXT: fcvt s1, h1
 ; CHECK-CVT-NEXT: fcvt s0, h0
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -266,6 +266,13 @@
   ret float %val
 }

+; CHECK-LABEL: roundeven_f32:
+; CHECK: frintn s0, s0
+define float @roundeven_f32(float %x) #0 {
+  %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
 ; CHECK-LABEL: trunc_f32:
 ; CHECK: frintz s0, s0
 define float @trunc_f32(float %x) #0 {
@@ -729,6 +736,13 @@
   ret double %val
 }

+; CHECK-LABEL: roundeven_f64:
+; CHECK: frintn d0, d0
+define double @roundeven_f64(double %x) #0 {
+  %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
+  ret double %val
+}
+
 ; CHECK-LABEL: trunc_f64:
 ; CHECK: frintz d0, d0
 define double @trunc_f64(double %x) #0 {
@@ -1474,6 +1488,7 @@
 declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.f32(float, metadata)
 declare float @llvm.experimental.constrained.round.f32(float, metadata)
+declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
 declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
 declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
@@ -1515,6 +1530,7 @@
 declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
+declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
 declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/frintn.ll b/llvm/test/CodeGen/AArch64/frintn.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/frintn.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
+
+; The llvm.aarch64.neon.frintn intrinsic should be auto-upgraded to the
+; target-independent roundeven intrinsic.
+
+define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
+;CHECK-LABEL: frintn_4h:
+;CHECK: frintn v0.4h, v0.4h
+;CHECK-NEXT: ret
+  %tmp3 = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %A)
+  ret <4 x half> %tmp3
+}
+
+define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintn_2s:
+;CHECK: frintn v0.2s, v0.2s
+;CHECK-NEXT: ret
+  %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
+  ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintn_4s:
+;CHECK: frintn v0.4s, v0.4s
+;CHECK-NEXT: ret
+  %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
+  ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintn_2d:
+;CHECK: frintn v0.2d, v0.2d
+;CHECK-NEXT: ret
+  %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
+  ret <2 x double> %tmp3
+}
+
+declare <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }

+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+  %op = load <64 x half>, <64 x half>* %a
+  %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
+  store <64 x half> %res, <64 x half>* %a
+  ret void
+}
+
+define void @frintn_v128f16(<128 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v128f16:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
+; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+  %op = load <128 x half>, <128 x half>* %a
+  %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
+  store <128 x half> %res, <128 x half>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
+; CHECK-LABEL: frintn_v2f32:
+; CHECK: frintn v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
+  ret <2 x float> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
+; CHECK-LABEL: frintn_v4f32:
+; CHECK: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
+  ret <4 x float> %res
+}
+
+define void @frintn_v8f32(<8 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v8f32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <8 x float>, <8 x float>* %a
+  %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
+  store <8 x float> %res, <8 x float>* %a
+  ret void
+}
+
+define void @frintn_v16f32(<16 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v16f32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
+; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <16 x float>, <16 x float>* %a
+  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
+  store <16 x float> %res, <16 x float>* %a
+  ret void
+}
+
+define void @frintn_v32f32(<32 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v32f32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+  %op = load <32 x float>, <32 x float>* %a
+  %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
+  store <32 x float> %res, <32 x float>* %a
+  ret void
+}
+
+define void @frintn_v64f32(<64 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v64f32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+  %op = load <64 x float>, <64 x float>* %a
+  %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
+  store <64 x float> %res, <64 x float>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
+; CHECK-LABEL: frintn_v1f64:
+; CHECK: frintn d0, d0
+; CHECK-NEXT: ret
+  %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
+  ret <1 x double> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
+; CHECK-LABEL: frintn_v2f64:
+; CHECK: frintn v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
+  ret <2 x double> %res
+}
+
+define void @frintn_v4f64(<4 x double>* %a) #0 {
+; CHECK-LABEL: frintn_v4f64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
+; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <4 x double>, <4 x double>* %a
+  %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
+  store <4 x double> %res, <4 x double>* %a
+  ret void
+}
+
+define void @frintn_v8f64(<8 x double>* %a) #0 {
+; CHECK-LABEL: frintn_v8f64:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
+; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
+; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <8 x double>, <8 x double>* %a
+  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
+  store <8 x double> %res, <8 x double>* %a
+  ret void
+}
+
+define void @frintn_v16f64(<16 x double>* %a) #0 {
+; CHECK-LABEL: frintn_v16f64:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
+; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+  %op = load <16 x double>, <16 x double>* %a
+  %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
+  store <16 x double> %res, <16 x double>* %a
+  ret void
+}
+
+define void @frintn_v32f64(<32 x double>* %a) #0 {
+; CHECK-LABEL: frintn_v32f64:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
+; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+  %op = load <32 x double>, <32 x double>* %a
+  %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
+  store <32 x double> %res, <32 x double>* %a
+  ret void
+}
+
 ;
 ; TRUNC -> FRINTZ
 ;
@@ -1599,6 +1846,25 @@
 declare <16 x double> @llvm.round.v16f64(<16 x double>)
 declare <32 x double> @llvm.round.v32f64(<32 x double>)

+declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
+declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
+declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
+declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
+declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
+declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
+declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
+declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
+declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
+declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
+declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
+
 declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
 declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)

@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }

+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK: // %bb.0: