diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1232,6 +1232,11 @@ def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">; def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">; def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">; + +def FRINT32X_S64 : SInst<"vrnd32x", "..", "dQd">; +def FRINT32Z_S64 : SInst<"vrnd32z", "..", "dQd">; +def FRINT64X_S64 : SInst<"vrnd64x", "..", "dQd">; +def FRINT64Z_S64 : SInst<"vrnd64z", "..", "dQd">; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6372,13 +6372,21 @@ NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP0(vrndi_v), 
NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), @@ -11740,25 +11748,33 @@ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); } case NEON::BI__builtin_neon_vrnd32x_f32: - case NEON::BI__builtin_neon_vrnd32xq_f32: { + case NEON::BI__builtin_neon_vrnd32xq_f32: + case NEON::BI__builtin_neon_vrnd32x_f64: + case NEON::BI__builtin_neon_vrnd32xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); } case NEON::BI__builtin_neon_vrnd32z_f32: - case NEON::BI__builtin_neon_vrnd32zq_f32: { + case NEON::BI__builtin_neon_vrnd32zq_f32: + case NEON::BI__builtin_neon_vrnd32z_f64: + case NEON::BI__builtin_neon_vrnd32zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); } case NEON::BI__builtin_neon_vrnd64x_f32: - case NEON::BI__builtin_neon_vrnd64xq_f32: { + case NEON::BI__builtin_neon_vrnd64xq_f32: + case NEON::BI__builtin_neon_vrnd64x_f64: + case NEON::BI__builtin_neon_vrnd64xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); } case NEON::BI__builtin_neon_vrnd64z_f32: - case NEON::BI__builtin_neon_vrnd64zq_f32: { + case NEON::BI__builtin_neon_vrnd64zq_f32: + case NEON::BI__builtin_neon_vrnd64z_f64: + case NEON::BI__builtin_neon_vrnd64zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); diff --git a/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c b/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c --- a/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c +++ b/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c @@ -62,3 +62,59 @@ float32x4_t 
test_vrnd64zq_f32(float32x4_t a) { return vrnd64zq_f32(a); } + +// CHECK-LABEL: test_vrnd32x_f64 +// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a) +// CHECK: ret <1 x double> [[RND]] +float64x1_t test_vrnd32x_f64(float64x1_t a) { + return vrnd32x_f64(a); +} + +// CHECK-LABEL: test_vrnd32xq_f64 +// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a) +// CHECK: ret <2 x double> [[RND]] +float64x2_t test_vrnd32xq_f64(float64x2_t a) { + return vrnd32xq_f64(a); +} + +// CHECK-LABEL: test_vrnd32z_f64 +// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a) +// CHECK: ret <1 x double> [[RND]] +float64x1_t test_vrnd32z_f64(float64x1_t a) { + return vrnd32z_f64(a); +} + +// CHECK-LABEL: test_vrnd32zq_f64 +// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a) +// CHECK: ret <2 x double> [[RND]] +float64x2_t test_vrnd32zq_f64(float64x2_t a) { + return vrnd32zq_f64(a); +} + +// CHECK-LABEL: test_vrnd64x_f64 +// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a) +// CHECK: ret <1 x double> [[RND]] +float64x1_t test_vrnd64x_f64(float64x1_t a) { + return vrnd64x_f64(a); +} + +// CHECK-LABEL: test_vrnd64xq_f64 +// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a) +// CHECK: ret <2 x double> [[RND]] +float64x2_t test_vrnd64xq_f64(float64x2_t a) { + return vrnd64xq_f64(a); +} + +// CHECK-LABEL: test_vrnd64z_f64 +// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a) +// CHECK: ret <1 x double> [[RND]] +float64x1_t test_vrnd64z_f64(float64x1_t a) { + return vrnd64z_f64(a); +} + +// CHECK-LABEL: test_vrnd64zq_f64 +// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a) +// CHECK: ret <2 x double> [[RND]] +float64x2_t test_vrnd64zq_f64(float64x2_t a) { + return vrnd64zq_f64(a); 
+} diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -6292,24 +6292,27 @@ : SIMDTwoVectorFP; // Supports only S and D element sizes -let mayRaiseFPException = 1, Uses = [FPCR] in -multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { +multiclass SIMDTwoVectorSD<bit U, bit opc, string asm, SDPatternOperator OpNode> { - - def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64, + let mayRaiseFPException = 1, Uses = [FPCR] in { + def v2f32 : BaseSIMDTwoSameVector<0, U, 00, {0b1111, opc}, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, 00, {0b1111, opc}, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, 01, {0b1111, opc}, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; + } + + def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))), + (!cast<Instruction>(NAME # "Dr") FPR64:$Rn)>; } multiclass FRIntNNTVector<bit U, bit opc, string asm, SDPatternOperator OpNode> : - SIMDTwoVectorSD<U, {0b1111, opc}, asm, OpNode>; + SIMDTwoVectorSD<U, opc, asm, OpNode>; // Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll b/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll --- a/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll @@ -81,3 +81,85 @@ %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a) ret <4 x float> %val } + +declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>) +declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>) +declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>) + +define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) { +; CHECK-LABEL: t_vrnd32x_f64: +; CHECK: frint32x d0, d0 +; CHECK-NEXT: ret +entry: + %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a) + ret <1 x double> %val +} + +define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) { +; CHECK-LABEL: t_vrnd32xq_f64: +; CHECK: frint32x v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a) + ret <2 x double> %val +} + +define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) { +; CHECK-LABEL: t_vrnd32z_f64: +; CHECK: frint32z d0, d0 +; CHECK-NEXT: ret +entry: + %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a) + ret <1 x double> %val +} + +define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) { +; CHECK-LABEL: t_vrnd32zq_f64: +; CHECK: frint32z v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a) + ret <2 x double> %val +} + +declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>) +declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>) +declare <2 x
double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>) + +define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) { +; CHECK-LABEL: t_vrnd64x_f64: +; CHECK: frint64x d0, d0 +; CHECK-NEXT: ret +entry: + %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a) + ret <1 x double> %val +} + +define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) { +; CHECK-LABEL: t_vrnd64xq_f64: +; CHECK: frint64x v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a) + ret <2 x double> %val +} + +define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) { +; CHECK-LABEL: t_vrnd64z_f64: +; CHECK: frint64z d0, d0 +; CHECK-NEXT: ret +entry: + %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a) + ret <1 x double> %val +} + +define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) { +; CHECK-LABEL: t_vrnd64zq_f64: +; CHECK: frint64z v0.2d, v0.2d +; CHECK-NEXT: ret +entry: + %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a) + ret <2 x double> %val +}