Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -1210,6 +1210,13 @@
 def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
 }
 
+let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
+def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
+def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
+def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
+def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // MaxNum/MinNum Floating Point
 
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -182,6 +182,7 @@
 
 void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
                                                 MacroBuilder &Builder) const {
+  Builder.defineMacro("__ARM_FEATURE_FRINT", "1");
   // Also include the Armv8.4 defines
   getTargetDefinesARMV84A(Opts, Builder);
 }
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5845,6 +5845,14 @@
   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
   NEONMAP0(vrndi_v),
   NEONMAP0(vrndiq_v),
   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -10561,6 +10569,30 @@
               : Intrinsic::trunc;
     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
   }
+  case NEON::BI__builtin_neon_vrnd32x_v:
+  case NEON::BI__builtin_neon_vrnd32xq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint32x;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
+  }
+  case NEON::BI__builtin_neon_vrnd32z_v:
+  case NEON::BI__builtin_neon_vrnd32zq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint32z;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
+  }
+  case NEON::BI__builtin_neon_vrnd64x_v:
+  case NEON::BI__builtin_neon_vrnd64xq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint64x;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
+  }
+  case NEON::BI__builtin_neon_vrnd64z_v:
+  case NEON::BI__builtin_neon_vrnd64zq_v: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frint64z;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
+  }
   case NEON::BI__builtin_neon_vrnd_v:
   case NEON::BI__builtin_neon_vrndq_v: {
     Int = Builder.getIsFPConstrained()
Index: clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v8.5a\
+// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vrnd32x_f32
+// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
+// CHECK: ret <2 x float> [[RND]]
+float32x2_t test_vrnd32x_f32(float32x2_t a) {
+  return vrnd32x_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32xq_f32
+// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
+// CHECK: ret <4 x float> [[RND]]
+float32x4_t test_vrnd32xq_f32(float32x4_t a) {
+  return vrnd32xq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32z_f32
+// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
+// CHECK: ret <2 x float> [[RND]]
+float32x2_t test_vrnd32z_f32(float32x2_t a) {
+  return vrnd32z_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd32zq_f32
+// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
+// CHECK: ret <4 x float> [[RND]]
+float32x4_t test_vrnd32zq_f32(float32x4_t a) {
+  return vrnd32zq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64x_f32
+// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
+// CHECK: ret <2 x float> [[RND]]
+float32x2_t test_vrnd64x_f32(float32x2_t a) {
+  return vrnd64x_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64xq_f32
+// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
+// CHECK: ret <4 x float> [[RND]]
+float32x4_t test_vrnd64xq_f32(float32x4_t a) {
+  return vrnd64xq_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64z_f32
+// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
+// CHECK: ret <2 x float> [[RND]]
+float32x2_t test_vrnd64z_f32(float32x2_t a) {
+  return vrnd64z_f32(a);
+}
+
+// CHECK-LABEL: test_vrnd64zq_f32
+// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
+// CHECK: ret <4 x float> [[RND]]
+float32x4_t test_vrnd64zq_f32(float32x4_t a) {
+  return vrnd64zq_f32(a);
+}
Index: clang/test/Preprocessor/aarch64-target-features.c
===================================================================
--- clang/test/Preprocessor/aarch64-target-features.c
+++ clang/test/Preprocessor/aarch64-target-features.c
@@ -58,6 +58,12 @@
 // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s
 // CHECK-CRYPTO: __ARM_FEATURE_CRYPTO 1
 
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.5-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_5 %s
+// CHECK-8_5: __ARM_FEATURE_FRINT 1
+
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.4-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_4 %s
+// CHECK-8_4-NOT: __ARM_FEATURE_FRINT 1
+
 // RUN: %clang -target aarch64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target arm64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -462,6 +462,12 @@
   // intrinsic.
   def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
 
+  // v8.5-A Vector FP Rounding
+  def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint64x : AdvSIMD_1FloatArg_Intrinsic;
+  def int_aarch64_neon_frint64z : AdvSIMD_1FloatArg_Intrinsic;
+
   // Scalar FP->Int conversions
 
   // Vector FP Inexact Narrowing
@@ -475,7 +481,7 @@
   def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
   def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
 
-// v8.6-A Matrix Multiply Intrinsics
+  // v8.6-A Matrix Multiply Intrinsics
   def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic;
   def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic;
   def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4091,10 +4091,10 @@
 defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
 
 let Predicates = [HasFRInt3264] in {
-  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
-  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
-  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
-  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
+  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
+  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
+  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
+  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
 } // HasFRInt3264
 
 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.5a | FileCheck %s
+
+declare <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float>)
+declare <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float>)
+
+define dso_local <2 x float> @t_vrnd32x_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd32x_f32:
+; CHECK: frint32x v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd32xq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd32xq_f32:
+; CHECK: frint32x v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+define dso_local <2 x float> @t_vrnd32z_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd32z_f32:
+; CHECK: frint32z v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd32zq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd32zq_f32:
+; CHECK: frint32z v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float>)
+declare <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float>)
+declare <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float>)
+
+define dso_local <2 x float> @t_vrnd64x_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd64x_f32:
+; CHECK: frint64x v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd64xq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd64xq_f32:
+; CHECK: frint64x v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
+
+define dso_local <2 x float> @t_vrnd64z_f32(<2 x float> %a) {
+; CHECK-LABEL: t_vrnd64z_f32:
+; CHECK: frint64z v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
+  ret <2 x float> %val
+}
+
+define dso_local <4 x float> @t_vrnd64zq_f32(<4 x float> %a) {
+; CHECK-LABEL: t_vrnd64zq_f32:
+; CHECK: frint64z v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+  %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
+  ret <4 x float> %val
+}
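
For reference, a minimal usage sketch of the C intrinsics this patch exposes (not part of the diff itself). It assumes a toolchain carrying this change, compiled with e.g. -march=armv8.5-a so that __ARM_FEATURE_FRINT is defined; the wrapper function names below are illustrative only, everything else comes from the patch.

#include <arm_neon.h>

#ifdef __ARM_FEATURE_FRINT
/* Round each lane to an integral value within the signed 32-bit range,
   using the current rounding mode (selects FRINT32X via the new
   llvm.aarch64.neon.frint32x intrinsic). */
float32x4_t round32_current_mode(float32x4_t v) {
  return vrnd32xq_f32(v);
}

/* Round each lane toward zero to an integral value within the signed
   64-bit range (selects FRINT64Z). */
float32x2_t round64_toward_zero(float32x2_t v) {
  return vrnd64z_f32(v);
}
#endif

The "x" variants honour the ambient rounding mode while the "z" variants round toward zero, mirroring the FRINT32X/FRINT32Z/FRINT64X/FRINT64Z instruction patterns added above.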