Index: include/clang/Basic/arm_neon.td =================================================================== --- include/clang/Basic/arm_neon.td +++ include/clang/Basic/arm_neon.td @@ -988,6 +988,7 @@ def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; +def FRINTI_S32 : SInst<"vrndi", "dd", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { @@ -997,7 +998,7 @@ def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">; def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; -def FRINTI_S64 : SInst<"vrndi", "dd", "fdQfQd">; +def FRINTI_S64 : SInst<"vrndi", "dd", "dQd">; } //////////////////////////////////////////////////////////////////////////////// Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -4155,6 +4155,8 @@ NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), @@ -4331,6 +4333,8 @@ NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), @@ -5091,7 +5095,10 @@ case NEON::BI__builtin_neon_vrsqrteq_v: Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); - + case NEON::BI__builtin_neon_vrndi_v: + case NEON::BI__builtin_neon_vrndiq_v: + Int = Intrinsic::nearbyint; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", @@ -7483,11 +7490,6 @@ Int = Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); - } case NEON::BI__builtin_neon_vrndmh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::floor; @@ -7508,6 +7510,11 @@ Int = Intrinsic::aarch64_neon_frintn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); } + case NEON::BI__builtin_neon_vrndns_f32: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::aarch64_neon_frintn; + return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); + } case NEON::BI__builtin_neon_vrndph_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::ceil; Index: test/CodeGen/aarch64-neon-misc.c =================================================================== --- test/CodeGen/aarch64-neon-misc.c +++ test/CodeGen/aarch64-neon-misc.c @@ -2253,22 +2253,6 @@ return vcvt_high_f64_f32(a); } -// CHECK-LABEL: @test_vrndn_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDN1_I]] -float32x2_t test_vrndn_f32(float32x2_t a) { - return vrndn_f32(a); -} - -// CHECK-LABEL: @test_vrndnq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) -// 
CHECK: ret <4 x float> [[VRNDN1_I]] -float32x4_t test_vrndnq_f32(float32x4_t a) { - return vrndnq_f32(a); -} - // CHECK-LABEL: @test_vrndnq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) @@ -2277,22 +2261,6 @@ return vrndnq_f64(a); } -// CHECK-LABEL: @test_vrnda_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDA1_I]] -float32x2_t test_vrnda_f32(float32x2_t a) { - return vrnda_f32(a); -} - -// CHECK-LABEL: @test_vrndaq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDA1_I]] -float32x4_t test_vrndaq_f32(float32x4_t a) { - return vrndaq_f32(a); -} - // CHECK-LABEL: @test_vrndaq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a) @@ -2301,22 +2269,6 @@ return vrndaq_f64(a); } -// CHECK-LABEL: @test_vrndp_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDP1_I]] -float32x2_t test_vrndp_f32(float32x2_t a) { - return vrndp_f32(a); -} - -// CHECK-LABEL: @test_vrndpq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDP1_I]] -float32x4_t test_vrndpq_f32(float32x4_t a) { - return vrndpq_f32(a); -} - // CHECK-LABEL: @test_vrndpq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) @@ -2325,22 +2277,6 @@ return vrndpq_f64(a); } -// CHECK-LABEL: @test_vrndm_f32( -// 
CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDM1_I]] -float32x2_t test_vrndm_f32(float32x2_t a) { - return vrndm_f32(a); -} - -// CHECK-LABEL: @test_vrndmq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDM1_I]] -float32x4_t test_vrndmq_f32(float32x4_t a) { - return vrndmq_f32(a); -} - // CHECK-LABEL: @test_vrndmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) @@ -2349,22 +2285,6 @@ return vrndmq_f64(a); } -// CHECK-LABEL: @test_vrndx_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDX1_I]] -float32x2_t test_vrndx_f32(float32x2_t a) { - return vrndx_f32(a); -} - -// CHECK-LABEL: @test_vrndxq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDX1_I]] -float32x4_t test_vrndxq_f32(float32x4_t a) { - return vrndxq_f32(a); -} - // CHECK-LABEL: @test_vrndxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) @@ -2373,22 +2293,6 @@ return vrndxq_f64(a); } -// CHECK-LABEL: @test_vrnd_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDZ1_I]] -float32x2_t test_vrnd_f32(float32x2_t a) { - return vrnd_f32(a); -} - -// CHECK-LABEL: @test_vrndq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <4 x 
float> @llvm.trunc.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDZ1_I]] -float32x4_t test_vrndq_f32(float32x4_t a) { - return vrndq_f32(a); -} - // CHECK-LABEL: @test_vrndq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) @@ -2397,22 +2301,6 @@ return vrndq_f64(a); } -// CHECK-LABEL: @test_vrndi_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) -// CHECK: ret <2 x float> [[VRNDI1_I]] -float32x2_t test_vrndi_f32(float32x2_t a) { - return vrndi_f32(a); -} - -// CHECK-LABEL: @test_vrndiq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) -// CHECK: ret <4 x float> [[VRNDI1_I]] -float32x4_t test_vrndiq_f32(float32x4_t a) { - return vrndiq_f32(a); -} - // CHECK-LABEL: @test_vrndiq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) Index: test/CodeGen/arm-neon-directed-rounding.c =================================================================== --- test/CodeGen/arm-neon-directed-rounding.c +++ test/CodeGen/arm-neon-directed-rounding.c @@ -1,96 +1,128 @@ // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 \ // RUN: -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -mem2reg | FileCheck %s +// RUN: opt -S -mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s +// RUN: %clang_cc1 -triple arm64-linux-gnueabihf -target-feature +neon \ +// RUN: -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | \ +// RUN: opt -S -mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A64 %s #include <arm_neon.h> -// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { -// CHECK: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDA_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) +// CHECK-A32: [[VRNDA_V1_I:%.*]] = call <2 x float>
@llvm.arm.neon.vrinta.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDA_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) +// CHECK-A32: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a) +// CHECK-A64: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDA_V1_I]] float32x2_t test_vrnda_f32(float32x2_t a) { return vrnda_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { -// CHECK: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDAQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a) +// CHECK-A64: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDAQ_V1_I]] float32x4_t test_vrndaq_f32(float32x4_t a) { return vrndaq_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { -// CHECK: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDM_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) +// CHECK-A32: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a) +// CHECK-A64: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDM_V1_I]] float32x2_t test_vrndm_f32(float32x2_t a) { return vrndm_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { -// CHECK: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDMQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a) +// CHECK-A64: 
[[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDMQ_V1_I]] float32x4_t test_vrndmq_f32(float32x4_t a) { return vrndmq_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { -// CHECK: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDN_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) +// CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a) +// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDN_V1_I]] float32x2_t test_vrndn_f32(float32x2_t a) { return vrndn_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { -// CHECK: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDNQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a) +// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDNQ_V1_I]] float32x4_t test_vrndnq_f32(float32x4_t a) { return vrndnq_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { -// CHECK: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDP_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) +// CHECK-A32: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a) +// CHECK-A64: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDP_V1_I]] float32x2_t test_vrndp_f32(float32x2_t a) { return vrndp_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x 
float> %a) #0 { -// CHECK: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDPQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a) +// CHECK-A64: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDPQ_V1_I]] float32x4_t test_vrndpq_f32(float32x4_t a) { return vrndpq_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { -// CHECK: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRNDX_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) +// CHECK-A32: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a) +// CHECK-A64: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDX_V1_I]] float32x2_t test_vrndx_f32(float32x2_t a) { return vrndx_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { -// CHECK: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDXQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a) +// CHECK-A64: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDXQ_V1_I]] float32x4_t test_vrndxq_f32(float32x4_t a) { return vrndxq_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { -// CHECK: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a) #2 -// CHECK: ret <2 x float> [[VRND_V1_I]] +// CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) +// CHECK-A32: [[VRND_V1_I:%.*]] = call <2 x float> 
@llvm.arm.neon.vrintz.v2f32(<2 x float> %a) +// CHECK-A64: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRND_V1_I]] float32x2_t test_vrnd_f32(float32x2_t a) { return vrnd_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { -// CHECK: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a) #2 -// CHECK: ret <4 x float> [[VRNDQ_V1_I]] +// CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) +// CHECK-A32: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a) +// CHECK-A64: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDQ_V1_I]] float32x4_t test_vrndq_f32(float32x4_t a) { return vrndq_f32(a); } -// CHECK-LABEL: define float @test_vrndns_f32(float %a) #0 { -// CHECK: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a) #2 -// CHECK: ret float [[VRNDN_I]] +// CHECK-LABEL: define float @test_vrndns_f32(float %a) +// CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a) +// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.aarch64.neon.frintn.f32(float %a) +// CHECK: ret float [[VRNDN_I]] float32_t test_vrndns_f32(float32_t a) { return vrndns_f32(a); } + +// CHECK-LABEL: define <2 x float> @test_vrndi_f32(<2 x float> %a) +// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) +// CHECK: ret <2 x float> [[VRNDI1_I]] +float32x2_t test_vrndi_f32(float32x2_t a) { + return vrndi_f32(a); +} + +// CHECK-LABEL: define <4 x float> @test_vrndiq_f32(<4 x float> %a) +// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) +// CHECK: ret <4 x float> [[VRNDI1_I]] +float32x4_t test_vrndiq_f32(float32x4_t a) { + return vrndiq_f32(a); +} Index: test/CodeGen/arm64-vrnd.c 
=================================================================== --- test/CodeGen/arm64-vrnd.c +++ test/CodeGen/arm64-vrnd.c @@ -2,50 +2,21 @@ #include <arm_neon.h> -int32x2_t rnd1(float32x2_t a) { return vrnd_f32(a); } -// CHECK: call <2 x float> @llvm.trunc.v2f32(<2 x float> -int32x4_t rnd3(float32x4_t a) { return vrndq_f32(a); } -// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> int64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); } // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> - -int32x2_t rnd7(float32x2_t a) { return vrndn_f32(a); } -// CHECK: call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> -int32x4_t rnd8(float32x4_t a) { return vrndnq_f32(a); } -// CHECK: call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> -int64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); } -// CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> -int64x2_t rnd10(float64x2_t a) { return vrndnq_f64(a); } +int64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); } // CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> -int32x2_t rnd11(float32x2_t a) { return vrndm_f32(a); } -// CHECK: call <2 x float> @llvm.floor.v2f32(<2 x float> -int32x4_t rnd12(float32x4_t a) { return vrndmq_f32(a); } -// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> int64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); } // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> -int64x2_t rnd14(float64x2_t a) { return vrndmq_f64(a); } -// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> -int32x2_t rnd15(float32x2_t a) { return vrndp_f32(a); } -// CHECK: call <2 x float> @llvm.ceil.v2f32(<2 x float> -int32x4_t rnd16(float32x4_t a) { return vrndpq_f32(a); } -// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> int64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); } // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> -int32x2_t rnd19(float32x2_t a) { return vrnda_f32(a); } -// CHECK: call <2 x float> @llvm.round.v2f32(<2 x float>
-int32x4_t rnd20(float32x4_t a) { return vrndaq_f32(a); } -// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> int64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); } // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> -int32x2_t rnd23(float32x2_t a) { return vrndx_f32(a); } -// CHECK: call <2 x float> @llvm.rint.v2f32(<2 x float> -int32x4_t rnd24(float32x4_t a) { return vrndxq_f32(a); } -// CHECK: call <4 x float> @llvm.rint.v4f32(<4 x float> int64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); } // CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double>