Index: clang/include/clang/Basic/arm_fp16.td
===================================================================
--- clang/include/clang/Basic/arm_fp16.td
+++ clang/include/clang/Basic/arm_fp16.td
@@ -14,16 +14,63 @@
 include "arm_neon_incl.td"
 
-// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
-
+// ARMv8.2-A FP16 intrinsics for A32/A64.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)" in {
   // Negate
   def VNEGSH          : SInst<"vneg", "ss", "Sh">;
 
-  // Reciprocal/Sqrt
-  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  // Sqrt
   def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
-  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Rounding
+  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
+  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
+  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
+  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
+  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
+  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
+  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+
+  // Conversion
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "iUi">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
+  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
+  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
+  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
+  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
+  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
+  let isVCVT_N = 1 in {
+    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "iUi">;
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
+  }
+
+  // Scalar Absolute Value
+  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
+
+  // Add/Sub
+  def VADDSH          : SInst<"vadd", "sss", "Sh">;
+  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
+
+  // Max/Min(nm)
+  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
+  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
+
+  // Multiplication/Division
+  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  def FDIVHS          : SInst<"vdiv", "sss", "Sh">;
+
+  // Vector fused multiply-add operations
+  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
+  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
+}
+
+// ARMv8.2-A FP16 intrinsics for A64 only.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
   // Reciprocal Estimate
   def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
@@ -34,67 +81,51 @@
   // Reciprocal Square Root Estimate
   def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
 
-  // Rounding
-  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
-  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
-  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
-  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
-  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
-  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
-  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+  // Reciprocal
+  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Comparison
+  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
+  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
+  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
+  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
+  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
+  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
+  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
+  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
+  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
+  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Conversion
-  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
+  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "slUsUl">;
   def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
-  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
   def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
   def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
-  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
   def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
   def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
-  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
   def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
   def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
-  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
   def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
   def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
-  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
   def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
   def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
-  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
   def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
   def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
-  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
   def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
   def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
-  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
   def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
   def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
-  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
   def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
-  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
   let isVCVT_N = 1 in {
-    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
+    def SCALAR_SCVTFSHO1: SInst<"vcvth_n_f16", "Ysi", "slUsUl">;
     def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
     def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
     def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
     def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
   }
 
-  // Comparison
-  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
-  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
-  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
-  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
-  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
-  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
-  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
-  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
-  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
-  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Absolute Compare Mask Greater Than Or Equal
   def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
@@ -104,28 +135,13 @@
   def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
   def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
 
-  // Scalar Absolute Value
-  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
-
   // Scalar Absolute Difference
   def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
 
-  // Add/Sub
-  def VADDSH          : SInst<"vadd", "sss", "Sh">;
-  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
-
   // Max/Min
   def VMAXHS          : SInst<"vmax", "sss", "Sh">;
   def VMINHS          : SInst<"vmin", "sss", "Sh">;
-  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
-  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
 
-  // Multiplication/Division
-  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  // Multiplication
   def MULXHS          : SInst<"vmulx", "sss", "Sh">;
-  def FDIVHS          : SInst<"vdiv", "sss", "Sh">;
-
-  // Vector fused multiply-add operations
-  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
-  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
 }
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4221,6 +4221,21 @@
   NEONMAP0(vzipq_v)
 };
 
+static const NeonIntrinsicInfo ARMSISDIntrinsicMap[] = {
+  NEONMAP1(vcvtah_s32_f16, arm_neon_vcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_u32_f16, arm_neon_vcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_s32, arm_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_u32, arm_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_s32_f16, arm_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_u32_f16, arm_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_s32_f16, arm_neon_vcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_u32_f16, arm_neon_vcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_s32_f16, arm_neon_vcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_u32_f16, arm_neon_vcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_s32_f16, arm_neon_vcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_u32_f16, arm_neon_vcvtpu, AddRetType | Add1ArgType),
+};
+
 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
@@ -5906,6 +5921,121 @@
     }
   }
 
+  // Emit the ARM SISD builtins with identical semantics.
+  auto SISDMap = makeArrayRef(ARMSISDIntrinsicMap);
+  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(SISDMap, BuiltinID,
+                                         AArch64SISDIntrinsicsProvenSorted);
+  if (Builtin) {
+    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
+    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
+    assert(Result && "SISD intrinsic should have been handled");
+    return Result;
+  }
+
+  // fp16 scalar intrinsics
+  bool usgn = false;
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vabsh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
+  case NEON::BI__builtin_neon_vcvth_u32_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s32_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int32Ty);
+    return Builder.CreateFPToSI(Ops[0], Int32Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_f16_u32:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_f16_s32: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], Int32Ty);
+    if (usgn)
+      return Builder.CreateUIToFP(Ops[0], HalfTy);
+    return Builder.CreateSIToFP(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vnegh_f16:
+    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
+  case NEON::BI__builtin_neon_vrndh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::trunc;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+  }
+  case NEON::BI__builtin_neon_vrndah_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::round;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+  }
+  case NEON::BI__builtin_neon_vrndih_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::nearbyint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+  }
+  case NEON::BI__builtin_neon_vrndmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::floor;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
+  }
+  case NEON::BI__builtin_neon_vrndnh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::arm_neon_vrintn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+  }
+  case NEON::BI__builtin_neon_vrndph_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::ceil;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+  }
+  case NEON::BI__builtin_neon_vrndxh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::rint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+  }
+  case NEON::BI__builtin_neon_vsqrth_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::sqrt;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+  }
+  case NEON::BI__builtin_neon_vaddh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+  case NEON::BI__builtin_neon_vsubh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+  case NEON::BI__builtin_neon_vmulh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+  case NEON::BI__builtin_neon_vdivh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+  case NEON::BI__builtin_neon_vminnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vminnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
+  }
+  case NEON::BI__builtin_neon_vmaxnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vmaxnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
+  }
+  case NEON::BI__builtin_neon_vfmah_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    return Builder.CreateCall(F,
+        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  case NEON::BI__builtin_neon_vfmsh_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+    Value *Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+    return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  }
+
   switch (BuiltinID) {
   default: break;
@@ -6013,7 +6143,7 @@
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(Result.getZExtValue());
-  bool usgn = Type.isUnsigned();
+  usgn = Type.isUnsigned();
   bool rightShift = false;
 
   llvm::VectorType *VTy = GetNeonType(this, Type,
@@ -6025,7 +6155,7 @@
   // Many NEON builtins have identical semantics and uses in ARM and
   // AArch64. Emit these in a single function.
   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
-  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+  Builtin = findNeonIntrinsicInMap(
       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
   if (Builtin)
     return EmitCommonNeonBuiltinExpr(
Index: clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
+// RUN:  -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN:  | opt -S -mem2reg \
+// RUN:  | FileCheck %s
+
+// REQUIRES: arm-registered-target
+
+#include <arm_fp16.h>
+
+// CHECK-LABEL: test_vabsh_f16
+// CHECK: [[ABS:%.*]] = call half @llvm.fabs.f16(half %{{.*}})
+float16_t test_vabsh_f16(float16_t a) {
+  return vabsh_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s32
+// CHECK: [[VCVT:%.*]] = sitofp i32 %a to half
+float16_t test_vcvth_f16_s32 (int32_t a) {
+  return vcvth_f16_s32(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u32
+// CHECK: [[VCVT:%.*]] = uitofp i32 %a to half
+float16_t test_vcvth_f16_u32 (uint32_t a) {
+  return vcvth_f16_u32(a);
+}
+
+// CHECK-LABEL: test_vcvth_s32_f16
+// CHECK: [[VCVT:%.*]] = fptosi half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvth_s32_f16 (float16_t a) {
+  return vcvth_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u32_f16
+// CHECK: [[VCVT:%.*]] = fptoui half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvth_u32_f16 (float16_t a) {
+  return vcvth_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtas.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtah_s32_f16 (float16_t a) {
+  return vcvtah_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtau.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtah_u32_f16 (float16_t a) {
+  return vcvtah_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtms.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtmh_s32_f16 (float16_t a) {
+  return vcvtmh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtmu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtmh_u32_f16 (float16_t a) {
+  return vcvtmh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtns.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtnh_s32_f16 (float16_t a) {
+  return vcvtnh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtnu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtnh_u32_f16 (float16_t a) {
+  return vcvtnh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtps.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtph_s32_f16 (float16_t a) {
+  return vcvtph_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtpu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtph_u32_f16 (float16_t a) {
+  return vcvtph_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vnegh_f16
+// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a
+float16_t test_vnegh_f16(float16_t a) {
+  return vnegh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.trunc.f16(half %{{.*}})
+float16_t test_vrndh_f16(float16_t a) {
+  return vrndh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndah_f16
+// CHECK: [[RND:%.*]] = call half @llvm.round.f16(half %{{.*}})
+float16_t test_vrndah_f16(float16_t a) {
+  return vrndah_f16(a);
+}
+
+// CHECK-LABEL: test_vrndih_f16
+// CHECK: [[RND:%.*]] = call half @llvm.nearbyint.f16(half %{{.*}})
+float16_t test_vrndih_f16(float16_t a) {
+  return vrndih_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.floor.f16(half %{{.*}})
+float16_t test_vrndmh_f16(float16_t a) {
+  return vrndmh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.arm.neon.vrintn.f16(half %{{.*}})
+float16_t test_vrndnh_f16(float16_t a) {
+  return vrndnh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndph_f16
+// CHECK: [[RND:%.*]] = call half @llvm.ceil.f16(half %{{.*}})
+float16_t test_vrndph_f16(float16_t a) {
+  return vrndph_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.rint.f16(half %{{.*}})
+float16_t test_vrndxh_f16(float16_t a) {
+  return vrndxh_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrth_f16
+// CHECK: [[SQR:%.*]] = call half @llvm.sqrt.f16(half %{{.*}})
+float16_t test_vsqrth_f16(float16_t a) {
+  return vsqrth_f16(a);
+}
+
+// CHECK-LABEL: test_vaddh_f16
+// CHECK: [[ADD:%.*]] = fadd half [[A:%.*]], [[B:%.*]]
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+  return vaddh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s32
+// CHECK: [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_s32(int32_t a) {
+  return vcvth_n_f16_s32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_s32_f16
+// CHECK: [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxs.i32.f16(half {{%.*}}, i32 1)
+// CHECK: ret i32 [[CVT]]
+int32_t test_vcvth_n_s32_f16(float16_t a) {
+  return vcvth_n_s32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u32
+// CHECK: [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_u32(uint32_t a) {
+  return vcvth_n_f16_u32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_u32_f16
+// CHECK: [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxu.i32.f16(half {{%.*}}, i32 1)
+// CHECK: ret i32 [[CVT]]
+uint32_t test_vcvth_n_u32_f16(float16_t a) {
+  return vcvth_n_u32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vdivh_f16
+// CHECK: [[DIV:%.*]] = fdiv half [[A:%.*]], [[B:%.*]]
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+  return vdivh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmh_f16
+// CHECK: [[MAX:%.*]] = call half @llvm.arm.neon.vmaxnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
+  return vmaxnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmh_f16
+// CHECK: [[MIN:%.*]] = call half @llvm.arm.neon.vminnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vminnmh_f16(float16_t a, float16_t b) {
+  return vminnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulh_f16
+// CHECK: [[MUL:%.*]] = fmul half [[A:%.*]], [[B:%.*]]
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+  return vmulh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubh_f16
+// CHECK: [[SUB:%.*]] = fsub half [[A:%.*]], [[B:%.*]]
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+  return vsubh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfmah_f16
+// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[A:%.*]], half [[B:%.*]], half [[C:%.*]])
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmah_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_f16
+// CHECK: [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmsh_f16(a, b, c);
+}
Index: llvm/include/llvm/IR/IntrinsicsARM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsARM.td
+++ llvm/include/llvm/IR/IntrinsicsARM.td
@@ -355,6 +355,9 @@
 class Neon_2Arg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                 [IntrNoMem]>;
+class Float_2Arg_Intrinsic
+    : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+                [IntrNoMem]>;
 class Neon_2Arg_Narrow_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
                 [IntrNoMem]>;
@@ -377,8 +380,8 @@
     : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
 class Neon_CvtFPToFx_Intrinsic
     : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-class Neon_CvtFPtoInt_1Arg_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+class CvtFPtoInt_1Arg_Intrinsic
+    : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
 class Neon_Compare_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
                 [IntrNoMem]>;
@@ -431,12 +434,12 @@
 // Vector Maximum.
 def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
 def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
+def int_arm_neon_vmaxnm : Float_2Arg_Intrinsic;
 
 // Vector Minimum.
 def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
 def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
+def int_arm_neon_vminnm : Float_2Arg_Intrinsic;
 
 // Vector Reciprocal Step.
 def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
@@ -552,15 +555,15 @@
 // Vector Reciprocal Square Root Estimate.
 def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
 
-// Vector Conversions Between Floating-point and Integer
-def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
+// Conversions Between Floating-point and Integer
+def int_arm_neon_vcvtau : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtas : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtnu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtns : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtpu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtps : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtmu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtms : CvtFPtoInt_1Arg_Intrinsic;
 
 // Vector Conversions Between Floating-point and Fixed-point.
 def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
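A quick way to sanity-check the A32 exposure by hand, beyond the lit test above, is a small end-to-end program. The sketch below is hypothetical and not part of the patch: it assumes a clang built with this change and an Armv8.2-A FP16 target (e.g. --target=armv8.2a-linux-gnueabihf -march=armv8.2-a+fp16), and the helper name fp16_demo is invented for illustration.

// Hypothetical example, not part of this patch: exercises a few of the
// scalar FP16 intrinsics that this change makes available on A32.
#include <arm_fp16.h>
#include <stdio.h>

static float16_t fp16_demo(float16_t a, float16_t b, float16_t c) {
  float16_t prod  = vmulh_f16(a, b);          // scalar half-precision multiply
  float16_t fused = vfmah_f16(c, a, b);       // c + a * b, single rounding step
  float16_t hi    = vmaxnmh_f16(prod, fused); // IEEE maxNum semantics
  return vrndnh_f16(hi);                      // round to nearest, ties to even
}

int main(void) {
  float16_t r = fp16_demo((float16_t)1.5f, (float16_t)2.0f, (float16_t)0.25f);
  printf("%f\n", (double)(float)r); // widen for printf; no half format specifier
  return 0;
}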