Index: clang/include/clang/Basic/BuiltinsNEON.def =================================================================== --- clang/include/clang/Basic/BuiltinsNEON.def +++ clang/include/clang/Basic/BuiltinsNEON.def @@ -16,6 +16,7 @@ #define GET_NEON_BUILTINS #include "clang/Basic/arm_neon.inc" +#include "clang/Basic/arm_fp16.inc" #undef GET_NEON_BUILTINS #undef BUILTIN Index: clang/include/clang/Basic/CMakeLists.txt =================================================================== --- clang/include/clang/Basic/CMakeLists.txt +++ clang/include/clang/Basic/CMakeLists.txt @@ -46,3 +46,7 @@ -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE arm_neon.td TARGET ClangARMNeon) +clang_tablegen(arm_fp16.inc -gen-arm-neon-sema + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE arm_fp16.td + TARGET ClangARMFP16) Index: clang/include/clang/Basic/arm_fp16.td =================================================================== --- /dev/null +++ clang/include/clang/Basic/arm_fp16.td @@ -0,0 +1,173 @@ +//===--- arm_fp16.td - ARM FP16 compiler interface ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TableGen definitions from which the ARM FP16 header +// file will be generated. +// +//===----------------------------------------------------------------------===// +// +// Each intrinsic is a subclass of the Inst class. An intrinsic can either +// generate a __builtin_* call or it can expand to a set of generic operations. +// +// The operations are subclasses of Operation providing a list of DAGs, the +// last of which is the return value. +// +//===----------------------------------------------------------------------===// + +// The base Operation class. All operations must subclass this. +class Operation<list<dag> ops=[]> { + list<dag> Ops = ops; + bit Unavailable = 0; +} + +def OP_NONE : Operation; + +//===----------------------------------------------------------------------===// +// Instruction definitions +//===----------------------------------------------------------------------===// +// See arm_neon.td for more information + +// Every intrinsic subclasses Inst. +class Inst <string n, string p, string t, Operation o> { + string Name = n; + string Prototype = p; + string Types = t; + string ArchGuard = ""; + + Operation Operation = o; + bit CartesianProductOfTypes = 0; + bit BigEndianSafe = 0; + bit isShift = 0; + bit isScalarShift = 0; + bit isVCVT_N = 0; +} + +// The following instruction classes are implemented via builtins. +// These declarations are used to generate Builtins.def: +// +// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8") +// IInst: Instruction with generic integer suffix (e.g., "i8") +class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} +class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} +
+// ARMv8.2-A FP16 intrinsics.
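+// Prototype strings are read left to right: the first letter is the return
+// type and the remaining letters are the argument types. 's' is a scalar of
+// the type named by the type string (here "Sh", i.e. float16_t), and the
+// modifiers added to NeonEmitter.cpp by this patch select fixed-width
+// scalars: 'Y' an f16, 'I'/'L' an s32/s64, 'U'/'O' a u32/u64. So, for
+// example, SInst<"vadd", "sss", "Sh"> describes
+// float16_t vaddh_f16(float16_t, float16_t), as the tests below confirm.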
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { + + // Negate + def VNEGSH : SInst<"vneg", "ss", "Sh">; + + // Reciprocal/Sqrt + def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; + def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; + def SCALAR_FRSQRTSH: IInst<"vrsqrts", "sss", "Sh">; + + // Reciprocal Estimate + def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; + + // Reciprocal Exponent + def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; + + // Reciprocal Square Root Estimate + def SCALAR_FRSQRTEH: IInst<"vrsqrte", "ss", "Sh">; + + // Rounding + def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; + def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; + def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; + def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; + def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; + def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; + def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; + + // Conversion + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "silUsUiUl">; + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; + def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">; + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; + def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; + def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">; + def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">; + def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">; + def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">; + def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">; + def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">; + def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">; + def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">; + def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">; + def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">; + def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">; + def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">; + def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">; + def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; + def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; + def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; + + def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">; + def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; + def SCALAR_FCVTZSH1O : SInst<"vcvt_n_s32", "Isi", "Sh">; + def SCALAR_FCVTZSH2O : SInst<"vcvt_n_s64", "Lsi", "Sh">; + def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; + def SCALAR_FCVTZUH1O : SInst<"vcvt_n_u32", "Usi", "Sh">; + def SCALAR_FCVTZUH2O : SInst<"vcvt_n_u64", "Osi", "Sh">; + + // Comparison + def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; + def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; + def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">; + def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">; + def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">; + def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">; + def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">; + def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">; + def SCALAR_CMLTH : SInst<"vclt", "bss", 
"Sh">; + def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">; + + // Absolute Compare Mask Greater Than Or Equal + def SCALAR_FACGEH: IInst<"vcage", "bss", "Sh">; + def SCALAR_FACLEH: IInst<"vcale", "bss", "Sh">; + + // Absolute Compare Mask Greater Than + def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">; + def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">; + + // Scalar Absolute Value + def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">; + + // Scalar Absolute Difference + def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">; + + // Add/Sub + def VADDSH : SInst<"vadd", "sss", "Sh">; + def VSUBHS : SInst<"vsub", "sss", "Sh">; + + // Max/Min + def VMAXHS : SInst<"vmax", "sss", "Sh">; + def VMINHS : SInst<"vmin", "sss", "Sh">; + def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">; + def FMINNMHS : SInst<"vminnm", "sss", "Sh">; + + // Multiplication/Division + def VMULHS : SInst<"vmul", "sss", "Sh">; + def MULXHS : SInst<"vmulx", "sss", "Sh">; + def FDIVHS : SInst<"vdiv", "sss", "Sh">; + + // Vector fused multiply-add operations + def VFMAHS : SInst<"vfma", "ssss", "Sh">; + def VFMSHS : SInst<"vfms", "ssss", "Sh">; +} Index: clang/lib/Basic/Targets/AArch64.cpp =================================================================== --- clang/lib/Basic/Targets/AArch64.cpp +++ clang/lib/Basic/Targets/AArch64.cpp @@ -183,6 +183,8 @@ if ((FPU & NeonMode) && HasFullFP16) Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1"); + if (HasFullFP16) + Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1"); switch (ArchKind) { default: Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -4099,6 +4099,54 @@ NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), + // FP16 scalar intrinisics go here. 
+ NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabsh_f16, aarch64_neon_abs, Add1ArgType), + NEONMAP1(vcageh_f16, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcagth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcaleh_f16, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcalth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s16_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u16_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s16, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u16, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s16_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u16_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s16_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u16_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s16_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u16_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s16_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u16_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), }; #undef NEONMAP0 @@ -6123,6 +6171,58 @@ return Builder.CreateUIToFP(Ops[0], FTy); return 
Builder.CreateSIToFP(Ops[0], FTy); } + case NEON::BI__builtin_neon_vcvth_f16_u16: + case NEON::BI__builtin_neon_vcvth_f16_u32: + case NEON::BI__builtin_neon_vcvth_f16_u64: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_f16_s16: + case NEON::BI__builtin_neon_vcvth_f16_s32: + case NEON::BI__builtin_neon_vcvth_f16_s64: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + llvm::Type *FTy = HalfTy; + llvm::Type *InTy; + if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) + InTy = Int64Ty; + else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) + InTy = Int32Ty; + else + InTy = Int16Ty; + Ops[0] = Builder.CreateBitCast(Ops[0], InTy); + if (usgn) + return Builder.CreateUIToFP(Ops[0], FTy); + return Builder.CreateSIToFP(Ops[0], FTy); + } + case NEON::BI__builtin_neon_vcvth_u16_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s16_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int16Ty); + return Builder.CreateFPToSI(Ops[0], Int16Ty); + } + case NEON::BI__builtin_neon_vcvth_u32_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s32_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int32Ty); + return Builder.CreateFPToSI(Ops[0], Int32Ty); + } + case NEON::BI__builtin_neon_vcvth_u64_f16: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvth_s64_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Int64Ty); + return Builder.CreateFPToSI(Ops[0], Int64Ty); + } case NEON::BI__builtin_neon_vpaddd_s64: { llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); @@ -6164,6 +6264,7 @@ case NEON::BI__builtin_neon_vceqzd_s64: case NEON::BI__builtin_neon_vceqzd_f64: case NEON::BI__builtin_neon_vceqzs_f32: + case NEON::BI__builtin_neon_vceqzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6171,6 +6272,7 @@ case NEON::BI__builtin_neon_vcgezd_s64: case NEON::BI__builtin_neon_vcgezd_f64: case NEON::BI__builtin_neon_vcgezs_f32: + case NEON::BI__builtin_neon_vcgezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6178,6 +6280,7 @@ case NEON::BI__builtin_neon_vclezd_s64: case NEON::BI__builtin_neon_vclezd_f64: case NEON::BI__builtin_neon_vclezs_f32: + case NEON::BI__builtin_neon_vclezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6185,6 +6288,7 @@ case NEON::BI__builtin_neon_vcgtzd_s64: case NEON::BI__builtin_neon_vcgtzd_f64: case NEON::BI__builtin_neon_vcgtzs_f32: + case NEON::BI__builtin_neon_vcgtzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6192,6 +6296,7 @@ case NEON::BI__builtin_neon_vcltzd_s64: case NEON::BI__builtin_neon_vcltzd_f64: case NEON::BI__builtin_neon_vcltzs_f32: + case NEON::BI__builtin_neon_vcltzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), @@ -6244,6 +6349,26 @@ 
Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); } + case NEON::BI__builtin_neon_vceqh_f16: + case NEON::BI__builtin_neon_vcleh_f16: + case NEON::BI__builtin_neon_vclth_f16: + case NEON::BI__builtin_neon_vcgeh_f16: + case NEON::BI__builtin_neon_vcgth_f16: { + llvm::CmpInst::Predicate P; + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; + case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; + case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; + case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; + case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; + } + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); + Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); + } case NEON::BI__builtin_neon_vceqd_s64: case NEON::BI__builtin_neon_vceqd_u64: case NEON::BI__builtin_neon_vcgtd_s64: @@ -6381,6 +6506,31 @@ llvm::VectorType::get(DoubleTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); + case NEON::BI__builtin_neon_vaddh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); + case NEON::BI__builtin_neon_vsubh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); + case NEON::BI__builtin_neon_vmulh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); + case NEON::BI__builtin_neon_vdivh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); + case NEON::BI__builtin_neon_vfmah_f16: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + return Builder.CreateCall(F, + {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); + } + case NEON::BI__builtin_neon_vfmsh_f16: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); + Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); + } case NEON::BI__builtin_neon_vaddd_s64: case NEON::BI__builtin_neon_vaddd_u64: return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); @@ -6655,12 +6805,22 @@ Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); + case NEON::BI__builtin_neon_vmaxh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmax; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); + } case NEON::BI__builtin_neon_vmin_v: case NEON::BI__builtin_neon_vminq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); + case NEON::BI__builtin_neon_vminh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmin; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); + } case NEON::BI__builtin_neon_vabd_v: case NEON::BI__builtin_neon_vabdq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. @@ -6699,20 +6859,31 @@ case NEON::BI__builtin_neon_vminnmq_v: Int = Intrinsic::aarch64_neon_fminnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); + case NEON::BI__builtin_neon_vminnmh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fminnm; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); case NEON::BI__builtin_neon_vmaxnm_v: case NEON::BI__builtin_neon_vmaxnmq_v: Int = Intrinsic::aarch64_neon_fmaxnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); + case NEON::BI__builtin_neon_vmaxnmh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Int = Intrinsic::aarch64_neon_fmaxnm; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); case NEON::BI__builtin_neon_vrecpss_f32: { Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops, "vrecps"); } - case NEON::BI__builtin_neon_vrecpsd_f64: { + case NEON::BI__builtin_neon_vrecpsd_f64: Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops, "vrecps"); - } + case NEON::BI__builtin_neon_vrecpsh_f16: + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), + Ops, "vrecps"); case NEON::BI__builtin_neon_vqshrun_n_v: Int = Intrinsic::aarch64_neon_sqshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); @@ -6728,36 +6899,71 @@ case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); + case NEON::BI__builtin_neon_vrndah_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::round; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); + } case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { Int = Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); } + case NEON::BI__builtin_neon_vrndih_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::nearbyint; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); + } case NEON::BI__builtin_neon_vrndi_v: case NEON::BI__builtin_neon_vrndiq_v: { Int = Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); } + case NEON::BI__builtin_neon_vrndmh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::floor; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); + } case NEON::BI__builtin_neon_vrndm_v: case NEON::BI__builtin_neon_vrndmq_v: { Int = Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); } + case NEON::BI__builtin_neon_vrndnh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::aarch64_neon_frintn; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); + } case NEON::BI__builtin_neon_vrndn_v: case NEON::BI__builtin_neon_vrndnq_v: { Int = Intrinsic::aarch64_neon_frintn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); } + case NEON::BI__builtin_neon_vrndph_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::ceil; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); + } case NEON::BI__builtin_neon_vrndp_v: case NEON::BI__builtin_neon_vrndpq_v: { Int = Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); } + case NEON::BI__builtin_neon_vrndxh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::rint; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); + } case NEON::BI__builtin_neon_vrndx_v: case NEON::BI__builtin_neon_vrndxq_v: { Int = Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); } + case NEON::BI__builtin_neon_vrndh_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::trunc; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); + } case NEON::BI__builtin_neon_vrnd_v: case NEON::BI__builtin_neon_vrndq_v: { Int = Intrinsic::trunc; @@ -6906,6 +7112,8 @@ } case NEON::BI__builtin_neon_vnegd_s64: return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); + case NEON::BI__builtin_neon_vnegh_f16: + return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); case NEON::BI__builtin_neon_vpmaxnm_v: case NEON::BI__builtin_neon_vpmaxnmq_v: { Int = Intrinsic::aarch64_neon_fmaxnmp; @@ -6916,6 +7124,11 @@ Int = Intrinsic::aarch64_neon_fminnmp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); } + case NEON::BI__builtin_neon_vsqrth_f16: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Int = Intrinsic::sqrt; + return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); + } case NEON::BI__builtin_neon_vsqrt_v: case NEON::BI__builtin_neon_vsqrtq_v: { Int = Intrinsic::sqrt; Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -117,6 +117,9 @@ # 
Generate arm_neon.h clang_tablegen(arm_neon.h -gen-arm-neon SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_neon.td) +# Generate arm_fp16.h +clang_tablegen(arm_fp16.h -gen-arm-fp16 + SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_fp16.td) set(out_files) foreach( f ${files} ${cuda_wrapper_files} ) @@ -134,6 +137,11 @@ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_neon.h ${output_dir}/arm_neon.h COMMENT "Copying clang's arm_neon.h...") list(APPEND out_files ${output_dir}/arm_neon.h) +add_custom_command(OUTPUT ${output_dir}/arm_fp16.h + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h ${output_dir}/arm_fp16.h + COMMENT "Copying clang's arm_fp16.h...") +list(APPEND out_files ${output_dir}/arm_fp16.h) add_custom_target(clang-headers ALL DEPENDS ${out_files}) set_target_properties(clang-headers PROPERTIES FOLDER "Misc") @@ -145,6 +153,12 @@ DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include) install( + FILES ${files} ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h + COMPONENT clang-headers + PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ + DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include) + +install( FILES ${cuda_wrapper_files} COMPONENT clang-headers PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ Index: clang/lib/Headers/module.modulemap =================================================================== --- clang/lib/Headers/module.modulemap +++ clang/lib/Headers/module.modulemap @@ -38,6 +38,7 @@ explicit module neon { requires neon header "arm_neon.h" + header "arm_fp16.h" export * } } Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -1353,6 +1353,7 @@ switch (BuiltinID) { #define GET_NEON_OVERLOAD_CHECK #include "clang/Basic/arm_neon.inc" +#include "clang/Basic/arm_fp16.inc" #undef GET_NEON_OVERLOAD_CHECK } @@ -1404,6 +1405,7 @@ return false; #define GET_NEON_IMMEDIATE_CHECK #include "clang/Basic/arm_neon.inc" +#include "clang/Basic/arm_fp16.inc" #undef GET_NEON_IMMEDIATE_CHECK } Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c @@ -0,0 +1,643 @@ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \ +// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -mem2reg \ +// RUN: | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#include <arm_fp16.h> + +// CHECK-LABEL: test_vabsh_f16 +// CHECK: [[ABS:%.*]] = call half @llvm.aarch64.neon.abs.f16(half %a) +// CHECK: ret half [[ABS]] +float16_t test_vabsh_f16(float16_t a) { + return vabsh_f16(a); +} + +// CHECK-LABEL: test_vceqzh_f16 +// CHECK: [[TMP1:%.*]] = fcmp oeq half %a, 0xH0000 +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vceqzh_f16(float16_t a) { + return vceqzh_f16(a); +} + +// CHECK-LABEL: test_vcgezh_f16 +// CHECK: [[TMP1:%.*]] = fcmp oge half %a, 0xH0000 +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vcgezh_f16(float16_t a) { + return vcgezh_f16(a); +} + +// CHECK-LABEL: test_vcgtzh_f16 +// CHECK: [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000 +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t
test_vcgtzh_f16(float16_t a) { + return vcgtzh_f16(a); +} + +// CHECK-LABEL: test_vclezh_f16 +// CHECK: [[TMP1:%.*]] = fcmp ole half %a, 0xH0000 +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vclezh_f16(float16_t a) { + return vclezh_f16(a); +} + +// CHECK-LABEL: test_vcltzh_f16 +// CHECK: [[TMP1:%.*]] = fcmp olt half %a, 0xH0000 +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vcltzh_f16(float16_t a) { + return vcltzh_f16(a); +} + +// CHECK-LABEL: test_vcvth_f16_s16 +// CHECK: [[VCVT:%.*]] = sitofp i16 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_s16 (int16_t a) { + return vcvth_f16_s16(a); +} + +// CHECK-LABEL: test_vcvth_f16_s32 +// CHECK: [[VCVT:%.*]] = sitofp i32 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_s32 (int32_t a) { + return vcvth_f16_s32(a); +} + +// CHECK-LABEL: test_vcvth_f16_s64 +// CHECK: [[VCVT:%.*]] = sitofp i64 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_s64 (int64_t a) { + return vcvth_f16_s64(a); +} + +// CHECK-LABEL: test_vcvth_f16_u16 +// CHECK: [[VCVT:%.*]] = uitofp i16 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_u16 (uint16_t a) { + return vcvth_f16_u16(a); +} + +// CHECK-LABEL: test_vcvth_f16_u32 +// CHECK: [[VCVT:%.*]] = uitofp i32 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_u32 (uint32_t a) { + return vcvth_f16_u32(a); +} + +// CHECK-LABEL: test_vcvth_f16_u64 +// CHECK: [[VCVT:%.*]] = uitofp i64 %a to half +// CHECK: ret half [[VCVT]] +float16_t test_vcvth_f16_u64 (uint64_t a) { + return vcvth_f16_u64(a); +} + +// CHECK-LABEL: test_vcvth_s16_f16 +// CHECK: [[VCVT:%.*]] = fptosi half %a to i16 +// CHECK: ret i16 [[VCVT]] +int16_t test_vcvth_s16_f16 (float16_t a) { + return vcvth_s16_f16(a); +} + +// CHECK-LABEL: test_vcvth_s32_f16 +// CHECK: [[VCVT:%.*]] = fptosi half %a to i32 +// CHECK: ret i32 [[VCVT]] +int32_t test_vcvth_s32_f16 (float16_t a) { + return vcvth_s32_f16(a); +} + +// CHECK-LABEL: test_vcvth_s64_f16 +// CHECK: [[VCVT:%.*]] = fptosi half %a to i64 +// CHECK: ret i64 [[VCVT]] +int64_t test_vcvth_s64_f16 (float16_t a) { + return vcvth_s64_f16(a); +} + +// CHECK-LABEL: test_vcvth_u16_f16 +// CHECK: [[VCVT:%.*]] = fptoui half %a to i16 +// CHECK: ret i16 [[VCVT]] +uint16_t test_vcvth_u16_f16 (float16_t a) { + return vcvth_u16_f16(a); +} + +// CHECK-LABEL: test_vcvth_u32_f16 +// CHECK: [[VCVT:%.*]] = fptoui half %a to i32 +// CHECK: ret i32 [[VCVT]] +uint32_t test_vcvth_u32_f16 (float16_t a) { + return vcvth_u32_f16(a); +} + +// CHECK-LABEL: test_vcvth_u64_f16 +// CHECK: [[VCVT:%.*]] = fptoui half %a to i64 +// CHECK: ret i64 [[VCVT]] +uint64_t test_vcvth_u64_f16 (float16_t a) { + return vcvth_u64_f16(a); +} + +// CHECK-LABEL: test_vcvtah_s16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +int16_t test_vcvtah_s16_f16 (float16_t a) { + return vcvtah_s16_f16(a); +} + +// CHECK-LABEL: test_vcvtah_s32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +int32_t test_vcvtah_s32_f16 (float16_t a) { + return vcvtah_s32_f16(a); +} + +// CHECK-LABEL: test_vcvtah_s64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +int64_t test_vcvtah_s64_f16 (float16_t a) { + return vcvtah_s64_f16(a); +} + +// CHECK-LABEL: test_vcvtah_u16_f16 +// CHECK: [[VCVT:%.*]] = call i16 
@llvm.aarch64.neon.fcvtau.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +uint16_t test_vcvtah_u16_f16 (float16_t a) { + return vcvtah_u16_f16(a); +} + +// CHECK-LABEL: test_vcvtah_u32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +uint32_t test_vcvtah_u32_f16 (float16_t a) { + return vcvtah_u32_f16(a); +} + +// CHECK-LABEL: test_vcvtah_u64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +uint64_t test_vcvtah_u64_f16 (float16_t a) { + return vcvtah_u64_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_s16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +int16_t test_vcvtmh_s16_f16 (float16_t a) { + return vcvtmh_s16_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_s32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +int32_t test_vcvtmh_s32_f16 (float16_t a) { + return vcvtmh_s32_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_s64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +int64_t test_vcvtmh_s64_f16 (float16_t a) { + return vcvtmh_s64_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_u16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +uint16_t test_vcvtmh_u16_f16 (float16_t a) { + return vcvtmh_u16_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_u32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +uint32_t test_vcvtmh_u32_f16 (float16_t a) { + return vcvtmh_u32_f16(a); +} + +// CHECK-LABEL: test_vcvtmh_u64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +uint64_t test_vcvtmh_u64_f16 (float16_t a) { + return vcvtmh_u64_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_s16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +int16_t test_vcvtnh_s16_f16 (float16_t a) { + return vcvtnh_s16_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_s32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +int32_t test_vcvtnh_s32_f16 (float16_t a) { + return vcvtnh_s32_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_s64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +int64_t test_vcvtnh_s64_f16 (float16_t a) { + return vcvtnh_s64_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_u16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +uint16_t test_vcvtnh_u16_f16 (float16_t a) { + return vcvtnh_u16_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_u32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +uint32_t test_vcvtnh_u32_f16 (float16_t a) { + return vcvtnh_u32_f16(a); +} + +// CHECK-LABEL: test_vcvtnh_u64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +uint64_t test_vcvtnh_u64_f16 (float16_t a) { + return vcvtnh_u64_f16(a); +} + +// CHECK-LABEL: test_vcvtph_s16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +int16_t test_vcvtph_s16_f16 (float16_t a) { + return vcvtph_s16_f16(a); +} + +// CHECK-LABEL: test_vcvtph_s32_f16 +// CHECK: [[VCVT:%.*]] = call i32 
@llvm.aarch64.neon.fcvtps.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +int32_t test_vcvtph_s32_f16 (float16_t a) { + return vcvtph_s32_f16(a); +} + +// CHECK-LABEL: test_vcvtph_s64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +int64_t test_vcvtph_s64_f16 (float16_t a) { + return vcvtph_s64_f16(a); +} + +// CHECK-LABEL: test_vcvtph_u16_f16 +// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a) +// CHECK: ret i16 [[VCVT]] +uint16_t test_vcvtph_u16_f16 (float16_t a) { + return vcvtph_u16_f16(a); +} + +// CHECK-LABEL: test_vcvtph_u32_f16 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) +// CHECK: ret i32 [[VCVT]] +uint32_t test_vcvtph_u32_f16 (float16_t a) { + return vcvtph_u32_f16(a); +} + +// CHECK-LABEL: test_vcvtph_u64_f16 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a) +// CHECK: ret i64 [[VCVT]] +uint64_t test_vcvtph_u64_f16 (float16_t a) { + return vcvtph_u64_f16(a); +} + +// CHECK-LABEL: test_vnegh_f16 +// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a +// CHECK: ret half [[NEG]] +float16_t test_vnegh_f16(float16_t a) { + return vnegh_f16(a); +} + +// CHECK-LABEL: test_vrecpeh_f16 +// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a) +// CHECK: ret half [[VREC]] +float16_t test_vrecpeh_f16(float16_t a) { + return vrecpeh_f16(a); +} + +// CHECK-LABEL: test_vrecpxh_f16 +// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a) +// CHECK: ret half [[VREC]] +float16_t test_vrecpxh_f16(float16_t a) { + return vrecpxh_f16(a); +} + +// CHECK-LABEL: test_vrndh_f16 +// CHECK: [[RND:%.*]] = call half @llvm.trunc.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndh_f16(float16_t a) { + return vrndh_f16(a); +} + +// CHECK-LABEL: test_vrndah_f16 +// CHECK: [[RND:%.*]] = call half @llvm.round.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndah_f16(float16_t a) { + return vrndah_f16(a); +} + +// CHECK-LABEL: test_vrndih_f16 +// CHECK: [[RND:%.*]] = call half @llvm.nearbyint.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndih_f16(float16_t a) { + return vrndih_f16(a); +} + +// CHECK-LABEL: test_vrndmh_f16 +// CHECK: [[RND:%.*]] = call half @llvm.floor.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndmh_f16(float16_t a) { + return vrndmh_f16(a); +} + +// CHECK-LABEL: test_vrndnh_f16 +// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndnh_f16(float16_t a) { + return vrndnh_f16(a); +} + +// CHECK-LABEL: test_vrndph_f16 +// CHECK: [[RND:%.*]] = call half @llvm.ceil.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndph_f16(float16_t a) { + return vrndph_f16(a); +} + +// CHECK-LABEL: test_vrndxh_f16 +// CHECK: [[RND:%.*]] = call half @llvm.rint.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrndxh_f16(float16_t a) { + return vrndxh_f16(a); +} + +// CHECK-LABEL: test_vrsqrteh_f16 +// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a) +// CHECK: ret half [[RND]] +float16_t test_vrsqrteh_f16(float16_t a) { + return vrsqrteh_f16(a); +} + +// CHECK-LABEL: test_vsqrth_f16 +// CHECK: [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a) +// CHECK: ret half [[SQR]] +float16_t test_vsqrth_f16(float16_t a) { + return vsqrth_f16(a); +} + +// CHECK-LABEL: test_vaddh_f16 +// CHECK: [[ADD:%.*]] = fadd half %a, %b +// CHECK: ret half [[ADD]] +float16_t test_vaddh_f16(float16_t a, float16_t b) { + 
return vaddh_f16(a, b); +} + +// CHECK-LABEL: test_vabdh_f16 +// CHECK: [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b) +// CHECK: ret half [[ABD]] +float16_t test_vabdh_f16(float16_t a, float16_t b) { + return vabdh_f16(a, b); +} + +// CHECK-LABEL: test_vcageh_f16 +// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b) +// CHECK: ret i16 [[ABS]] +uint16_t test_vcageh_f16(float16_t a, float16_t b) { + return vcageh_f16(a, b); +} + +// CHECK-LABEL: test_vcagth_f16 +// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b) +// CHECK: ret i16 [[ABS]] +uint16_t test_vcagth_f16(float16_t a, float16_t b) { + return vcagth_f16(a, b); +} + +// CHECK-LABEL: test_vcaleh_f16 +// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b) +// CHECK: ret i16 [[ABS]] +uint16_t test_vcaleh_f16(float16_t a, float16_t b) { + return vcaleh_f16(a, b); +} + +// CHECK-LABEL: test_vcalth_f16 +// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b) +// CHECK: ret i16 [[ABS]] +uint16_t test_vcalth_f16(float16_t a, float16_t b) { + return vcalth_f16(a, b); +} + +// CHECK-LABEL: test_vceqh_f16 +// CHECK: [[TMP1:%.*]] = fcmp oeq half %a, %b +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vceqh_f16(float16_t a, float16_t b) { + return vceqh_f16(a, b); +} + +// CHECK-LABEL: test_vcgeh_f16 +// CHECK: [[TMP1:%.*]] = fcmp oge half %a, %b +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vcgeh_f16(float16_t a, float16_t b) { + return vcgeh_f16(a, b); +} + +// CHECK-LABEL: test_vcgth_f16 +// CHECK: [[TMP1:%.*]] = fcmp ogt half %a, %b +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vcgth_f16(float16_t a, float16_t b) { + return vcgth_f16(a, b); +} + +// CHECK-LABEL: test_vcleh_f16 +// CHECK: [[TMP1:%.*]] = fcmp ole half %a, %b +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vcleh_f16(float16_t a, float16_t b) { + return vcleh_f16(a, b); +} + +// CHECK-LABEL: test_vclth_f16 +// CHECK: [[TMP1:%.*]] = fcmp olt half %a, %b +// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// CHECK: ret i16 [[TMP2]] +uint16_t test_vclth_f16(float16_t a, float16_t b) { + return vclth_f16(a, b); +} + +// CHECK-LABEL: test_vcvth_n_f16_s16 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i16(i16 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_s16(int16_t a) { + return vcvth_n_f16_s16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_f16_s32 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_s32(int32_t a) { + return vcvth_n_f16_s32(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_f16_s64 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_s64(int64_t a) { + return vcvth_n_f16_s64(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_s16_f16 +// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxs.i16.f16(half %a, i32 0) +// CHECK: ret i16 [[CVT]] +int16_t test_vcvth_n_s16_f16(float16_t a) { + return vcvth_n_s16_f16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_s32_f16 +// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 0) +// CHECK: ret i32 [[CVT]] +int32_t test_vcvth_n_s32_f16(float16_t a) { + return vcvth_n_s32_f16(a,
0); +} + +// CHECK-LABEL: test_vcvth_n_s64_f16 +// CHECK: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 0) +// CHECK: ret i64 [[CVT]] +int64_t test_vcvth_n_s64_f16(float16_t a) { + return vcvth_n_s64_f16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_f16_u16 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i16(i16 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_u16(uint16_t a) { + return vcvth_n_f16_u16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_f16_u32 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_u32(uint32_t a) { + return vcvth_n_f16_u32(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_f16_u64 +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i64(i64 %a, i32 0) +// CHECK: ret half [[CVT]] +float16_t test_vcvth_n_f16_u64(uint64_t a) { + return vcvth_n_f16_u64(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_u16_f16 +// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxu.i16.f16(half %a, i32 0) +// CHECK: ret i16 [[CVT]] +uint16_t test_vcvth_n_u16_f16(float16_t a) { + return vcvth_n_u16_f16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_u32_f16 +// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 0) +// CHECK: ret i32 [[CVT]] +uint32_t test_vcvth_n_u32_f16(float16_t a) { + return vcvth_n_u32_f16(a, 0); +} + +// CHECK-LABEL: test_vcvth_n_u64_f16 +// CHECK: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f16(half %a, i32 0) +// CHECK: ret i64 [[CVT]] +uint64_t test_vcvth_n_u64_f16(float16_t a) { + return vcvth_n_u64_f16(a, 0); +} + +// CHECK-LABEL: test_vdivh_f16 +// CHECK: [[DIV:%.*]] = fdiv half %a, %b +// CHECK: ret half [[DIV]] +float16_t test_vdivh_f16(float16_t a, float16_t b) { + return vdivh_f16(a, b); +} + +// CHECK-LABEL: test_vmaxh_f16 +// CHECK: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b) +// CHECK: ret half [[MAX]] +float16_t test_vmaxh_f16(float16_t a, float16_t b) { + return vmaxh_f16(a, b); +} + +// CHECK-LABEL: test_vmaxnmh_f16 +// CHECK: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmaxnm.f16(half %a, half %b) +// CHECK: ret half [[MAX]] +float16_t test_vmaxnmh_f16(float16_t a, float16_t b) { + return vmaxnmh_f16(a, b); +} + +// CHECK-LABEL: test_vminh_f16 +// CHECK: [[MIN:%.*]] = call half @llvm.aarch64.neon.fmin.f16(half %a, half %b) +// CHECK: ret half [[MIN]] +float16_t test_vminh_f16(float16_t a, float16_t b) { + return vminh_f16(a, b); +} + +// CHECK-LABEL: test_vminnmh_f16 +// CHECK: [[MIN:%.*]] = call half @llvm.aarch64.neon.fminnm.f16(half %a, half %b) +// CHECK: ret half [[MIN]] +float16_t test_vminnmh_f16(float16_t a, float16_t b) { + return vminnmh_f16(a, b); +} + +// CHECK-LABEL: test_vmulh_f16 +// CHECK: [[MUL:%.*]] = fmul half %a, %b +// CHECK: ret half [[MUL]] +float16_t test_vmulh_f16(float16_t a, float16_t b) { + return vmulh_f16(a, b); +} + +// CHECK-LABEL: test_vmulxh_f16 +// CHECK: [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b) +// CHECK: ret half [[MUL]] +float16_t test_vmulxh_f16(float16_t a, float16_t b) { + return vmulxh_f16(a, b); +} + +// CHECK-LABEL: test_vrecpsh_f16 +// CHECK: [[RECPS:%.*]] = call half @llvm.aarch64.neon.frecps.f16(half %a, half %b) +// CHECK: ret half [[RECPS]] +float16_t test_vrecpsh_f16(float16_t a, float16_t b) { + return vrecpsh_f16(a, b); +} + +// CHECK-LABEL: test_vrsqrtsh_f16 +// CHECK: [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b) +// CHECK:
ret half [[RSQRTS]] +float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) { + return vrsqrtsh_f16(a, b); +} + +// CHECK-LABEL: test_vsubh_f16 +// CHECK: [[SUB:%.*]] = fsub half %a, %b +// CHECK: ret half [[SUB]] +float16_t test_vsubh_f16(float16_t a, float16_t b) { + return vsubh_f16(a, b); +} + +// CHECK-LABEL: test_vfmah_f16 +// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a) +// CHECK: ret half [[FMA]] +float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) { + return vfmah_f16(a, b, c); +} + +// CHECK-LABEL: test_vfmsh_f16 +// CHECK: [[SUB:%.*]] = fsub half 0xH8000, %b +// CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a) +// CHECK: ret half [[ADD]] +float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) { + return vfmsh_f16(a, b, c); +} + Index: clang/utils/TableGen/NeonEmitter.cpp =================================================================== --- clang/utils/TableGen/NeonEmitter.cpp +++ clang/utils/TableGen/NeonEmitter.cpp @@ -552,7 +552,11 @@ // run - Emit arm_neon.h.inc void run(raw_ostream &o); + // runFP16 - Emit arm_fp16.h.inc + void runFP16(raw_ostream &o); + // runHeader - Emit all the __builtin prototypes used in arm_neon.h + // and arm_fp16.h void runHeader(raw_ostream &o); // runTests - Emit tests for all the Neon intrinsics. @@ -852,6 +856,35 @@ NumVectors = 0; Float = true; break; + case 'Y': + Bitwidth = ElementBitwidth = 16; + NumVectors = 0; + Float = true; + break; + case 'I': + Bitwidth = ElementBitwidth = 32; + NumVectors = 0; + Float = false; + Signed = true; + break; + case 'L': + Bitwidth = ElementBitwidth = 64; + NumVectors = 0; + Float = false; + Signed = true; + break; + case 'U': + Bitwidth = ElementBitwidth = 32; + NumVectors = 0; + Float = false; + Signed = false; + break; + case 'O': + Bitwidth = ElementBitwidth = 64; + NumVectors = 0; + Float = false; + Signed = false; + break; case 'f': Float = true; ElementBitwidth = 32; @@ -1010,7 +1043,7 @@ } static bool isFloatingPointProtoModifier(char Mod) { - return Mod == 'F' || Mod == 'f' || Mod == 'H'; + return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I'; } std::string Intrinsic::getBuiltinTypeStr() { @@ -2420,12 +2453,128 @@ OS << "#endif /* __ARM_NEON_H */\n"; } +/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h +/// is comprised of type definitions and function declarations. +void NeonEmitter::runFP16(raw_ostream &OS) { + OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " "------------------------------" "---===\n" + " *\n" + " * Permission is hereby granted, free of charge, to any person " "obtaining " "a copy\n" + " * of this software and associated documentation files (the " "\"Software\")," " to deal\n" + " * in the Software without restriction, including without limitation " "the " "rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, " "and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software " "is\n" + " * furnished to do so, subject to the following conditions:\n" + " *\n" + " * The above copyright notice and this permission notice shall be " "included in\n" + " * all copies or substantial portions of the Software.\n" + " *\n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " "EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " "MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " "SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " "OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " "ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " "DEALINGS IN\n" + " * THE SOFTWARE.\n" + " *\n" + " *===-----------------------------------------------------------------" "---" "---===\n" + " */\n\n"; + + OS << "#ifndef __ARM_FP16_H\n"; + OS << "#define __ARM_FP16_H\n\n"; + + OS << "#include <stdint.h>\n\n"; + + OS << "typedef __fp16 float16_t;\n"; + + OS << "#define __ai static inline __attribute__((__always_inline__, " "__nodebug__))\n\n"; + + SmallVector<Intrinsic *, 128> Defs; + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + for (auto *I : Defs) + I->indexBody(); + + std::stable_sort( + Defs.begin(), Defs.end(), + [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); + + // Only emit a def when its requirements have been met. + // FIXME: This loop could be made faster, but it's fast enough for now. + bool MadeProgress = true; + std::string InGuard; + while (!Defs.empty() && MadeProgress) { + MadeProgress = false; + + for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); + I != Defs.end(); /*No step*/) { + bool DependenciesSatisfied = true; + for (auto *II : (*I)->getDependencies()) { + if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) + DependenciesSatisfied = false; + } + if (!DependenciesSatisfied) { + // Try the next one. + ++I; + continue; + } + + // Emit #endif/#if pair if needed. + if ((*I)->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif\n"; + InGuard = (*I)->getGuard(); + if (!InGuard.empty()) + OS << "#if " << InGuard << "\n"; + } + + // Actually generate the intrinsic code.
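+      // For a def such as SInst<"vadd", "sss", "Sh">, generate() should
+      // emit an always_inline wrapper roughly of the form
+      //   __ai float16_t vaddh_f16(float16_t __p0, float16_t __p1) {
+      //     return (float16_t) __builtin_neon_vaddh_f16(__p0, __p1);
+      //   }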
+ OS << (*I)->generate(); + + MadeProgress = true; + I = Defs.erase(I); + } + } + assert(Defs.empty() && "Some requirements were not satisfied!"); + if (!InGuard.empty()) + OS << "#endif\n"; + + OS << "\n"; + OS << "#undef __ai\n\n"; + OS << "#endif /* __ARM_FP16_H */\n"; +} + namespace clang { void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).run(OS); } +void EmitFP16(RecordKeeper &Records, raw_ostream &OS) { + NeonEmitter(Records).runFP16(OS); +} + void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runHeader(OS); } Index: clang/utils/TableGen/TableGen.cpp =================================================================== --- clang/utils/TableGen/TableGen.cpp +++ clang/utils/TableGen/TableGen.cpp @@ -52,6 +52,7 @@ GenClangCommentCommandInfo, GenClangCommentCommandList, GenArmNeon, + GenArmFP16, GenArmNeonSema, GenArmNeonTest, GenAttrDocs, @@ -139,6 +140,7 @@ "Generate list of commands that are used in " "documentation comments"), clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"), + clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"), clEnumValN(GenArmNeonSema, "gen-arm-neon-sema", "Generate ARM NEON sema support for clang"), clEnumValN(GenArmNeonTest, "gen-arm-neon-test", @@ -250,6 +252,9 @@ case GenArmNeon: EmitNeon(Records, OS); break; + case GenArmFP16: + EmitFP16(Records, OS); + break; case GenArmNeonSema: EmitNeonSema(Records, OS); break; Index: clang/utils/TableGen/TableGenBackends.h =================================================================== --- clang/utils/TableGen/TableGenBackends.h +++ clang/utils/TableGen/TableGenBackends.h @@ -65,6 +65,7 @@ void EmitClangCommentCommandList(RecordKeeper &Records, raw_ostream &OS); void EmitNeon(RecordKeeper &Records, raw_ostream &OS); +void EmitFP16(RecordKeeper &Records, raw_ostream &OS); void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS); void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS); void EmitNeon2(RecordKeeper &Records, raw_ostream &OS); Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -146,6 +146,9 @@ class AdvSIMD_CvtFPToFx_Intrinsic : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; + + class AdvSIMD_1Arg_Intrinsic + : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>; } // Arithmetic ops @@ -244,7 +247,7 @@ // Vector Max def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; - def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmax : AdvSIMD_2FloatArg_Intrinsic; def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Max Across Lanes @@ -256,7 +259,7 @@ // Vector Min def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; - def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmin : AdvSIMD_2FloatArg_Intrinsic; def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Min/Max Number @@ -354,7 +357,7 @@ def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Absolute Value - def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_abs : AdvSIMD_1Arg_Intrinsic; // Vector Saturating Absolute Value def
int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
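
For reference, a minimal usage sketch of the generated header (the driver flags are illustrative and the two wrapper functions are hypothetical; the intrinsic names and signatures come from the tests above):

  // build (illustrative): clang --target=aarch64-linux-gnu -march=armv8.2-a+fp16 -c fp16_demo.c
  #include <arm_fp16.h>

  // Fused acc + x*y: the accumulator is the first operand, matching the
  // "NEON intrinsic puts accumulator first" comment in CGBuiltin.cpp above.
  static float16_t fma_h(float16_t acc, float16_t x, float16_t y) {
    return vfmah_f16(acc, x, y);
  }

  // FP16 comparisons return an all-ones/all-zeros uint16_t mask (the
  // sext-of-i1 pattern in the CHECK lines), so the result can be used
  // directly as a boolean.
  static uint16_t within_tol(float16_t a, float16_t b, float16_t tol) {
    return vcleh_f16(vabdh_f16(a, b), tol);
  }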