Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -155,6 +155,7 @@
 // i: constant int
 // l: constant uint64
 // s: scalar of element type
+// r: scalar of double width element type
 // a: scalar of element type (splat to vector type)
 // k: default elt width, double num elts
 // #: array of default vectors
@@ -630,6 +631,15 @@
 def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;

 ////////////////////////////////////////////////////////////////////////////////
+// Across vectors class
+def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def VMAXV : SInst<"vmaxv", "sd", "csiUcUsUiQcQsQiQUcQUsQUiQf">;
+def VMINV : SInst<"vminv", "sd", "csiUcUsUiQcQsQiQUcQUsQUiQf">;
+def VADDV : SInst<"vaddv", "sd", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def FMAXNMV : SInst<"vmaxnmv", "sd", "Qf">;
+def FMINNMV : SInst<"vminnmv", "sd", "Qf">;
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic

 // Scalar Addition
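Editorial note (not part of the patch): in these prototype strings, 'd' is the vector operand, 's' a scalar result of the element type, and the new 'r' a scalar result of double the element width, so vaddlv gets a widened accumulator while vmaxv/vminv/vaddv return the element type. A minimal usage sketch of the intrinsics these defs generate; the wrapper names are arbitrary, and the signatures follow the tests added later in this patch:

```c
#include <arm_neon.h>

/* Sketch only: each call reduces all lanes of one vector to a scalar,
 * per the "sd"/"rd" prototypes above. */
float32_t max_across(float32x4_t v) { return vmaxvq_f32(v); } /* FMAXV, element-typed result */
uint8_t   min_across(uint8x8_t v)   { return vminv_u8(v);   } /* UMINV, element-typed result */
int16_t   sum_across(int8x8_t v)    { return vaddlv_s8(v);  } /* SADDLV, double-width result */
```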
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1715,209 +1715,245 @@
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
-  NeonTypeFlags::EltType ET;
-  bool usgn;
   unsigned int Int = 0;
-  bool OverloadInt = true;
+  // Scalar result generated across vectors
+  bool AcrossVec = false;
+  // Extend element of one-element vector
+  bool ExtendEle = false;
+  bool OverloadInt = false;
   const char *s = NULL;

-  SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
-  }
-
   // AArch64 scalar builtins are not overloaded, they do not have an extra
   // argument that specifies the vector type, need to handle each case.
   switch (BuiltinID) {
   default: break;
   // Scalar Add
   case AArch64::BI__builtin_neon_vaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vaddds;
-    s = "vaddds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vaddds;
+    s = "vaddds"; break;
   case AArch64::BI__builtin_neon_vaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vadddu;
-    s = "vadddu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vadddu;
+    s = "vadddu"; break;
   // Scalar Sub
   case AArch64::BI__builtin_neon_vsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubds;
-    s = "vsubds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubds;
+    s = "vsubds"; break;
  case AArch64::BI__builtin_neon_vsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubdu;
-    s = "vsubdu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubdu;
+    s = "vsubdu"; break;
   // Scalar Saturating Add
   case AArch64::BI__builtin_neon_vqaddb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; OverloadInt = true; break;
   // Scalar Saturating Sub
   case AArch64::BI__builtin_neon_vqsubb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; OverloadInt = true; break;
   // Scalar Shift Left
   case AArch64::BI__builtin_neon_vshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshlds;
-    s = "vshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vshlds;
+    s = "vshlds"; break;
   case AArch64::BI__builtin_neon_vshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshldu;
-    s = "vshldu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vshldu;
+    s = "vshldu"; break;
   // Scalar Saturating Shift Left
   case AArch64::BI__builtin_neon_vqshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; OverloadInt = true; break;
   // Scalar Rouding Shift Left
   case AArch64::BI__builtin_neon_vrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshlds;
-    s = "vrshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshlds;
+    s = "vrshlds"; break;
   case AArch64::BI__builtin_neon_vrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshldu;
-    s = "vrshldu"; usgn = true; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshldu;
+    s = "vrshldu"; break;
   // Scalar Saturating Rouding Shift Left
   case AArch64::BI__builtin_neon_vqrshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; OverloadInt = true; break;
   // Scalar Reduce Pairwise Add
   case AArch64::BI__builtin_neon_vpaddd_s64:
     Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpadds_f32:
     Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpaddd_f64:
     Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Max
   case AArch64::BI__builtin_neon_vpmaxs_f32:
     Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxqd_f64:
     Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Min
   case AArch64::BI__builtin_neon_vpmins_f32:
     Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminqd_f64:
     Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Maxnm
   case AArch64::BI__builtin_neon_vpmaxnms_f32:
     Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
-    OverloadInt = false; break;
-   // Scalar Reduce Pairwise Floating Point Minnm
+    break;
+  // Scalar Reduce Pairwise Floating Point Minnm
   case AArch64::BI__builtin_neon_vpminnms_f32:
     Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
-    OverloadInt = false; break;
+    break;
+  // The following are intrinsics with scalar results generated across vectors
+  case AArch64::BI__builtin_neon_vaddlv_s8:
+  case AArch64::BI__builtin_neon_vaddlv_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s8:
+  case AArch64::BI__builtin_neon_vaddlvq_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s32:
+    Int = Intrinsic::aarch64_neon_saddlv;
+    AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
+  case AArch64::BI__builtin_neon_vaddlv_u8:
+  case AArch64::BI__builtin_neon_vaddlv_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u8:
+  case AArch64::BI__builtin_neon_vaddlvq_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u32:
+    Int = Intrinsic::aarch64_neon_uaddlv;
+    AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_s8:
+  case AArch64::BI__builtin_neon_vmaxv_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s8:
+  case AArch64::BI__builtin_neon_vmaxvq_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s32:
+    Int = Intrinsic::aarch64_neon_smaxv;
+    AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_u8:
+  case AArch64::BI__builtin_neon_vmaxv_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u8:
+  case AArch64::BI__builtin_neon_vmaxvq_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u32:
+    Int = Intrinsic::aarch64_neon_umaxv;
+    AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
+  case AArch64::BI__builtin_neon_vminv_s8:
+  case AArch64::BI__builtin_neon_vminv_s16:
+  case AArch64::BI__builtin_neon_vminvq_s8:
+  case AArch64::BI__builtin_neon_vminvq_s16:
+  case AArch64::BI__builtin_neon_vminvq_s32:
+    Int = Intrinsic::aarch64_neon_sminv;
+    AcrossVec = true; ExtendEle = false; s = "sminv"; break;
+  case AArch64::BI__builtin_neon_vminv_u8:
+  case AArch64::BI__builtin_neon_vminv_u16:
+  case AArch64::BI__builtin_neon_vminvq_u8:
+  case AArch64::BI__builtin_neon_vminvq_u16:
+  case AArch64::BI__builtin_neon_vminvq_u32:
+    Int = Intrinsic::aarch64_neon_uminv;
+    AcrossVec = true; ExtendEle = false; s = "uminv"; break;
+  case AArch64::BI__builtin_neon_vaddv_s8:
+  case AArch64::BI__builtin_neon_vaddv_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s8:
+  case AArch64::BI__builtin_neon_vaddvq_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s32:
+  case AArch64::BI__builtin_neon_vaddv_u8:
+  case AArch64::BI__builtin_neon_vaddv_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u8:
+  case AArch64::BI__builtin_neon_vaddvq_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u32:
+    Int = Intrinsic::aarch64_neon_vaddv;
+    AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
+  case AArch64::BI__builtin_neon_vmaxvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
+  case AArch64::BI__builtin_neon_vminvq_f32:
+    Int = Intrinsic::aarch64_neon_vminv;
+    AcrossVec = true; ExtendEle = false; s = "vminv"; break;
+  case AArch64::BI__builtin_neon_vmaxnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxnmv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
+  case AArch64::BI__builtin_neon_vminnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vminnmv;
+    AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
   }

   if (!Int)
     return 0;

   // AArch64 scalar builtin that returns scalar type
-  // and should be mapped to AArch64 intrinsic that takes
-  // one-element vector type arguments and returns
+  // and should be mapped to AArch64 intrinsic that returns
   // one-element vector type.
-  llvm::Type *Ty = 0;
   Function *F = 0;
-  if (OverloadInt) {
+  SmallVector<Value *, 4> Ops;
+  if (AcrossVec) {
+    // Gen arg type
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+    llvm::Type *ETy = VTy->getElementType();
+    llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
+
+    if (ExtendEle) {
+      assert(!ETy->isFloatingPointTy());
+      RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
+    }
+
+    llvm::Type *Tys[2] = {RTy, VTy};
+    F = CGF.CGM.getIntrinsic(Int, Tys);
+    assert(E->getNumArgs() == 1);
+  }
+  else if (OverloadInt) {
     // Determine the type of this overloaded AArch64 intrinsic
-    NeonTypeFlags Type(ET, usgn, false);
-    llvm::VectorType *VTy = GetNeonType(&CGF, Type, true);
-    Ty = VTy;
-    if (!Ty)
-      return 0;
-    F = CGF.CGM.getIntrinsic(Int, Ty);
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+    assert(VTy);
+
+    F = CGF.CGM.getIntrinsic(Int, VTy);
   } else
     F = CGF.CGM.getIntrinsic(Int);

+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
   // AArch64 intrinsic one-element vector type cast to
@@ -1926,7 +1962,7 @@
 }

 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                               const CallExpr *E) {
+                                              const CallExpr *E) {

   // Process AArch64 scalar builtins
   if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
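Editorial note (not part of the patch): as a reminder of the semantics the assembly checks below rely on, each across-vector builtin takes a single vector and reduces all of its lanes to one scalar, and the vaddlv forms additionally widen the accumulator. A plain-C reference model, assuming the ACLE semantics and written for illustration only (this is not the codegen path above):

```c
#include <stdint.h>

/* Reference model of two of the reductions, assuming ACLE semantics. */
static int16_t ref_vaddlv_s8(const int8_t v[8]) {
  int16_t sum = 0;                 /* widened accumulator: vaddlv_s8 returns int16_t */
  for (int i = 0; i < 8; ++i)
    sum += v[i];
  return sum;
}

static int8_t ref_vmaxv_s8(const int8_t v[8]) {
  int8_t m = v[0];                 /* element-typed result: vmaxv_s8 returns int8_t */
  for (int i = 1; i < 8; ++i)
    if (v[i] > m)
      m = v[i];
  return m;
}
```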
Index: test/CodeGen/aarch64-neon-across.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-neon-across.c
@@ -0,0 +1,271 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int16_t test_vaddlv_s8(int8x8_t a) {
+  // CHECK: test_vaddlv_s8
+  return vaddlv_s8(a);
+  // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+int32_t test_vaddlv_s16(int16x4_t a) {
+  // CHECK: test_vaddlv_s16
+  return vaddlv_s16(a);
+  // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+uint16_t test_vaddlv_u8(uint8x8_t a) {
+  // CHECK: test_vaddlv_u8
+  return vaddlv_u8(a);
+  // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+uint32_t test_vaddlv_u16(uint16x4_t a) {
+  // CHECK: test_vaddlv_u16
+  return vaddlv_u16(a);
+  // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+int16_t test_vaddlvq_s8(int8x16_t a) {
+  // CHECK: test_vaddlvq_s8
+  return vaddlvq_s8(a);
+  // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+int32_t test_vaddlvq_s16(int16x8_t a) {
+  // CHECK: test_vaddlvq_s16
+  return vaddlvq_s16(a);
+  // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+int64_t test_vaddlvq_s32(int32x4_t a) {
+  // CHECK: test_vaddlvq_s32
+  return vaddlvq_s32(a);
+  // CHECK: saddlv {{d[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint16_t test_vaddlvq_u8(uint8x16_t a) {
+  // CHECK: test_vaddlvq_u8
+  return vaddlvq_u8(a);
+  // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+uint32_t test_vaddlvq_u16(uint16x8_t a) {
+  // CHECK: test_vaddlvq_u16
+  return vaddlvq_u16(a);
+  // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+uint64_t test_vaddlvq_u32(uint32x4_t a) {
+  // CHECK: test_vaddlvq_u32
+  return vaddlvq_u32(a);
+  // CHECK: uaddlv {{d[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+int8_t test_vmaxv_s8(int8x8_t a) {
+  // CHECK: test_vmaxv_s8
+  return vmaxv_s8(a);
+  // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+int16_t test_vmaxv_s16(int16x4_t a) {
+  // CHECK: test_vmaxv_s16
+  return vmaxv_s16(a);
+  // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+uint8_t test_vmaxv_u8(uint8x8_t a) {
+  // CHECK: test_vmaxv_u8
+  return vmaxv_u8(a);
+  // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+uint16_t test_vmaxv_u16(uint16x4_t a) {
+  // CHECK: test_vmaxv_u16
+  return vmaxv_u16(a);
+  // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+int8_t test_vmaxvq_s8(int8x16_t a) {
+  // CHECK: test_vmaxvq_s8
+  return vmaxvq_s8(a);
+  // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+int16_t test_vmaxvq_s16(int16x8_t a) {
+  // CHECK: test_vmaxvq_s16
+  return vmaxvq_s16(a);
+  // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+int32_t test_vmaxvq_s32(int32x4_t a) {
+  // CHECK: test_vmaxvq_s32
+  return vmaxvq_s32(a);
+  // CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint8_t test_vmaxvq_u8(uint8x16_t a) {
+  // CHECK: test_vmaxvq_u8
+  return vmaxvq_u8(a);
+  // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+uint16_t test_vmaxvq_u16(uint16x8_t a) {
+  // CHECK: test_vmaxvq_u16
+  return vmaxvq_u16(a);
+  // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+uint32_t test_vmaxvq_u32(uint32x4_t a) {
+  // CHECK: test_vmaxvq_u32
+  return vmaxvq_u32(a);
+  // CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+int8_t test_vminv_s8(int8x8_t a) {
+  // CHECK: test_vminv_s8
+  return vminv_s8(a);
+  // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+int16_t test_vminv_s16(int16x4_t a) {
+  // CHECK: test_vminv_s16
+  return vminv_s16(a);
+  // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+uint8_t test_vminv_u8(uint8x8_t a) {
+  // CHECK: test_vminv_u8
+  return vminv_u8(a);
+  // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+uint16_t test_vminv_u16(uint16x4_t a) {
+  // CHECK: test_vminv_u16
+  return vminv_u16(a);
+  // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+int8_t test_vminvq_s8(int8x16_t a) {
+  // CHECK: test_vminvq_s8
+  return vminvq_s8(a);
+  // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+int16_t test_vminvq_s16(int16x8_t a) {
+  // CHECK: test_vminvq_s16
+  return vminvq_s16(a);
+  // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+int32_t test_vminvq_s32(int32x4_t a) {
+  // CHECK: test_vminvq_s32
+  return vminvq_s32(a);
+  // CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint8_t test_vminvq_u8(uint8x16_t a) {
+  // CHECK: test_vminvq_u8
+  return vminvq_u8(a);
+  // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+uint16_t test_vminvq_u16(uint16x8_t a) {
+  // CHECK: test_vminvq_u16
+  return vminvq_u16(a);
+  // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+uint32_t test_vminvq_u32(uint32x4_t a) {
+  // CHECK: test_vminvq_u32
+  return vminvq_u32(a);
+  // CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+int8_t test_vaddv_s8(int8x8_t a) {
+  // CHECK: test_vaddv_s8
+  return vaddv_s8(a);
+  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+int16_t test_vaddv_s16(int16x4_t a) {
+  // CHECK: test_vaddv_s16
+  return vaddv_s16(a);
+  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+uint8_t test_vaddv_u8(uint8x8_t a) {
+  // CHECK: test_vaddv_u8
+  return vaddv_u8(a);
+  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b
+}
+
+uint16_t test_vaddv_u16(uint16x4_t a) {
+  // CHECK: test_vaddv_u16
+  return vaddv_u16(a);
+  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h
+}
+
+int8_t test_vaddvq_s8(int8x16_t a) {
+  // CHECK: test_vaddvq_s8
+  return vaddvq_s8(a);
+  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+int16_t test_vaddvq_s16(int16x8_t a) {
+  // CHECK: test_vaddvq_s16
+  return vaddvq_s16(a);
+  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+int32_t test_vaddvq_s32(int32x4_t a) {
+  // CHECK: test_vaddvq_s32
+  return vaddvq_s32(a);
+  // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+uint8_t test_vaddvq_u8(uint8x16_t a) {
+  // CHECK: test_vaddvq_u8
+  return vaddvq_u8(a);
+  // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
+}
+
+uint16_t test_vaddvq_u16(uint16x8_t a) {
+  // CHECK: test_vaddvq_u16
+  return vaddvq_u16(a);
+  // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
+}
+
+uint32_t test_vaddvq_u32(uint32x4_t a) {
+  // CHECK: test_vaddvq_u32
+  return vaddvq_u32(a);
+  // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+float32_t test_vmaxvq_f32(float32x4_t a) {
+  // CHECK: test_vmaxvq_f32
+  return vmaxvq_f32(a);
+  // CHECK: fmaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+float32_t test_vminvq_f32(float32x4_t a) {
+  // CHECK: test_vminvq_f32
+  return vminvq_f32(a);
+  // CHECK: fminv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+float32_t test_vmaxnmvq_f32(float32x4_t a) {
+  // CHECK: test_vmaxnmvq_f32
+  return vmaxnmvq_f32(a);
+  // CHECK: fmaxnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
+
+float32_t test_vminnmvq_f32(float32x4_t a) {
+  // CHECK: test_vminnmvq_f32
+  return vminnmvq_f32(a);
+  // CHECK: fminnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -463,6 +463,8 @@
     scal = true;
     usgn = true;
     break;
+  case 'r':
+    type = Widen(type);
   case 's':
   case 'a':
     scal = true;
@@ -1811,6 +1813,12 @@
   return Flags.getFlags();
 }

+static bool ProtoHasScalar(const std::string proto)
+{
+  return (proto.find('s') != std::string::npos
+          || proto.find('r') != std::string::npos);
+}
+
 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
 static std::string GenBuiltin(const std::string &name, const std::string &proto,
                               StringRef typestr, ClassKind ck) {
@@ -1825,7 +1833,7 @@
   // Check if the prototype has a scalar operand with the type of the vector
   // elements. If not, bitcasting the args will take care of arg checking.
   // The actual signedness etc. will be taken care of with special enums.
-  if (proto.find('s') == std::string::npos)
+  if (!ProtoHasScalar(proto))
     ck = ClassB;

   if (proto[0] != 'v') {
@@ -1936,7 +1944,7 @@
   // If all types are the same size, bitcasting the args will take care
   // of arg checking. The actual signedness etc. will be taken care of with
   // special enums.
-  if (proto.find('s') == std::string::npos)
+  if (!ProtoHasScalar(proto))
     ck = ClassB;

   s += MangleName(name, typestr, ck);
@@ -2335,7 +2343,7 @@
   else
     PrintFatalError(R->getLoc(),
                     "Fixed point convert name should contains \"32\" or \"64\"");
-  } else if (Proto.find('s') == std::string::npos) {
+  } else if (!ProtoHasScalar(Proto)) {
     // Builtins which are overloaded by type will need to have their upper
     // bound computed at Sema time based on the type constant.
     ck = ClassB;
@@ -2432,7 +2440,7 @@
   // Functions which have a scalar argument cannot be overloaded, no need to
   // check them if we are emitting the type checking code.
-  if (Proto.find('s') != std::string::npos)
+  if (ProtoHasScalar(Proto))
     continue;

   SmallVector<StringRef, 16> TypeVec;
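Editorial note (not part of the patch): the NeonEmitter changes are what make the new 'r' modifier work. The 'r' case widens the element type and then falls through to the existing scalar-result handling, and ProtoHasScalar keeps prototypes containing 'r' (like those containing 's') out of the type-overloaded ClassB path. The practical effect on the generated header is the result-width contrast sketched below, with signatures inferred from the arm_neon.td defs and the tests in this patch:

```c
#include <arm_neon.h>

/* Sketch only: the "sd" prototype keeps the element width, the new "rd"
 * prototype doubles it, so the two reductions differ in result type. */
int32_t sum_lanes(int32x4_t v)      { return vaddvq_s32(v);  }  /* ADDV,   int32_t result */
int64_t sum_lanes_wide(int32x4_t v) { return vaddlvq_s32(v); }  /* SADDLV, int64_t result */
```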