Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -128,6 +128,7 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") LANGOPT(OpenCLVersion , 32, 0, "OpenCL version") LANGOPT(NativeHalfType , 1, 0, "Native half type support") +LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") LANGOPT(CUDA , 1, 0, "CUDA") LANGOPT(OpenMP , 1, 0, "OpenMP support") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -507,6 +507,8 @@ HelpText<"Control vtordisp placement on win32 targets">; def fno_rtti_data : Flag<["-"], "fno-rtti-data">, HelpText<"Control emission of RTTI data">; +def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">, + HelpText<"Allow function arguments and returns of type half">; //===----------------------------------------------------------------------===// // Header Search Options Index: lib/CodeGen/CGExprConstant.cpp =================================================================== --- lib/CodeGen/CGExprConstant.cpp +++ lib/CodeGen/CGExprConstant.cpp @@ -1132,7 +1132,8 @@ case APValue::Float: { const llvm::APFloat &Init = Value.getFloat(); if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf && - !Context.getLangOpts().NativeHalfType) + !Context.getLangOpts().NativeHalfType && + !Context.getLangOpts().HalfArgsAndReturns) return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt()); else return llvm::ConstantFP::get(VMContext, Init); Index: lib/CodeGen/CGExprScalar.cpp =================================================================== --- lib/CodeGen/CGExprScalar.cpp +++ lib/CodeGen/CGExprScalar.cpp @@ -701,7 +701,8 @@ llvm::Type *SrcTy = Src->getType(); // If casting to/from storage-only half FP, use special intrinsics. 
- if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { + if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) { Src = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -773,7 +774,8 @@ DstTy); // Cast to half via float - if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) DstTy = CGF.FloatTy; if (isa<llvm::IntegerType>(SrcTy)) { @@ -1691,7 +1693,8 @@ // Add the inc/dec to the real part. llvm::Value *amt; - if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) { // Another special case: half FP increment should be done via float value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, @@ -1714,7 +1717,8 @@ } value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); - if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), Index: lib/CodeGen/CodeGenTypes.cpp =================================================================== --- lib/CodeGen/CodeGenTypes.cpp +++ lib/CodeGen/CodeGenTypes.cpp @@ -353,9 +353,10 @@ case BuiltinType::Half: // Half FP can either be storage-only (lowered to i16) or native. 
- ResultType = getTypeForFormat(getLLVMContext(), - Context.getFloatTypeSemantics(T), - Context.getLangOpts().NativeHalfType); + ResultType = + getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), + Context.getLangOpts().NativeHalfType || + Context.getLangOpts().HalfArgsAndReturns); break; case BuiltinType::Float: case BuiltinType::Double: Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -3542,8 +3542,9 @@ }; } -static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, +static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base, ASTContext &Context, + bool isAArch64, uint64_t *HAMembers = nullptr); ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, @@ -3625,7 +3626,7 @@ // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { + if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) { IsHA = true; if (!IsNamedArg && isDarwinPCS()) { // With the Darwin ABI, variadic arguments are always passed on the stack @@ -3683,7 +3684,7 @@ return ABIArgInfo::getIgnore(); const Type *Base = nullptr; - if (isHomogeneousAggregate(RetTy, Base, getContext())) + if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. 
return ABIArgInfo::getDirect(); @@ -3820,7 +3821,7 @@ const Type *Base = nullptr; uint64_t NumMembers; - bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers); + bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, @@ -3963,7 +3964,7 @@ uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; const Type *Base = nullptr; - bool isHA = isHomogeneousAggregate(Ty, Base, getContext()); + bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true); bool isIndirect = false; // Arguments bigger than 16 bytes which aren't homogeneous aggregates should @@ -4246,15 +4247,16 @@ RuntimeCC = abiCC; } -/// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous +/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous /// aggregate. If HAMembers is non-null, the number of base elements /// contained in the type is returned through it; this is used for the /// recursive calls that check aggregate component types. 
-static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, - ASTContext &Context, uint64_t *HAMembers) { +static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base, + ASTContext &Context, bool isAArch64, + uint64_t *HAMembers) { uint64_t Members = 0; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { - if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members)) + if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members)) return false; Members *= AT->getSize().getZExtValue(); } else if (const RecordType *RT = Ty->getAs<RecordType>()) { @@ -4265,7 +4267,7 @@ Members = 0; for (const auto *FD : RD->fields()) { uint64_t FldMembers; - if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers)) + if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers)) return false; Members = (RD->isUnion() ? @@ -4279,12 +4281,22 @@ } // Homogeneous aggregates for AAPCS-VFP must have base types of float, - // double, or 64-bit or 128-bit vectors. + // double, or 64-bit or 128-bit vectors. "long double" has the same machine + // type as double, so it is also allowed as a base type. + // Homogeneous aggregates for AAPCS64 must have base types of a floating + // point type or a short-vector type. This is the same as the 32-bit ABI, + // but with the difference that any floating-point type is allowed, + // including __fp16. 
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() != BuiltinType::Float && - BT->getKind() != BuiltinType::Double && - BT->getKind() != BuiltinType::LongDouble) - return false; + if (isAArch64) { + if (!BT->isFloatingPoint()) + return false; + } else { + if (BT->getKind() != BuiltinType::Float && + BT->getKind() != BuiltinType::Double && + BT->getKind() != BuiltinType::LongDouble) + return false; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { unsigned VecSize = Context.getTypeSize(VT); if (VecSize != 64 && VecSize != 128) @@ -4482,7 +4494,7 @@ // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { + if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. if (Base->isVectorType()) { @@ -4683,7 +4695,7 @@ // Check for homogeneous aggregates with AAPCS-VFP. if (getABIKind() == AAPCS_VFP && !isVariadic) { const Type *Base = nullptr; - if (isHomogeneousAggregate(RetTy, Base, getContext())) { + if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. 
return ABIArgInfo::getDirect(); Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3698,6 +3698,10 @@ CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment)); } + if (getToolChain().getTriple().getArch() == llvm::Triple::aarch64 || + getToolChain().getTriple().getArch() == llvm::Triple::aarch64_be) + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it, options::OPT_mno_restrict_it)) { if (A->getOption().matches(options::OPT_mrestrict_it)) { Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -1499,6 +1499,7 @@ Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name); Opts.ImplementationOfModule = Args.getLastArgValue(OPT_fmodule_implementation_of); + Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns); if (!Opts.CurrentModule.empty() && !Opts.ImplementationOfModule.empty() && Opts.CurrentModule != Opts.ImplementationOfModule) { Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -1746,7 +1746,7 @@ } // Functions cannot return half FP. - if (T->isHalfType()) { + if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) { Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 << FixItHint::CreateInsertion(Loc, "*"); return true; @@ -1776,7 +1776,7 @@ if (ParamType->isVoidType()) { Diag(Loc, diag::err_param_with_void_type); Invalid = true; - } else if (ParamType->isHalfType()) { + } else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) { // Disallow half FP arguments. 
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 << FixItHint::CreateInsertion(Loc, "*"); @@ -2751,7 +2751,7 @@ S.Diag(D.getIdentifierLoc(), diag::err_opencl_half_return) << T; D.setInvalidType(true); } - } else { + } else if (!S.getLangOpts().HalfArgsAndReturns) { S.Diag(D.getIdentifierLoc(), diag::err_parameters_retval_cannot_have_fp16_type) << 1; D.setInvalidType(true); @@ -2941,7 +2941,7 @@ D.setInvalidType(); Param->setInvalidDecl(); } - } else { + } else if (!S.getLangOpts().HalfArgsAndReturns) { S.Diag(Param->getLocation(), diag::err_parameters_retval_cannot_have_fp16_type) << 0; D.setInvalidType(); Index: test/CodeGen/arm64-aapcs-arguments.c =================================================================== --- test/CodeGen/arm64-aapcs-arguments.c +++ test/CodeGen/arm64-aapcs-arguments.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -fallow-half-arguments-and-returns -emit-llvm -w -o - %s | FileCheck %s // AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN // is rounded up to the next even number. @@ -40,3 +40,12 @@ // CHECK: define i8 @test5(i8 %a, i16 %b) unsigned char test5(unsigned char a, signed short b) { } + +// __fp16 can be used as a function argument or return type (ACLE 2.0) +// CHECK: define half @test_half(half %{{.*}}) +__fp16 test_half(__fp16 A) { } + +// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM). +// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) +struct HFA_half { __fp16 a[4]; }; +struct HFA_half test_half_hfa(struct HFA_half A) { }