Index: lib/CodeGen/ABIInfo.h
===================================================================
--- lib/CodeGen/ABIInfo.h
+++ lib/CodeGen/ABIInfo.h
@@ -73,6 +73,15 @@
     // abstract this out.
     virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGen::CodeGenFunction &CGF) const = 0;
+
+    virtual bool isHomogeneousAggregateBaseType(QualType Ty) const;
+
+    virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const;
+
+    bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                uint64_t &Members) const;
+
   };
 }  // end namespace clang
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -575,6 +575,27 @@
 };
 }  // namespace
 
+// Collect the fields of a record. Check for non-virtual bases and recursively
+// add their fields first. Doesn't work on dynamic classes.
+void addFields(const RecordDecl *RD,
+               SmallVectorImpl<const FieldDecl *> &Fields) {
+  if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    // This code assumes that layout order is the same as base specifier order,
+    // which is true for non-dynamic classes in all supported ABIs.
+    assert(!CXXRD->isDynamicClass() && "can't expand dynamic class");
+    for (const CXXBaseSpecifier &BS : CXXRD->bases()) {
+      const CXXRecordDecl *Base = BS.getType()->getAsCXXRecordDecl();
+      addFields(Base, Fields);
+    }
+  }
+
+  for (const auto *FD : RD->fields()) {
+    assert(!FD->isBitField() &&
+           "Cannot expand structure with bit-field members.");
+    Fields.push_back(FD);
+  }
+}
+
 static std::unique_ptr<TypeExpansion>
 getTypeExpansion(QualType Ty, const ASTContext &Context) {
   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
@@ -604,11 +625,7 @@
       if (LargestFD)
         Fields.push_back(LargestFD);
     } else {
-      for (const auto *FD : RD->fields()) {
-        assert(!FD->isBitField() &&
-               "Cannot expand structure with bit-field members.");
-        Fields.push_back(FD);
-      }
+      addFields(RD, Fields);
     }
     return llvm::make_unique<RecordExpansion>(std::move(Fields));
   }
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -85,6 +85,15 @@
   return CGT.getTarget();
 }
 
+bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  return false;
+}
+
+bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                uint64_t Members) const {
+  return false;
+}
+
 void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -3044,12 +3053,14 @@
 
   bool isPromotableTypeForABI(QualType Ty) const;
   bool isAlignedParamType(QualType Ty) const;
-  bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                              uint64_t &Members) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   // TODO: We can add more logic to computeInfo to improve performance.
   // Example: For aggregate arguments that fit in a register, we could
   // use getDirectInReg (as is done below for structs containing a single
@@ -3192,9 +3203,8 @@
 /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
 /// aggregate.  Base is set to the base element type, and Members is set
 /// to the number of base elements.
-bool
-PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                           uint64_t &Members) const {
+bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                     uint64_t &Members) const {
   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
     uint64_t NElements = AT->getSize().getZExtValue();
     if (NElements == 0)
@@ -3263,19 +3273,9 @@
     Ty = CT->getElementType();
   }
 
-  // Homogeneous aggregates for ELFv2 must have base types of float,
-  // double, long double, or 128-bit vectors.
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() != BuiltinType::Float &&
-        BT->getKind() != BuiltinType::Double &&
-        BT->getKind() != BuiltinType::LongDouble)
-      return false;
-  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    if (getContext().getTypeSize(VT) != 128)
-      return false;
-  } else {
+  // Most ABIs only support float, double, and some vector type widths.
+  if (!isHomogeneousAggregateBaseType(Ty))
     return false;
-  }
 
   // The base type must be the same for all members.  Types that
   // agree in both total size and mode (float vs. vector) are
@@ -3288,14 +3288,34 @@
         getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
       return false;
   }
+  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
+}
+
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // Homogeneous aggregates for ELFv2 must have base types of float,
+  // double, long double, or 128-bit vectors.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  }
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    if (getContext().getTypeSize(VT) == 128)
+      return true;
+  }
+  return false;
+}
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
+    const Type *Base, uint64_t Members) const {
 
   // Vector types require one register, floating point types require one
   // or two registers depending on their size.
-  uint32_t NumRegs = Base->isVectorType() ? 1 :
-                       (getContext().getTypeSize(Base) + 63) / 64;
+  uint32_t NumRegs =
+      Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;
 
   // Homogeneous Aggregates may occupy at most 8 registers.
-  return (Members > 0 && Members * NumRegs <= 8);
+  return Members * NumRegs <= 8;
 }
 
 ABIArgInfo
@@ -3586,6 +3606,10 @@
   ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
                                   bool &IsHA, unsigned &AllocatedGPR,
                                   bool &IsSmallAggr, bool IsNamedArg) const;
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const {
@@ -3681,11 +3705,6 @@
 };
 }
 
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context,
-                                      bool isAArch64,
-                                      uint64_t *HAMembers = nullptr);
-
 ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
                                                 unsigned &AllocatedVFP,
                                                 bool &IsHA,
@@ -3765,7 +3784,7 @@
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
const Type *Base = nullptr; uint64_t Members = 0; - if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) { + if (isHomogeneousAggregate(Ty, Base, Members)) { IsHA = true; if (!IsNamedArg && isDarwinPCS()) { // With the Darwin ABI, variadic arguments are always passed on the stack @@ -3823,7 +3842,8 @@ return ABIArgInfo::getIgnore(); const Type *Base = nullptr; - if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true)) + uint64_t Members = 0; + if (isHomogeneousAggregate(RetTy, Base, Members)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); @@ -3851,9 +3871,35 @@ return false; } -static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty, - int AllocatedGPR, int AllocatedVFP, - bool IsIndirect, CodeGenFunction &CGF) { +bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS64 must have base types of a floating + // point type or a short-vector type. This is the same as the 32-bit ABI, + // but with the difference that any floating-point type is allowed, + // including __fp16. 
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->isFloatingPoint())
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                       uint64_t Members) const {
+  return Members <= 4;
+}
+
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
+  bool IsHA = false, IsSmallAggr = false;
+  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
+                                       IsSmallAggr, false /*IsNamedArg*/);
+  bool IsIndirect = AI.isIndirect();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -3959,8 +4005,8 @@
   }
 
   const Type *Base = nullptr;
-  uint64_t NumMembers;
-  bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
+  uint64_t NumMembers = 0;
+  bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
   if (IsHFA && NumMembers > 1) {
     // Homogeneous aggregates passed in registers will have their elements split
     // and stored 16-bytes apart regardless of size (they're notionally in qN,
@@ -4079,18 +4125,6 @@
   return ResAddr;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                            CodeGenFunction &CGF) const {
-
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
-
-  return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP,
-                          AI.isIndirect(), CGF);
-}
-
 llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
                                              CodeGenFunction &CGF) const {
   // We do not support va_arg for aggregates or illegal vector types.
@@ -4103,7 +4137,8 @@
   uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
 
   const Type *Base = nullptr;
-  bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true);
+  uint64_t Members = 0;
+  bool isHA = isHomogeneousAggregate(Ty, Base, Members);
 
   bool isIndirect = false;
   // Arguments bigger than 16 bytes which aren't homogeneous aggregates should
@@ -4210,6 +4245,10 @@
                           bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   void computeInfo(CGFunctionInfo &FI) const override;
 
   llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -4389,101 +4428,6 @@
   RuntimeCC = abiCC;
 }
 
-/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
-/// aggregate.  If HAMembers is non-null, the number of base elements
-/// contained in the type is returned through it; this is used for the
-/// recursive calls that check aggregate component types.
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context, bool isAArch64,
-                                      uint64_t *HAMembers) {
-  uint64_t Members = 0;
-  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
-    if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
-      return false;
-    Members *= AT->getSize().getZExtValue();
-  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
-    const RecordDecl *RD = RT->getDecl();
-    if (RD->hasFlexibleArrayMember())
-      return false;
-
-    Members = 0;
-    for (const auto *FD : RD->fields()) {
-      uint64_t FldMembers;
-      if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
-        return false;
-
-      Members = (RD->isUnion() ?
-                 std::max(Members, FldMembers) : Members + FldMembers);
-    }
-  } else {
-    Members = 1;
-    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
-      Members = 2;
-      Ty = CT->getElementType();
-    }
-
-    // Homogeneous aggregates for AAPCS-VFP must have base types of float,
-    // double, or 64-bit or 128-bit vectors. "long double" has the same machine
-    // type as double, so it is also allowed as a base type.
-    // Homogeneous aggregates for AAPCS64 must have base types of a floating
-    // point type or a short-vector type. This is the same as the 32-bit ABI,
-    // but with the difference that any floating-point type is allowed,
-    // including __fp16.
-    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-      if (isAArch64) {
-        if (!BT->isFloatingPoint())
-          return false;
-      } else {
-        if (BT->getKind() != BuiltinType::Float &&
-            BT->getKind() != BuiltinType::Double &&
-            BT->getKind() != BuiltinType::LongDouble)
-          return false;
-      }
-    } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-      unsigned VecSize = Context.getTypeSize(VT);
-      if (VecSize != 64 && VecSize != 128)
-        return false;
-    } else {
-      return false;
-    }
-
-    // The base type must be the same for all members.  Vector types of the
-    // same total size are treated as being equivalent here.
-    const Type *TyPtr = Ty.getTypePtr();
-    if (!Base)
-      Base = TyPtr;
-
-    if (Base != TyPtr) {
-      // Homogeneous aggregates are defined as containing members with the
-      // same machine type. There are two cases in which two members have
-      // different TypePtrs but the same machine type:
-
-      // 1) Vectors of the same length, regardless of the type and number
-      //    of their members.
-      const bool SameLengthVectors = Base->isVectorType() && TyPtr->isVectorType()
-        && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
-
-      // 2) In the 32-bit AAPCS, `double' and `long double' have the same
-      //    machine type. This is not the case for the 64-bit AAPCS.
- const bool SameSizeDoubles = - ( ( Base->isSpecificBuiltinType(BuiltinType::Double) - && TyPtr->isSpecificBuiltinType(BuiltinType::LongDouble)) - || ( Base->isSpecificBuiltinType(BuiltinType::LongDouble) - && TyPtr->isSpecificBuiltinType(BuiltinType::Double))) - && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr)); - - if (!SameLengthVectors && !SameSizeDoubles) - return false; - } - } - - // Homogeneous Aggregates can have at most 4 members of the base type. - if (HAMembers) - *HAMembers = Members; - - return (Members > 0 && Members <= 4); -} - /// markAllocatedVFPs - update VFPRegs according to the alignment and /// number of VFP registers (unit is S register) requested. void ARMABIInfo::markAllocatedVFPs(unsigned Alignment, @@ -4640,7 +4584,7 @@ // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; - if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) { + if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. if (Base->isVectorType()) { @@ -4845,7 +4789,8 @@ // Check for homogeneous aggregates with AAPCS-VFP. if (getABIKind() == AAPCS_VFP && !isVariadic) { const Type *Base = nullptr; - if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) { + uint64_t Members; + if (isHomogeneousAggregate(RetTy, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP); @@ -4891,6 +4836,27 @@ return false; } +bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS-VFP must have base types of float, + // double, or 64-bit or 128-bit vectors. 
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const {
+  return Members <= 4;
+}
+
 llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
Index: test/CodeGen/ppc64le-aggregates-cpp.cpp
===================================================================
--- test/CodeGen/ppc64le-aggregates-cpp.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
-
-// Test that C++ classes are correctly classified as homogeneous aggregates.
- -struct Base1 { - int x; -}; -struct Base2 { - double x; -}; -struct Base3 { - double x; -}; -struct D1 : Base1 { // non-homogeneous aggregate - double y, z; -}; -struct D2 : Base2 { // homogeneous aggregate - double y, z; -}; -struct D3 : Base1, Base2 { // non-homogeneous aggregate - double y, z; -}; -struct D4 : Base2, Base3 { // homogeneous aggregate - double y, z; -}; - -// CHECK: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce) -D1 func_D1(D1 x) { return x; } - -// CHECK: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) -D2 func_D2(D2 x) { return x; } - -// CHECK: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce) -D3 func_D3(D3 x) { return x; } - -// CHECK: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce) -D4 func_D4(D4 x) { return x; } - Index: test/CodeGen/ppc64le-aggregates.c =================================================================== --- test/CodeGen/ppc64le-aggregates.c +++ test/CodeGen/ppc64le-aggregates.c @@ -1,4 +1,3 @@ -// REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s // Test homogeneous float aggregate passing and returning. 
@@ -16,6 +15,8 @@
 struct fab { float a; float b; };
 struct fabc { float a; float b; float c; };
 
+struct f2a2b { float a[2]; float b[2]; };
+
 // CHECK: define [1 x float] @func_f1(float inreg %x.coerce)
 struct f1 func_f1(struct f1 x) { return x; }
 
@@ -49,6 +50,9 @@
 // CHECK: define [3 x float] @func_fabc([3 x float] %x.coerce)
 struct fabc func_fabc(struct fabc x) { return x; }
 
+// CHECK: define [4 x float] @func_f2a2b([4 x float] %x.coerce)
+struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
+
 // CHECK-LABEL: @call_f1
 // CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
Index: test/CodeGenCXX/homogeneous-aggregates.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC
+// RUN: %clang_cc1 -mfloat-abi hard -triple armv7-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM32
+// RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64
+
+// Test that C++ classes are correctly classified as homogeneous aggregates.
+ +struct Base1 { + int x; +}; +struct Base2 { + double x; +}; +struct Base3 { + double x; +}; +struct D1 : Base1 { // non-homogeneous aggregate + double y, z; +}; +struct D2 : Base2 { // homogeneous aggregate + double y, z; +}; +struct D3 : Base1, Base2 { // non-homogeneous aggregate + double y, z; +}; +struct D4 : Base2, Base3 { // homogeneous aggregate + double y, z; +}; + +// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce) +// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, { [3 x i64] } %x.coerce) +// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x) +D1 func_D1(D1 x) { return x; } + +// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) +// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce) +// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2) +D2 func_D2(D2 x) { return x; } + +// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce) +// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, { [4 x i64] } %x.coerce) +// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, %struct.D3* %x) +D3 func_D3(D3 x) { return x; } + +// PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce) +// ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce) +// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3) +D4 func_D4(D4 x) { return x; }