Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -158,6 +158,23 @@ return CC_C; } +static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) { + switch (FI.getEffectiveCallingConvention()) { + case llvm::CallingConv::C: + switch (Target.getTriple().getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + return true; + default: + return false; + } + case llvm::CallingConv::ARM_AAPCS_VFP: + return true; + default: + return false; + } +} + /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. /// (Zero value of RD means we don't have any meaningful "this" argument type, @@ -995,8 +1012,11 @@ // If the coerce-to type is a first class aggregate, flatten it. Either // way is semantically identical, but fast-isel and the optimizer // generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::Type *argType = argAI.getCoerceToType(); - if (llvm::StructType *st = dyn_cast(argType)) { + llvm::StructType *st = dyn_cast(argType); + if (st && !isAAPCSVFP(FI, getTarget())) { for (unsigned i = 0, e = st->getNumElements(); i != e; ++i) argTypes.push_back(st->getElementType(i)); } else { @@ -1199,14 +1219,15 @@ else if (ParamType->isUnsignedIntegerOrEnumerationType()) Attrs.addAttribute(llvm::Attribute::ZExt); // FALL THROUGH - case ABIArgInfo::Direct: + case ABIArgInfo::Direct: { if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); // FIXME: handle sseregparm someday... - if (llvm::StructType *STy = - dyn_cast(AI.getCoerceToType())) { + llvm::StructType *STy = + dyn_cast(AI.getCoerceToType()); + if (!isAAPCSVFP(FI, getTarget()) && STy) { unsigned Extra = STy->getNumElements()-1; // 1 will be added below. if (Attrs.hasAttributes()) for (unsigned I = 0; I < Extra; ++I) @@ -1215,7 +1236,7 @@ Index += Extra; } break; - + } case ABIArgInfo::Indirect: if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); @@ -1474,8 +1495,10 @@ // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::StructType *STy = dyn_cast(ArgI.getCoerceToType()); - if (STy && STy->getNumElements() > 1) { + if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) { uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = cast(Ptr->getType())->getElementType(); @@ -2735,8 +2758,11 @@ // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. - if (llvm::StructType *STy = - dyn_cast(ArgInfo.getCoerceToType())) { + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. + llvm::StructType *STy = + dyn_cast(ArgInfo.getCoerceToType()); + if (STy && !isAAPCSVFP(CallInfo, getTarget())) { llvm::Type *SrcTy = cast(SrcPtr->getType())->getElementType(); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -3796,7 +3796,7 @@ private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic, + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, bool &IsCPRC) const; bool isIllegalVectorType(QualType Ty) const; @@ -3901,22 +3901,10 @@ for (auto &I : FI.arguments()) { unsigned PreAllocationVFPs = AllocatedVFPs; unsigned PreAllocationGPRs = AllocatedGPRs; - bool IsHA = false; bool IsCPRC = false; // 6.1.2.3 There is one VFP co-processor register class using registers // s0-s15 (d0-d7) for passing arguments. - I.info = classifyArgumentType(I.type, IsHA, FI.isVariadic(), IsCPRC); - assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs"); - // If we do not have enough VFP registers for the HA, any VFP registers - // that are unallocated are marked as unavailable. To achieve this, we add - // padding of (NumVFPs - PreAllocationVFP) floats. - // Note that IsHA will only be set when using the AAPCS-VFP calling convention, - // and the callee is not variadic. - if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) { - llvm::Type *PaddingTy = llvm::ArrayType::get( - llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs); - I.info = ABIArgInfo::getExpandWithPadding(false, PaddingTy); - } + I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC); // If we have allocated some arguments onto the stack (due to running // out of VFP registers), we cannot split an argument between GPRs and @@ -3930,6 +3918,7 @@ llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs); I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */, PaddingTy); + } } @@ -4113,8 +4102,7 @@ VFPRegs[i] = 0; } -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA, - bool isVariadic, +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, bool &IsCPRC) const { // We update number of allocated VFPs according to // 6.1.2.1 The following argument types are VFP CPRCs: @@ -4226,9 +4214,8 @@ Base->isSpecificBuiltinType(BuiltinType::LongDouble)); markAllocatedVFPs(2, Members * 2); } - IsHA = true; IsCPRC = true; - return ABIArgInfo::getExpand(); + return ABIArgInfo::getDirect(); } } @@ -4242,7 +4229,7 @@ getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { - // Update Allocated GPRs + // Update Allocated GPRs markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); Index: test/CodeGen/arm-aapcs-vfp.c =================================================================== --- test/CodeGen/arm-aapcs-vfp.c +++ test/CodeGen/arm-aapcs-vfp.c @@ -27,14 +27,14 @@ float f3; float f4; }; -// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) +// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}}) // CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) extern struct homogeneous_struct struct_callee(struct homogeneous_struct); struct homogeneous_struct test_struct(struct homogeneous_struct arg) { return struct_callee(arg); } -// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, [4 x i32] %{{.*}}, ...) +// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, ...) struct homogeneous_struct test_struct_variadic(struct homogeneous_struct arg, ...) { return struct_callee(arg); } @@ -42,7 +42,7 @@ struct nested_array { double d[4]; }; -// CHECK: define arm_aapcs_vfpcc void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}}) // CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}) extern void array_callee(struct nested_array); void test_array(struct nested_array arg) { @@ -50,7 +50,7 @@ } extern void complex_callee(__complex__ double); -// CHECK: define arm_aapcs_vfpcc void @test_complex(double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}}) // CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}}) void test_complex(__complex__ double cd) { complex_callee(cd); @@ -58,7 +58,7 @@ // Long double is the same as double on AAPCS, it should be homogeneous. extern void complex_long_callee(__complex__ long double); -// CHECK: define arm_aapcs_vfpcc void @test_complex_long(double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_complex_long({ double, double } %{{.*}}) void test_complex_long(__complex__ long double cd) { complex_callee(cd); } @@ -72,7 +72,7 @@ float f3; float f4; }; -// CHECK: define arm_aapcs_vfpcc void @test_big([5 x i32] %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_big({ [5 x i32] } %{{.*}}) // CHECK64: define void @test_big(%struct.big_struct* %{{.*}}) // CHECK64: call void @llvm.memcpy // CHECK64: call void @big_callee(%struct.big_struct* @@ -88,7 +88,7 @@ float f1; int i2; }; -// CHECK: define arm_aapcs_vfpcc void @test_hetero([2 x i32] %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_hetero({ [2 x i32] } %{{.*}}) // CHECK64: define void @test_hetero(i64 %{{.*}}) extern void hetero_callee(struct heterogeneous_struct); void test_hetero(struct heterogeneous_struct arg) { @@ -96,7 +96,7 @@ } // Neon multi-vector types are homogeneous aggregates. -// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) +// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}}) // CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) int8x16_t f0(int8x16x4_t v4) { return vaddq_s8(v4.val[0], v4.val[3]); @@ -110,7 +110,7 @@ int32x2_t v3; int16x4_t v4; }; -// CHECK: define arm_aapcs_vfpcc void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}}) // CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}}) extern void neon_callee(struct neon_struct); void test_neon(struct neon_struct arg) { @@ -125,12 +125,12 @@ // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l) void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {} -// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], i64 %k.coerce0, i32 %k.coerce1) +// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], %struct.struct_long_long_int %k.coerce) void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {} -// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], i64 %k.coerce0, i32 %k.coerce1) +// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], %struct.struct_long_long_int %k.coerce) struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {} typedef struct { int a; int b:4; int c; } struct_int_bitfield_int; -// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [2 x i32], i32 %l.coerce0, i8 %l.coerce1, i32 %l.coerce2) +// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [2 x i32], %struct.struct_int_bitfield_int %l.coerce) void test_test_vfp_stack_gpr_split_bitfield(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, int k, struct_int_bitfield_int l) {} Index: test/CodeGen/arm-homogenous.c =================================================================== --- test/CodeGen/arm-homogenous.c +++ test/CodeGen/arm-homogenous.c @@ -22,7 +22,7 @@ void test_union_with_first_floats(void) { takes_union_with_first_floats(g_u_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats([4 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats({ [4 x i32] }) void test_return_union_with_first_floats(void) { g_u_f = returns_union_with_first_floats(); @@ -42,7 +42,7 @@ void test_union_with_non_first_floats(void) { takes_union_with_non_first_floats(g_u_nf_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats([4 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats({ [4 x i32] }) void test_return_union_with_non_first_floats(void) { g_u_nf_f = returns_union_with_non_first_floats(); @@ -62,7 +62,7 @@ void test_struct_with_union_with_first_floats(void) { takes_struct_with_union_with_first_floats(g_s_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats([5 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats({ [5 x i32] }) void test_return_struct_with_union_with_first_floats(void) { g_s_f = returns_struct_with_union_with_first_floats(); @@ -82,7 +82,7 @@ void test_struct_with_union_with_non_first_floats(void) { takes_struct_with_union_with_non_first_floats(g_s_nf_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats([5 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats({ [5 x i32] }) void test_return_struct_with_union_with_non_first_floats(void) { g_s_nf_f = returns_struct_with_union_with_non_first_floats(); @@ -108,9 +108,9 @@ void test_struct_with_fundamental_elems(void) { takes_struct_with_fundamental_elems(g_s); -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float{{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems) void test_return_struct_with_fundamental_elems(void) { g_s = returns_struct_with_fundamental_elems(); @@ -129,9 +129,9 @@ void test_struct_with_array(void) { takes_struct_with_array(g_s_a); -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_array(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array) void test_return_struct_with_array(void) { g_s_a = returns_struct_with_array(); @@ -151,9 +151,9 @@ void test_union_with_struct_with_fundamental_elems(void) { takes_union_with_struct_with_fundamental_elems(g_u_s_fe); -// CHECK: call arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems) void test_return_union_with_struct_with_fundamental_elems(void) { g_u_s_fe = returns_union_with_struct_with_fundamental_elems(); @@ -174,7 +174,7 @@ void test_struct_of_four_doubles(void) { // CHECK: test_struct_of_four_doubles -// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}}) // CHECK64: test_struct_of_four_doubles // CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) // CHECK64-AAPCS: test_struct_of_four_doubles @@ -186,14 +186,14 @@ void test_struct_of_four_doubles_variadic(void) { // CHECK: test_struct_of_four_doubles_variadic -// CHECK: call arm_aapcs_vfpcc void (double, [4 x i64], [4 x i64], double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, [4 x i64] {{.*}}, [4 x i64] {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void (double, { [4 x i64] }, { [4 x i64] }, double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, { [4 x i64] } {{.*}}, { [4 x i64] } {{.*}}, double {{.*}}) takes_struct_of_four_doubles_variadic(3.0, g_s4d, g_s4d, 4.0); } extern void takes_struct_with_backfill(float f1, double a, float f2, struct_of_four_doubles b, struct_of_four_doubles c, double d); void test_struct_with_backfill(void) { // CHECK: test_struct_with_backfill -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [4 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}}) takes_struct_with_backfill(3.0, 3.1, 3.2, g_s4d, g_s4d, 4.0); } @@ -210,7 +210,7 @@ void test_struct_of_vecs(void) { // CHECK: test_struct_of_vecs -// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}}) // CHECK64: test_struct_of_vecs // CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}}) // CHECK64-AAPCS: test_struct_of_vecs