Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -158,6 +158,23 @@ return CC_C; } +static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) { + switch (FI.getEffectiveCallingConvention()) { + case llvm::CallingConv::C: + switch (Target.getTriple().getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + return true; + default: + return false; + } + case llvm::CallingConv::ARM_AAPCS_VFP: + return true; + default: + return false; + } +} + /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. /// (Zero value of RD means we don't have any meaningful "this" argument type, @@ -990,8 +1007,11 @@ // If the coerce-to type is a first class aggregate, flatten it. Either // way is semantically identical, but fast-isel and the optimizer // generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::Type *argType = argAI.getCoerceToType(); - if (llvm::StructType *st = dyn_cast(argType)) { + llvm::StructType *st = dyn_cast(argType); + if (st && !isAAPCSVFP(FI, getTarget())) { for (unsigned i = 0, e = st->getNumElements(); i != e; ++i) argTypes.push_back(st->getElementType(i)); } else { @@ -1193,14 +1213,15 @@ else if (ParamType->isUnsignedIntegerOrEnumerationType()) Attrs.addAttribute(llvm::Attribute::ZExt); // FALL THROUGH - case ABIArgInfo::Direct: + case ABIArgInfo::Direct: { if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); // FIXME: handle sseregparm someday... - if (llvm::StructType *STy = - dyn_cast(AI.getCoerceToType())) { + llvm::StructType *STy = + dyn_cast(AI.getCoerceToType()); + if (!isAAPCSVFP(FI, getTarget()) && STy) { unsigned Extra = STy->getNumElements()-1; // 1 will be added below. if (Attrs.hasAttributes()) for (unsigned I = 0; I < Extra; ++I) @@ -1209,7 +1230,7 @@ Index += Extra; } break; - + } case ABIArgInfo::Indirect: if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); @@ -1468,8 +1489,10 @@ // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::StructType *STy = dyn_cast(ArgI.getCoerceToType()); - if (STy && STy->getNumElements() > 1) { + if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) { uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = cast(Ptr->getType())->getElementType(); @@ -2705,8 +2728,11 @@ // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. - if (llvm::StructType *STy = - dyn_cast(ArgInfo.getCoerceToType())) { + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. + llvm::StructType *STy = + dyn_cast(ArgInfo.getCoerceToType()); + if (STy && !isAAPCSVFP(CallInfo, getTarget())) { llvm::Type *SrcTy = cast(SrcPtr->getType())->getElementType(); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -3199,7 +3199,7 @@ private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic, + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, bool &IsCPRC) const; bool isIllegalVectorType(QualType Ty) const; @@ -3305,22 +3305,10 @@ it != ie; ++it) { unsigned PreAllocationVFPs = AllocatedVFPs; unsigned PreAllocationGPRs = AllocatedGPRs; - bool IsHA = false; bool IsCPRC = false; // 6.1.2.3 There is one VFP co-processor register class using registers // s0-s15 (d0-d7) for passing arguments. - it->info = classifyArgumentType(it->type, IsHA, FI.isVariadic(), IsCPRC); - assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs"); - // If we do not have enough VFP registers for the HA, any VFP registers - // that are unallocated are marked as unavailable. To achieve this, we add - // padding of (NumVFPs - PreAllocationVFP) floats. - // Note that IsHA will only be set when using the AAPCS-VFP calling convention, - // and the callee is not variadic. - if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) { - llvm::Type *PaddingTy = llvm::ArrayType::get( - llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs); - it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy); - } + it->info = classifyArgumentType(it->type, FI.isVariadic(), IsCPRC); // If we have allocated some arguments onto the stack (due to running // out of VFP registers), we cannot split an argument between GPRs and @@ -3517,8 +3505,7 @@ VFPRegs[i] = 0; } -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA, - bool isVariadic, +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, bool &IsCPRC) const { // We update number of allocated VFPs according to // 6.1.2.1 The following argument types are VFP CPRCs: @@ -3630,9 +3617,8 @@ Base->isSpecificBuiltinType(BuiltinType::LongDouble)); markAllocatedVFPs(2, Members * 2); } - IsHA = true; IsCPRC = true; - return ABIArgInfo::getExpand(); + return ABIArgInfo::getDirect(); } } Index: test/CodeGen/arm-aapcs-vfp.c =================================================================== --- test/CodeGen/arm-aapcs-vfp.c +++ test/CodeGen/arm-aapcs-vfp.c @@ -19,13 +19,13 @@ float f3; float f4; }; -// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) +// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}}) extern struct homogeneous_struct struct_callee(struct homogeneous_struct); struct homogeneous_struct test_struct(struct homogeneous_struct arg) { return struct_callee(arg); } -// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, [4 x i32] %{{.*}}, ...) +// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, ...) struct homogeneous_struct test_struct_variadic(struct homogeneous_struct arg, ...) { return struct_callee(arg); } @@ -33,21 +33,21 @@ struct nested_array { double d[4]; }; -// CHECK: define arm_aapcs_vfpcc void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}}) extern void array_callee(struct nested_array); void test_array(struct nested_array arg) { array_callee(arg); } extern void complex_callee(__complex__ double); -// CHECK: define arm_aapcs_vfpcc void @test_complex(double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}}) void test_complex(__complex__ double cd) { complex_callee(cd); } // Long double is the same as double on AAPCS, it should be homogeneous. extern void complex_long_callee(__complex__ long double); -// CHECK: define arm_aapcs_vfpcc void @test_complex_long(double %{{.*}}, double %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_complex_long({ double, double } %{{.*}}) void test_complex_long(__complex__ long double cd) { complex_callee(cd); } @@ -61,7 +61,7 @@ float f3; float f4; }; -// CHECK: define arm_aapcs_vfpcc void @test_big([5 x i32] %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_big({ [5 x i32] } %{{.*}}) extern void big_callee(struct big_struct); void test_big(struct big_struct arg) { big_callee(arg); @@ -74,14 +74,14 @@ float f1; int i2; }; -// CHECK: define arm_aapcs_vfpcc void @test_hetero([2 x i32] %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_hetero({ [2 x i32] } %{{.*}}) extern void hetero_callee(struct heterogeneous_struct); void test_hetero(struct heterogeneous_struct arg) { hetero_callee(arg); } // Neon multi-vector types are homogeneous aggregates. -// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) +// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}}) int8x16_t f0(int8x16x4_t v4) { return vaddq_s8(v4.val[0], v4.val[3]); } @@ -94,7 +94,7 @@ int32x2_t v3; int16x4_t v4; }; -// CHECK: define arm_aapcs_vfpcc void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}}) +// CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}}) extern void neon_callee(struct neon_struct); void test_neon(struct neon_struct arg) { neon_callee(arg); @@ -108,8 +108,8 @@ // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l) void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {} -// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], i64 %k.0, i32 %k.1) +// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, { [2 x i64] } %{{.*}}) void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {} -// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], i64 %k.0, i32 %k.1) +// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, { [2 x i64] } %{{.*}}) struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {} Index: test/CodeGen/arm-homogenous.c =================================================================== --- test/CodeGen/arm-homogenous.c +++ test/CodeGen/arm-homogenous.c @@ -17,7 +17,7 @@ void test_union_with_first_floats(void) { takes_union_with_first_floats(g_u_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats([4 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats({ [4 x i32] }) void test_return_union_with_first_floats(void) { g_u_f = returns_union_with_first_floats(); @@ -37,7 +37,7 @@ void test_union_with_non_first_floats(void) { takes_union_with_non_first_floats(g_u_nf_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats([4 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats({ [4 x i32] }) void test_return_union_with_non_first_floats(void) { g_u_nf_f = returns_union_with_non_first_floats(); @@ -57,7 +57,7 @@ void test_struct_with_union_with_first_floats(void) { takes_struct_with_union_with_first_floats(g_s_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats([5 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats({ [5 x i32] }) void test_return_struct_with_union_with_first_floats(void) { g_s_f = returns_struct_with_union_with_first_floats(); @@ -77,7 +77,7 @@ void test_struct_with_union_with_non_first_floats(void) { takes_struct_with_union_with_non_first_floats(g_s_nf_f); } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats([5 x i32]) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats({ [5 x i32] }) void test_return_struct_with_union_with_non_first_floats(void) { g_s_nf_f = returns_struct_with_union_with_non_first_floats(); @@ -103,9 +103,9 @@ void test_struct_with_fundamental_elems(void) { takes_struct_with_fundamental_elems(g_s); -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float{{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems) void test_return_struct_with_fundamental_elems(void) { g_s = returns_struct_with_fundamental_elems(); @@ -124,9 +124,9 @@ void test_struct_with_array(void) { takes_struct_with_array(g_s_a); -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_array(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array) void test_return_struct_with_array(void) { g_s_a = returns_struct_with_array(); @@ -146,9 +146,9 @@ void test_union_with_struct_with_fundamental_elems(void) { takes_union_with_struct_with_fundamental_elems(g_u_s_fe); -// CHECK: call arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems {{.*}}) } -// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float, float, float, float) +// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems) void test_return_union_with_struct_with_fundamental_elems(void) { g_u_s_fe = returns_union_with_struct_with_fundamental_elems(); @@ -169,7 +169,7 @@ void test_struct_of_four_doubles(void) { // CHECK: test_struct_of_four_doubles -// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}}) takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0); } @@ -177,14 +177,14 @@ void test_struct_of_four_doubles_variadic(void) { // CHECK: test_struct_of_four_doubles_variadic -// CHECK: call arm_aapcs_vfpcc void (double, [4 x i64], [4 x i64], double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, [4 x i64] {{.*}}, [4 x i64] {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void (double, { [4 x i64] }, { [4 x i64] }, double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, { [4 x i64] } {{.*}}, { [4 x i64] } {{.*}}, double {{.*}}) takes_struct_of_four_doubles_variadic(3.0, g_s4d, g_s4d, 4.0); } extern void takes_struct_with_backfill(float f1, double a, float f2, struct_of_four_doubles b, struct_of_four_doubles c, double d); void test_struct_with_backfill(void) { // CHECK: test_struct_with_backfill -// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [4 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}}) takes_struct_with_backfill(3.0, 3.1, 3.2, g_s4d, g_s4d, 4.0); } @@ -201,7 +201,7 @@ void test_struct_of_vecs(void) { // CHECK: test_struct_of_vecs -// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}}) +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}}) takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0); }