diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5934,7 +5934,10 @@ return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); + return ABIArgInfo::getDirect(nullptr, /*Offset=*/0, /*Padding=*/nullptr, + /*CanBeFlattened=*/false, NeedsStackAlignment); } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, @@ -6000,9 +6003,13 @@ uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); - return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + return ABIArgInfo::getDirect(Ty, /*Offset=*/0, /*Padding=*/nullptr, + /*CanBeFlattened=*/false, + NeedsStackAlignment); } } diff --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c --- a/clang/test/CodeGen/arm-aapcs-vfp.c +++ b/clang/test/CodeGen/arm-aapcs-vfp.c @@ -147,3 +147,17 @@ // is passed ByVal (due to being > 64 bytes), so the backend handles this instead. 
void test_vfp_stack_gpr_split_6(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_seventeen_ints k) {} // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %k) + +// Make sure over-alignment information is propagated to the backend properly +typedef struct { + __attribute__((__aligned__(8))) float v[2]; +} hfa_align; +// CHECK: define arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %h1.coerce) #0 +float test_hfa_align_arg(hfa_align h1) { + return h1.v[0]; +} +// CHECK: %call = call arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %1) #4 +float test_hfa_align_call() { + hfa_align h = {1.0, 2.0}; + return test_hfa_align_arg(h); +} diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp --- a/llvm/lib/Target/ARM/ARMCallingConv.cpp +++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp @@ -266,7 +266,10 @@ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll // be allocating a bunch of i32 slots). 
unsigned RestAlign = std::min(Align, Size); - + if (ArgFlags.getStackAlign()) { + const llvm::Align ArgStackAlign(ArgFlags.getStackAlign()); + Align = std::max(Align, unsigned(ArgStackAlign.value())); + } for (auto &It : PendingMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); diff --git a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll --- a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll +++ b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -3,6 +3,8 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +%struct.hfa_align = type { [2 x float] } + define arm_aapcs_vfpcc void @test_1float({ float } %a) { call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 }) ret void @@ -104,3 +106,73 @@ ret void } + +; Over-aligned HFA argument placed in registers - one element per register +define arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_reg: +; CHECK-DAG: bx lr + +; CHECK-M4F-LABEL: test_hfa_align_reg: +; CHECK-M4F-DAG: bx lr + + %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0 + ret float %h1.coerce.fca.0.0.extract +} + +; Call with over-aligned HFA argument placed in registers - one element per register +define arm_aapcs_vfpcc float @test_hfa_align_reg_call() local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_reg_call: +; CHECK-DAG: vmov.f32 s0, #1.000000e+00 +; CHECK-DAG: vmov.f32 s1, #2.000000e+00 +; CHECK-DAG: bl test_hfa_align_reg + +; CHECK-M4F-LABEL: test_hfa_align_reg_call: +; CHECK-M4F-DAG: vmov.f32 s0, #1.000000e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.000000e+00 +; CHECK-M4F-DAG: bl test_hfa_align_reg + + %call = call arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5 + ret float %call +} + +; Over-aligned HFA argument placed on the stack - stack rounded up to alignment 
+define arm_aapcs_vfpcc float @test_hfa_align_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f1, %struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_stack: +; CHECK-DAG: vldr s0, [sp, #8] +; CHECK-DAG: bx lr + +; CHECK-M4F-LABEL: test_hfa_align_stack: +; CHECK-M4F-DAG: vldr s0, [sp, #8] +; CHECK-M4F-DAG: bx lr + + %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0 + ret float %h1.coerce.fca.0.0.extract +} + +; Call with over-aligned HFA argument placed on the stack - stack rounded up to alignment +define arm_aapcs_vfpcc float @test_hfa_align_stack_call() local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_stack_call: +; CHECK-DAG: sub sp, sp, #16 +; CHECK-DAG: mov r0, #1073741824 +; CHECK-DAG: mov r1, #1065353216 +; CHECK-DAG: str r1, [sp, #8] +; CHECK-DAG: str r0, [sp, #12] +; CHECK-DAG: bl test_hfa_align_stack +; CHECK-DAG: add sp, sp, #16 + +; CHECK-M4F-LABEL: test_hfa_align_stack_call: +; CHECK-M4F-DAG: sub sp, #16 +; CHECK-M4F-DAG: mov.w r0, #1073741824 +; CHECK-M4F-DAG: mov.w r1, #1065353216 +; CHECK-M4F-DAG: strd r1, r0, [sp, #8] +; CHECK-M4F-DAG: bl test_hfa_align_stack +; CHECK-M4F-DAG: add sp, #16 + + %call = call arm_aapcs_vfpcc float @test_hfa_align_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, %struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5 + ret float %call +} +