Index: clang/include/clang/CodeGen/CGFunctionInfo.h =================================================================== --- clang/include/clang/CodeGen/CGFunctionInfo.h +++ clang/include/clang/CodeGen/CGFunctionInfo.h @@ -94,12 +94,17 @@ llvm::Type *UnpaddedCoerceAndExpandType; // isCoerceAndExpand() }; union { - unsigned DirectOffset; // isDirect() || isExtend() - unsigned IndirectAlign; // isIndirect() + struct { + unsigned Offset; // isDirect() || isExtend() + unsigned Align; + } DirectAttr; + struct { + unsigned Align; // isIndirect() + unsigned AddrSpace; + } IndirectAttr; unsigned AllocaFieldIndex; // isInAlloca() }; Kind TheKind; - unsigned IndirectAddrSpace : 24; // isIndirect() bool PaddingInReg : 1; bool InAllocaSRet : 1; // isInAlloca() bool InAllocaIndirect : 1;// isInAlloca() @@ -126,19 +131,21 @@ public: ABIArgInfo(Kind K = Direct) - : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K), - IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false), + : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K), + PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false), InReg(false), CanBeFlattened(false), SignExt(false) {} static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0, llvm::Type *Padding = nullptr, - bool CanBeFlattened = true) { + bool CanBeFlattened = true, + unsigned Align = 0) { auto AI = ABIArgInfo(Direct); AI.setCoerceToType(T); AI.setPaddingType(Padding); AI.setDirectOffset(Offset); + AI.setDirectAlign(Align); AI.setCanBeFlattened(CanBeFlattened); return AI; } @@ -154,6 +161,7 @@ AI.setCoerceToType(T); AI.setPaddingType(nullptr); AI.setDirectOffset(0); + AI.setDirectAlign(0); AI.setSignExt(true); return AI; } @@ -164,6 +172,7 @@ AI.setCoerceToType(T); AI.setPaddingType(nullptr); AI.setDirectOffset(0); + AI.setDirectAlign(0); AI.setSignExt(false); return AI; } @@ -299,11 +308,20 @@ // Direct/Extend accessors 
unsigned getDirectOffset() const { assert((isDirect() || isExtend()) && "Not a direct or extend kind"); - return DirectOffset; + return DirectAttr.Offset; } void setDirectOffset(unsigned Offset) { assert((isDirect() || isExtend()) && "Not a direct or extend kind"); - DirectOffset = Offset; + DirectAttr.Offset = Offset; + } + + unsigned getDirectAlign() const { + assert((isDirect() || isExtend()) && "Not a direct or extend kind"); + return DirectAttr.Align; + } + void setDirectAlign(unsigned Align) { + assert((isDirect() || isExtend()) && "Not a direct or extend kind"); + DirectAttr.Align = Align; } bool isSignExt() const { @@ -369,11 +387,11 @@ // Indirect accessors CharUnits getIndirectAlign() const { assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); - return CharUnits::fromQuantity(IndirectAlign); + return CharUnits::fromQuantity(IndirectAttr.Align); } void setIndirectAlign(CharUnits IA) { assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); - IndirectAlign = IA.getQuantity(); + IndirectAttr.Align = IA.getQuantity(); } bool getIndirectByVal() const { @@ -387,12 +405,12 @@ unsigned getIndirectAddrSpace() const { assert(isIndirectAliased() && "Invalid kind!"); - return IndirectAddrSpace; + return IndirectAttr.AddrSpace; } void setIndirectAddrSpace(unsigned AddrSpace) { assert(isIndirectAliased() && "Invalid kind!"); - IndirectAddrSpace = AddrSpace; + IndirectAttr.AddrSpace = AddrSpace; } bool getIndirectRealign() const { Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2363,6 +2363,7 @@ Attrs.addAttribute(llvm::Attribute::Nest); else if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); + Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign())); break; case ABIArgInfo::Indirect: { Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- 
clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5673,8 +5673,19 @@ const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { + if (Kind != AArch64ABIInfo::AAPCS) + return ABIArgInfo::getDirect( + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); + + // For alignment adjusted HFAs, cap the argument alignment to 16, leave it + // default otherwise. + unsigned Align = + getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); + unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); + Align = (Align > BaseAlign && Align >= 16) ? 16 : 0; return ABIArgInfo::getDirect( - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0, + nullptr, true, Align); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. Index: clang/test/CodeGen/aarch64-args-hfa.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-args-hfa.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS +// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN +// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS + +typedef struct { + float v[2]; +} S0; + +// CHECK: define{{.*}} float @f0([2 x float] %h.coerce) +float f0(S0 h) { + return h.v[0]; +} + +// CHECK: define{{.*}} float @f0_call() +// CHECK: %call = call float @f0([2 x float] %1) +float f0_call() { + S0 h = {1.0f, 2.0f}; + return f0(h); +} +typedef struct { + double v[2]; +} S1; + +// CHECK: define{{.*}} double @f1([2 x double] %h.coerce) +double f1(S1 h) { + return h.v[0]; +} + +// CHECK: define{{.*}} double @f1_call() +// CHECK: %call = call double @f1([2 x 
double] %1 +double f1_call() { + S1 h = {1.0, 2.0}; + return f1(h); +} +typedef struct { + __attribute__((__aligned__(16))) double v[2]; +} S2; + +// CHECK-AAPCS: define{{.*}} double @f2([2 x double] alignstack(16) %h.coerce) +// CHECK-DARWIN: define{{.*}} double @f2([2 x double] %h.coerce) +double f2(S2 h) { + return h.v[0]; +} + +// CHECK: define{{.*}} double @f2_call() +// CHECK-AAPCS: %call = call double @f2([2 x double] alignstack(16) %1) +// CHECK-DARWIN: %call = call double @f2([2 x double] %1 +double f2_call() { + S2 h = {1.0, 2.0}; + return f2(h); +} + +typedef struct { + __attribute__((__aligned__(32))) double v[4]; +} S3; + +// CHECK-AAPCS: define{{.*}} double @f3([4 x double] alignstack(16) %h.coerce) +// CHECK-DARWIN: define{{.*}} double @f3([4 x double] %h.coerce) +double f3(S3 h) { + return h.v[0]; +} + +// CHECK: define{{.*}} double @f3_call() +// CHECK-AAPCS: %call = call double @f3([4 x double] alignstack(16) %1) +// CHECK-DARWIN: %call = call double @f3([4 x double] %1 +double f3_call() { + S3 h = {1.0, 2.0}; + return f3(h); +} Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -1297,6 +1297,15 @@ undefined. Note that this does not refer to padding introduced by the type's storage representation. +``alignstack(<n>)`` + This indicates the alignment that should be considered by the backend when + assigning this parameter to a stack slot during calling convention + lowering. The enforcement of the specified alignment is target-dependent, + as target-specific calling convention rules may override this value. This + attribute serves the purpose of carrying language-specific alignment + information that is not mapped to base types in the backend (for example, + over-alignment specification through language attributes). + .. 
_gc: Garbage Collector Strategy Names Index: llvm/include/llvm/IR/Argument.h =================================================================== --- llvm/include/llvm/IR/Argument.h +++ llvm/include/llvm/IR/Argument.h @@ -102,6 +102,8 @@ /// If this is a byval or inalloca argument, return its alignment. MaybeAlign getParamAlign() const; + MaybeAlign getParamStackAlign() const; + /// If this is a byval argument, return its type. Type *getParamByValType() const; Index: llvm/include/llvm/IR/Attributes.h =================================================================== --- llvm/include/llvm/IR/Attributes.h +++ llvm/include/llvm/IR/Attributes.h @@ -652,6 +652,9 @@ /// Return the alignment for the specified function parameter. MaybeAlign getParamAlignment(unsigned ArgNo) const; + /// Return the stack alignment for the specified function parameter. + MaybeAlign getParamStackAlignment(unsigned ArgNo) const; + /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; Index: llvm/include/llvm/IR/Function.h =================================================================== --- llvm/include/llvm/IR/Function.h +++ llvm/include/llvm/IR/Function.h @@ -479,6 +479,10 @@ return AttributeSets.getParamAlignment(ArgNo); } + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return AttributeSets.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a parameter. Type *getParamByValType(unsigned ArgNo) const { return AttributeSets.getParamByValType(ArgNo); Index: llvm/include/llvm/IR/InstrTypes.h =================================================================== --- llvm/include/llvm/IR/InstrTypes.h +++ llvm/include/llvm/IR/InstrTypes.h @@ -1722,6 +1722,10 @@ return Attrs.getParamAlignment(ArgNo); } + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return Attrs.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a call or parameter. 
Type *getParamByValType(unsigned ArgNo) const { Type *Ty = Attrs.getParamByValType(ArgNo); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -1707,6 +1707,13 @@ B.addAlignmentAttr(Alignment); continue; } + case lltok::kw_alignstack: { + unsigned Alignment; + if (parseOptionalStackAlignment(Alignment)) + return true; + B.addStackAlignmentAttr(Alignment); + continue; + } case lltok::kw_byval: { Type *Ty; if (parseRequiredTypeAttr(Ty, lltok::kw_byval)) @@ -1769,7 +1776,6 @@ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break; - case lltok::kw_alignstack: case lltok::kw_alwaysinline: case lltok::kw_argmemonly: case lltok::kw_builtin: Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -163,7 +163,9 @@ // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. Align FrameAlign; - if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 1)) + if (auto ParamAlign = FuncInfo.getParamStackAlign(OpIdx - 1)) + FrameAlign = *ParamAlign; + else if ((ParamAlign = FuncInfo.getParamAlign(OpIdx - 1))) FrameAlign = *ParamAlign; else FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); @@ -175,6 +177,9 @@ // swiftself, since it won't be passed in x0. 
if (Flags.isSwiftSelf()) Flags.setReturned(false); + + if (auto ParamAlign = FuncInfo.getParamStackAlign(OpIdx - 1)) + Flags.setOrigAlign(*ParamAlign); } template void Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1143,7 +1143,8 @@ if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); - + if (Arg.Alignment) + Flags.setOrigAlign(*Arg.Alignment); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9421,8 +9421,12 @@ // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
- const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); - + if (Args[i].Alignment) + Flags.setOrigAlign(*Args[i].Alignment); + else { + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); + Flags.setOrigAlign(OriginalAlignment); + } if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); Flags.setPointerAddrSpace( @@ -9495,7 +9499,6 @@ Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); @@ -10043,6 +10046,8 @@ if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (Arg.getParamStackAlign()) + Flags.setOrigAlign(*Arg.getParamStackAlign()); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); if (Arg.hasAttribute(Attribute::Returned)) Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -115,10 +115,13 @@ IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); - Alignment = Call->getParamAlign(ArgIdx); + Alignment = Call->getParamStackAlign(ArgIdx); ByValType = nullptr; - if (IsByVal) + if (IsByVal) { ByValType = Call->getParamByValType(ArgIdx); + if (!Alignment) + Alignment = Call->getParamAlign(ArgIdx); + } PreallocatedType = nullptr; if (IsPreallocated) PreallocatedType = Call->getParamPreallocatedType(ArgIdx); Index: llvm/lib/IR/Attributes.cpp =================================================================== --- llvm/lib/IR/Attributes.cpp +++ llvm/lib/IR/Attributes.cpp @@ -1491,6 +1491,10 @@ return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } +MaybeAlign AttributeList::getParamStackAlignment(unsigned ArgNo) 
const { + return getAttributes(ArgNo + FirstArgIndex).getStackAlignment(); +} + Type *AttributeList::getParamByValType(unsigned Index) const { return getAttributes(Index+FirstArgIndex).getByValType(); } Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -198,6 +198,10 @@ return getParent()->getParamAlign(getArgNo()); } +MaybeAlign Argument::getParamStackAlign() const { + return getParent()->getParamStackAlign(getArgNo()); +} + Type *Argument::getParamByValType() const { assert(getType()->isPointerTy() && "Only pointers have byval types"); return getParent()->getParamByValType(getArgNo()); Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -1627,7 +1627,6 @@ case Attribute::NoImplicitFloat: case Attribute::Naked: case Attribute::InlineHint: - case Attribute::StackAlignment: case Attribute::UWTable: case Attribute::NonLazyBind: case Attribute::ReturnsTwice: @@ -1669,7 +1668,7 @@ static bool isFuncOrArgAttr(Attribute::AttrKind Kind) { return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly || Kind == Attribute::ReadNone || Kind == Attribute::NoFree || - Kind == Attribute::Preallocated; + Kind == Attribute::Preallocated || Kind == Attribute::StackAlignment; } void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, Index: llvm/lib/Target/AArch64/AArch64CallingConvention.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.cpp +++ llvm/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -88,13 +88,8 @@ } unsigned Size = LocVT.getSizeInBits() / 8; - const Align StackAlign = - State.getMachineFunction().getDataLayout().getStackAlignment(); - const Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); - const Align Alignment = std::min(OrigAlign, StackAlign); - for 
(auto &It : PendingMembers) { - It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign))); + It.convertToMem(State.AllocateStack(Size, SlotAlign)); State.addLoc(It); SlotAlign = Align(1); } @@ -197,7 +192,12 @@ State.AllocateReg(Reg); } - const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8); + const Align StackAlign = + State.getMachineFunction().getDataLayout().getStackAlignment(); + const Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); + Align SlotAlign = std::min(OrigAlign, StackAlign); + if (!Subtarget.isTargetDarwin()) + SlotAlign = std::max(SlotAlign, Align(8)); return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); } Index: llvm/test/Bitcode/compatibility.ll =================================================================== --- llvm/test/Bitcode/compatibility.ll +++ llvm/test/Bitcode/compatibility.ll @@ -550,6 +550,8 @@ ; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) ; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +declare void @f.param.stack_align([2 x double] alignstack(16)) +; CHECK: declare void @f.param.stack_align([2 x double] alignstack(16)) ; Functions -- unnamed_addr and local_unnamed_addr declare void @f.unnamed_addr() unnamed_addr Index: llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s + +; Over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_arg_reg([2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_reg: +; CHECK-NOT: mov +; CHECK-NOT: ld +; CHECK: ret + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double 
%h.coerce.fca.0.extract +} + +; Call with over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_call_reg() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_call_reg: +; CHECK-DAG: fmov d0, #1.00000000 +; CHECK-DAG: fmov d1, #2.00000000 +; CHECK: bl test_hfa_align_arg_reg + %call = call double @test_hfa_align_arg_reg([2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00]) + ret double %call +} + +; Over-aligned HFA argument placed on stack - stack round up to alignment +define double @test_hfa_align_arg_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f, [2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_stack: +; CHECK: ldr d0, [sp, #16] +; CHECK-NEXT: ret + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double %h.coerce.fca.0.extract +}