diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -94,6 +94,7 @@ bool SRetAfterThis : 1; // isIndirect() bool InReg : 1; // isDirect() || isExtend() || isIndirect() bool CanBeFlattened: 1; // isDirect() + bool HasStackAlign: 1; // isDirect() bool SignExt : 1; // isExtend() bool canHavePaddingType() const { @@ -118,12 +119,14 @@ static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0, llvm::Type *Padding = nullptr, - bool CanBeFlattened = true) { + bool CanBeFlattened = true, + bool HasStackAlign = false) { auto AI = ABIArgInfo(Direct); AI.setCoerceToType(T); AI.setPaddingType(Padding); AI.setDirectOffset(Offset); AI.setCanBeFlattened(CanBeFlattened); + AI.setStackAlign(HasStackAlign); return AI; } static ABIArgInfo getDirectInReg(llvm::Type *T = nullptr) { @@ -413,6 +416,15 @@ CanBeFlattened = Flatten; } + bool hasStackAlign() const { + return HasStackAlign; + } + + void setStackAlign(bool HasAlign) { + assert(isDirect() && "Invalid kind!"); + HasStackAlign = HasAlign; + } + void dump() const; }; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2123,6 +2123,9 @@ Attrs.addAttribute(llvm::Attribute::Nest); else if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); + if (AI.hasStackAlign()) + Attrs.addStackAlignmentAttr( + getContext().getTypeAlignInChars(ParamType).getQuantity()); break; case ABIArgInfo::Indirect: { diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5233,8 +5233,12 @@ const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); return ABIArgInfo::getDirect( - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), + /*Offset=*/0, /*Padding=*/nullptr, /*CanBeFlattened=*/true, + NeedsStackAlignment); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. diff --git a/clang/test/CodeGen/aarch64-args-hfa.c b/clang/test/CodeGen/aarch64-args-hfa.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-args-hfa.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s + +// CHECK: %struct.hfa_align = type { [2 x double] } +typedef struct { + __attribute__((__aligned__(16))) double v[2]; +} hfa_align; + +// CHECK: define double @test_hfa_align_arg([2 x double] alignstack(16) %h.coerce) +double test_hfa_align_arg(hfa_align h) { + return h.v[0]; +} + +// CHECK: define double @test_hfa_align_call() +// CHECK: %call = call double @test_hfa_align_arg([2 x double] alignstack(16) %1) +double test_hfa_align_call() { + hfa_align h1 = {1.0, 2.0}; + return test_hfa_align_arg(h1); +} + diff --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c --- a/clang/test/CodeGen/arm64-arguments.c +++ b/clang/test/CodeGen/arm64-arguments.c @@ -196,7 +196,7 @@ typedef __attribute__((neon_vector_type(4))) float float32x4_t; float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { -// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce) +// CHECK: define <4 x float> @f35(i32 %i, [4 x float] alignstack(16) %s1.coerce, [4 x float] alignstack(16) %s2.coerce) // CHECK: %s1 = alloca %struct.s35, align 16 // CHECK: %s2 = alloca %struct.s35, align 16 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1216,6 +1216,15 @@ only valid on intrinsic declarations and cannot be applied to a call site or arbitrary function. +``alignstack()`` + This indicates the alignment that should be considered by the backend when + assigning this parameter to a stack slot during calling convention + lowering. The enforcement of the specified alignment is target-dependent, + as target-specific calling convention rules may override this value. This + attribute serves the purpose of carrying language specific alignment + information that is not mapped to base types in the backend (for example, + over-alignment specification through language attributes). + .. _gc: Garbage Collector Strategy Names diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -44,6 +44,7 @@ unsigned IsSecArgPass : 1; ///< Second argument unsigned ByValAlign : 4; ///< Log 2 of byval alignment unsigned OrigAlign : 5; ///< Log 2 of original alignment + unsigned StackAlign : 3; ///< Log 2 of stack slot alignment unsigned IsInConsecutiveRegsLast : 1; unsigned IsInConsecutiveRegs : 1; unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate @@ -59,7 +60,7 @@ IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0), - IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + StackAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0), PointerAddrSpace(0) { static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big"); @@ -149,6 +150,15 @@ assert(getOrigAlign() == A.value() && "bitfield overflow"); } + unsigned getStackAlign() const { + MaybeAlign A = decodeMaybeAlign(StackAlign); + return A ? A->value() : 0; + } + void setStackAlign(Align A) { + StackAlign = encode(A); + assert(getStackAlign() == A.value() && "bitfield overflow"); + } + unsigned getByValSize() const { return ByValSize; } void setByValSize(unsigned S) { ByValSize = S; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -278,13 +278,15 @@ bool IsSwiftSelf : 1; bool IsSwiftError : 1; bool IsCFGuardTarget : 1; + MaybeAlign StackAlignment; uint16_t Alignment = 0; Type *ByValType = nullptr; ArgListEntry() : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), - IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {} + IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false), + StackAlignment() {} void setAttributes(const CallBase *Call, unsigned ArgIdx); diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -83,6 +83,9 @@ /// If this is a byval or inalloca argument, return its alignment. MaybeAlign getParamAlign() const; + /// Return argument's stack alignment if specified + MaybeAlign getParamStackAlign() const; + /// If this is a byval argument, return its type. Type *getParamByValType() const; diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -617,6 +617,9 @@ /// Return the alignment for the specified function parameter. MaybeAlign getParamAlignment(unsigned ArgNo) const; + /// Return the stack alignment for the specified function parameter. + MaybeAlign getParamStackAlignment(unsigned ArgNo) const; + /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -151,8 +151,8 @@ /// Sign extended before/after call. def SExt : EnumAttr<"signext">; -/// Alignment of stack for function (3 bits) stored as log2 of alignment with -/// +1 bias 0 means unaligned (different from alignstack=(1)). +/// Alignment of stack for function or argument (3 bits) stored as log2 of +/// alignment with +1 bias 0 means unaligned (different from alignstack=(1)). def StackAlignment : EnumAttr<"alignstack">; /// Function can be speculated. diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -447,6 +447,10 @@ return AttributeSets.getParamAlignment(ArgNo); } + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return AttributeSets.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a parameter. Type *getParamByValType(unsigned ArgNo) const { Type *Ty = AttributeSets.getParamByValType(ArgNo); diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1603,6 +1603,11 @@ return Attrs.getParamAlignment(ArgNo); } + /// Extract the stack alignment for a call or parameter (0=unknown). + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return Attrs.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a call or parameter. Type *getParamByValType(unsigned ArgNo) const { Type *Ty = Attrs.getParamByValType(ArgNo); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1613,6 +1613,13 @@ B.addAlignmentAttr(Alignment); continue; } + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + B.addStackAlignmentAttr(Alignment); + continue; + } case lltok::kw_byval: { Type *Ty; if (ParseByValWithOptionalType(Ty)) @@ -1652,7 +1659,6 @@ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break; - case lltok::kw_alignstack: case lltok::kw_alwaysinline: case lltok::kw_argmemonly: case lltok::kw_builtin: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9315,6 +9315,8 @@ if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (Args[i].StackAlignment) + Flags.setStackAlign(*Args[i].StackAlignment); MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); @@ -9826,6 +9828,8 @@ Flags.setCopyElisionCandidate(); if (Arg.hasAttribute(Attribute::Returned)) Flags.setReturned(); + if (Arg.getParamStackAlign()) + Flags.setStackAlign(*Arg.getParamStackAlign()); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -115,6 +115,7 @@ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); Alignment = Call->getParamAlignment(ArgIdx); + StackAlignment = Call->getParamStackAlign(ArgIdx); ByValType = nullptr; if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) ByValType = Call->getParamByValType(ArgIdx); diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1392,6 +1392,10 @@ return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } +MaybeAlign AttributeList::getParamStackAlignment(unsigned ArgNo) const { + return getAttributes(ArgNo + FirstArgIndex).getStackAlignment(); +} + Type *AttributeList::getParamByValType(unsigned Index) const { return getAttributes(Index+FirstArgIndex).getByValType(); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -131,6 +131,10 @@ return getParent()->getParamAlign(getArgNo()); } +MaybeAlign Argument::getParamStackAlign() const { + return getParent()->getParamStackAlign(getArgNo()); +} + Type *Argument::getParamByValType() const { assert(getType()->isPointerTy() && "Only pointers have byval types"); return getParent()->getParamByValType(getArgNo()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1523,7 +1523,6 @@ case Attribute::NoImplicitFloat: case Attribute::Naked: case Attribute::InlineHint: - case Attribute::StackAlignment: case Attribute::UWTable: case Attribute::NonLazyBind: case Attribute::ReturnsTwice: @@ -1560,7 +1559,8 @@ /// arguments. static bool isFuncOrArgAttr(Attribute::AttrKind Kind) { return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly || - Kind == Attribute::ReadNone || Kind == Attribute::NoFree; + Kind == Attribute::ReadNone || Kind == Attribute::NoFree || + Kind == Attribute::StackAlignment; } void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -43,11 +43,16 @@ const Align StackAlign = State.getMachineFunction().getDataLayout().getStackAlignment(); const Align OrigAlign(ArgFlags.getOrigAlign()); - const Align Align = std::min(OrigAlign, StackAlign); + const Align Alignment = std::min(OrigAlign, StackAlign); + + if (ArgFlags.getStackAlign()) { + const Align ArgStackAlign(ArgFlags.getStackAlign()); + State.AllocateStack(0, ArgStackAlign.value()); + } for (auto &It : PendingMembers) { It.convertToMem(State.AllocateStack( - Size, std::max((unsigned)Align.value(), SlotAlign))); + Size, std::max((unsigned)Alignment.value(), SlotAlign))); State.addLoc(It); SlotAlign = 1; } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -548,6 +548,8 @@ ; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) ; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +declare void @f.param.stack_align([2 x double] alignstack(16)) +; CHECK: declare void @f.param.stack_align([2 x double] alignstack(16)) ; Functions -- unnamed_addr and local_unnamed_addr declare void @f.unnamed_addr() unnamed_addr diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll b/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s + +; Over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_arg_reg([2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_reg: +; CHECK-DAG: ret + + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double %h.coerce.fca.0.extract +} + +; Call with over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_call_reg() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_call_reg: +; CHECK-DAG: fmov d0, #1.00000000 +; CHECK-DAG: fmov d1, #2.00000000 +; CHECK-DAG: bl test_hfa_align_arg_reg +; CHECK-DAG: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-DAG: ret + + %call = call double @test_hfa_align_arg_reg([2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00]) + ret double %call +} + +; Over-aligned HFA argument placed on stack - stack round up to alignment +define double @test_hfa_align_arg_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f, [2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_stack: +; CHECK-DAG: ldr d0, [sp, #16] +; CHECK-DAG: ret + + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double %h.coerce.fca.0.extract +} + +; Call with over-aligned HFA argument placed on stack - stack round up to alignment +define double @test_hfa_align_call_stack() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_call_stack: +; CHECK-DAG: mov x8, #4611686018427387904 +; CHECK-DAG: mov x9, #4607182418800017408 +; CHECK-DAG: stp x8, x30, [sp, #24] // 8-byte Folded Spill +; CHECK-DAG: str x9, [sp, #16] +; CHECK-DAG: bl test_hfa_align_arg +; CHECK-DAG: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-DAG: add sp, sp, #48 // =48 +; CHECK-DAG: ret + + %call = call double @test_hfa_align_arg_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, [2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00]) + ret double %call +} +