Index: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h +++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h @@ -253,6 +253,9 @@ /// Whether the "realign-stack" option is on. bool RealignOption; + /// Whether realignment is forced (\c alignstack without "no-realign-stack"). + bool ForcedRealign; + /// True if the function dynamically adjusts the stack pointer through some /// opaque mechanism like inline assembly or Win32 EH. bool HasOpaqueSPAdjustment; @@ -279,9 +282,9 @@ public: explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign, - bool RealignOpt) + bool RealignOpt, bool ForceRealign) : StackAlignment(StackAlign), StackRealignable(isStackRealign), - RealignOption(RealignOpt) { + RealignOption(RealignOpt), ForcedRealign(ForceRealign) { StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; HasVarSizedObjects = false; FrameAddressTaken = false; Index: llvm/trunk/lib/CodeGen/MachineFunction.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineFunction.cpp +++ llvm/trunk/lib/CodeGen/MachineFunction.cpp @@ -84,6 +84,13 @@ MBB->getParent()->DeleteMachineBasicBlock(MBB); } +static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, + const Function *Fn) { + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + return Fn->getFnStackAlignment(); // alignstack overrides target default + return STI->getFrameLowering()->getStackAlignment(); +} + MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), @@ -97,9 +104,11 @@ MFInfo = nullptr; FrameInfo = new (Allocator) - MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(), + MachineFrameInfo(getFnStackAlignment(STI, Fn), STI->getFrameLowering()->isStackRealignable(), - !F->hasFnAttribute("no-realign-stack")); + 
!F->hasFnAttribute("no-realign-stack"), + !F->hasFnAttribute("no-realign-stack") && + F->hasFnAttribute(Attribute::StackAlignment)); if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); @@ -613,8 +622,10 @@ // The alignment of the frame index can be determined from its offset from // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned Align = MinAlign(SPOffset, StackAlignment); + // object is 16-byte aligned. Note that unlike the non-fixed case, if stack + // realignment is forced (ForcedRealign), we can't assume the stack is in + // fact aligned, so StackAlignment is replaced by 1 below. + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, @@ -627,7 +638,7 @@ /// Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset) { - unsigned Align = MinAlign(SPOffset, StackAlignment); + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Index: llvm/trunk/test/CodeGen/X86/stack-align.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/stack-align.ll +++ llvm/trunk/test/CodeGen/X86/stack-align.ll @@ -61,3 +61,31 @@ ; CHECK-NOT: and ; CHECK: ret } + +%struct.sixteen = type { [16 x i8] } + +; Accessing stack parameters shouldn't assume stack alignment. Here we should +; emit two 8-byte loads, followed by two 8-byte stores. 
+define x86_stdcallcc void @test5(%struct.sixteen* byval nocapture readonly align 4 %s) #0 { + %d.sroa.0 = alloca [16 x i8], align 1 + %1 = getelementptr inbounds [16 x i8], [16 x i8]* %d.sroa.0, i32 0, i32 0 + call void @llvm.lifetime.start(i64 16, i8* %1) + %2 = getelementptr inbounds %struct.sixteen, %struct.sixteen* %s, i32 0, i32 0, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 16, i32 1, i1 true) + call void @llvm.lifetime.end(i64 16, i8* %1) + ret void +; CHECK-LABEL: test5: +; CHECK: and +; CHECK: movsd +; CHECK-NEXT: movsd +; CHECK-NEXT: movsd +; CHECK-NEXT: movsd +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) argmemonly nounwind + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) argmemonly nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) argmemonly nounwind + +attributes #0 = { nounwind alignstack=16 "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }