diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7091,6 +7091,14 @@ Type *Ty = MemOps[0].getTypeForEVT(C); Align NewAlign = DL.getABITypeAlign(Ty); if (NewAlign > Alignment) { + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); @@ -7198,7 +7206,16 @@ if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + const DataLayout &DL = DAG.getDataLayout(); + Align NewAlign = DL.getABITypeAlign(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) diff --git a/llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll b/llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll --- a/llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll +++ b/llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll @@ -3,10 +3,10 @@ ; need 16 bytes for SSE and 32 bytes for AVX. ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s --check-prefix=NOSSE -; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX define void @test1(i32 %t) nounwind { ; NOSSE-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/memset.ll b/llvm/test/CodeGen/X86/memset.ll --- a/llvm/test/CodeGen/X86/memset.ll +++ b/llvm/test/CodeGen/X86/memset.ll @@ -37,18 +37,14 @@ ; ; YMM-LABEL: t: ; YMM: ## %bb.0: ## %entry -; YMM-NEXT: pushl %ebp -; YMM-NEXT: movl %esp, %ebp -; YMM-NEXT: andl $-32, %esp -; YMM-NEXT: subl $96, %esp +; YMM-NEXT: subl $60, %esp ; YMM-NEXT: leal {{[0-9]+}}(%esp), %eax ; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; YMM-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp) ; YMM-NEXT: movl %eax, (%esp) ; YMM-NEXT: vzeroupper ; YMM-NEXT: calll _foo -; YMM-NEXT: movl %ebp, %esp -; YMM-NEXT: popl %ebp +; YMM-NEXT: addl $60, %esp ; YMM-NEXT: retl entry: %up_mvd = alloca [8 x %struct.x] ; [#uses=2] diff --git a/llvm/test/CodeGen/X86/pr42064.ll b/llvm/test/CodeGen/X86/pr42064.ll --- a/llvm/test/CodeGen/X86/pr42064.ll +++ b/llvm/test/CodeGen/X86/pr42064.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stackrealign -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s %struct.TestStruct = type { %union.Int128 } %union.Int128 = type { i128 } diff --git a/llvm/test/DebugInfo/COFF/frameproc-flags.ll b/llvm/test/DebugInfo/COFF/frameproc-flags.ll --- a/llvm/test/DebugInfo/COFF/frameproc-flags.ll +++ b/llvm/test/DebugInfo/COFF/frameproc-flags.ll @@ -100,7 +100,7 @@ ; CHECK: flags = has inline asm | naked | safe buffers | opt speed ; CHECK-LABEL: S_GPROC32_ID [size = 52] `stack_guard` ; CHECK: S_FRAMEPROC [size = 32] -; CHECK: local fp reg = VFRAME, param fp reg = EBP +; CHECK: local fp reg = VFRAME, param fp reg = VFRAME ; CHECK: flags = secure checks | strict secure checks | opt speed ; ModuleID = 'frameproc-flags.cpp'