Index: lib/Target/X86/X86CallFrameOptimization.cpp =================================================================== --- lib/Target/X86/X86CallFrameOptimization.cpp +++ lib/Target/X86/X86CallFrameOptimization.cpp @@ -180,10 +180,6 @@ if (CannotReserveFrame) return true; - // Don't do this when not optimizing for size. - if (!MF.getFunction()->optForSize()) - return false; - unsigned StackAlign = TFL->getStackAlignment(); int64_t Advantage = 0; Index: test/CodeGen/X86/2006-05-02-InstrSched1.ll =================================================================== --- test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ -; RUN: grep asm-printer | grep 16 +; RUN: grep asm-printer | grep 15 ; ; It's possible to schedule this in 14 instructions by avoiding ; callee-save registers, but the scheduler isn't currently that Index: test/CodeGen/X86/2006-11-12-CSRetCC.ll =================================================================== --- test/CodeGen/X86/2006-11-12-CSRetCC.ll +++ test/CodeGen/X86/2006-11-12-CSRetCC.ll @@ -6,7 +6,14 @@ define i32 @main() { ; CHECK-LABEL: main: ; CHECK-NOT: ret -; CHECK: subl $4, %{{.*}} +; CHECK: subl $12, %esp +; CHECK: pushl +; CHECK: pushl +; CHECK: pushl +; CHECK: pushl +; CHECK: pushl +; CHECK: calll cexp +; CHECK: addl $28, %esp ; CHECK: ret entry: Index: test/CodeGen/X86/atom-lea-sp.ll =================================================================== --- test/CodeGen/X86/atom-lea-sp.ll +++ test/CodeGen/X86/atom-lea-sp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM %s -; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -no-x86-call-frame-opt | FileCheck -check-prefix=ATOM %s +; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux -no-x86-call-frame-opt | FileCheck %s declare void @use_arr(i8*) declare void @many_params(i32, i32, i32, i32, i32, i32) Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -15,9 +15,10 @@ ; WIN64: ret ; X32-LABEL: testf16_inp -; X32: movl %eax, (%esp) ; X32: vaddps {{.*}}, {{%ymm[0-1]}} ; X32: vaddps {{.*}}, {{%ymm[0-1]}} +; Push is not deemed profitable if we're realigning the stack. +; X32: {{pushl|movl}} %eax ; X32: call ; X32: ret @@ -114,8 +115,8 @@ ; test functions with integer parameters ; pass parameters on stack for 32-bit platform ; X32-LABEL: test_int -; X32: movl {{.*}}, 4(%esp) -; X32: movl {{.*}}, (%esp) +; X32: pushl {{.*}} +; X32: pushl {{.*}} ; X32: call ; X32: addl {{.*}}, %eax Index: test/CodeGen/X86/avx512-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx512-intel-ocl.ll +++ test/CodeGen/X86/avx512-intel-ocl.ll @@ -15,7 +15,8 @@ ; X32-LABEL: testf16_inp ; X32: vaddps {{.*}}, {{%zmm[0-1]}} -; X32: movl %eax, (%esp) +; Push is not deemed profitable if we're realigning the stack. +; X32: {{pushl|movl}} %eax ; X32: call ; X32: ret @@ -102,4 +103,4 @@ %mask1 = xor <16 x i1> %cmp_res, %mask %c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1) ret <16 x float> %c -} \ No newline at end of file +} Index: test/CodeGen/X86/call-push.ll =================================================================== --- test/CodeGen/X86/call-push.ll +++ test/CodeGen/X86/call-push.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -no-x86-call-frame-opt | FileCheck %s %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** } %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] } Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll =================================================================== --- test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -21,9 +21,11 @@ ; i386-NEXT: lahf ; i386-NEXT: movl %eax, [[FLAGS:%.*]] ; i386-NEXT: popl %eax -; i386-NEXT: movl %edx, 4(%esp) -; i386-NEXT: movl %eax, (%esp) +; i386-NEXT: subl $8, %esp +; i386-NEXT: pushl %edx +; i386-NEXT: pushl %eax ; i386-NEXT: calll bar +; i386-NEXT: addl $16, %esp ; i386-NEXT: movl [[FLAGS]], %eax ; i386-NEXT: addb $127, %al ; i386-NEXT: sahf Index: test/CodeGen/X86/coalescer-commute3.ll =================================================================== --- test/CodeGen/X86/coalescer-commute3.ll +++ test/CodeGen/X86/coalescer-commute3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -no-x86-call-frame-opt | grep mov | count 6 %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } Index: test/CodeGen/X86/hipe-prologue.ll =================================================================== --- test/CodeGen/X86/hipe-prologue.ll +++ test/CodeGen/X86/hipe-prologue.ll @@ -24,7 +24,7 @@ define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) { ; X32-Linux-LABEL: test_basic_hipecc: - ; X32-Linux: leal -156(%esp), %ebx + ; X32-Linux: leal -140(%esp), %ebx ; X32-Linux-NEXT: cmpl 76(%ebp), %ebx ; X32-Linux-NEXT: jb .LBB1_1 Index: test/CodeGen/X86/i386-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/i386-shrink-wrapping.ll +++ test/CodeGen/X86/i386-shrink-wrapping.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-apple-macosx" Index: test/CodeGen/X86/libcall-sret.ll =================================================================== --- test/CodeGen/X86/libcall-sret.ll +++ test/CodeGen/X86/libcall-sret.ll @@ -10,14 +10,25 @@ ; CHECK-LABEL: test_sret_libcall: ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical - ; (aligned) place for the actual sret data is %esp + 40. -; CHECK: leal 40(%esp), [[SRET_ADDR:%[a-z]+]] -; CHECK: movl [[SRET_ADDR]], (%esp) + ; (aligned) place for the actual sret data is %esp + 20. +; CHECK: leal 20(%esp), [[SRET_ADDR:%[a-z]+]] +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl 72(%esp) +; CHECK: pushl [[SRET_ADDR]] + ; CHECK: calll __multi3 -; CHECK-DAG: movl 40(%esp), [[RES0:%[a-z]+]] -; CHECK-DAG: movl 44(%esp), [[RES1:%[a-z]+]] -; CHECK-DAG: movl 48(%esp), [[RES2:%[a-z]+]] -; CHECK-DAG: movl 52(%esp), [[RES3:%[a-z]+]] + +; CHECK: addl $44, %esp +; CHECK-DAG: movl 8(%esp), [[RES0:%[a-z]+]] +; CHECK-DAG: movl 12(%esp), [[RES1:%[a-z]+]] +; CHECK-DAG: movl 16(%esp), [[RES2:%[a-z]+]] +; CHECK-DAG: movl 20(%esp), [[RES3:%[a-z]+]] ; CHECK-DAG: movl [[RES0]], var ; CHECK-DAG: movl [[RES1]], var+4 ; CHECK-DAG: movl [[RES2]], var+8 Index: test/CodeGen/X86/localescape.ll =================================================================== --- test/CodeGen/X86/localescape.ll +++ test/CodeGen/X86/localescape.ll @@ -39,21 +39,19 @@ ; X86-LABEL: print_framealloc_from_fp: ; X86: pushl %esi -; X86: subl $8, %esp -; X86: movl 16(%esp), %esi -; X86: movl Lalloc_func$frame_escape_0(%esi), %eax -; X86: movl %eax, 4(%esp) -; X86: movl $_str, (%esp) +; X86: movl 8(%esp), %esi +; X86: pushl Lalloc_func$frame_escape_0(%esi) +; X86: pushl $_str ; X86: calll _printf -; X86: movl Lalloc_func$frame_escape_1(%esi), %eax -; X86: movl %eax, 4(%esp) -; X86: movl $_str, (%esp) +; X86: addl $8, %esp +; X86: pushl Lalloc_func$frame_escape_1(%esi) +; X86: pushl $_str ; X86: calll _printf +; X86: addl $8, %esp ; X86: movl $42, Lalloc_func$frame_escape_1(%esi) ; X86: movl $4, %eax -; X86: movl Lalloc_func$frame_escape_1(%esi,%eax), %eax -; X86: movl %eax, 4(%esp) -; X86: movl $_str, (%esp) +; X86: pushl Lalloc_func$frame_escape_1(%esi,%eax) +; X86: pushl $_str ; X86: calll _printf ; X86: addl $8, %esp ; X86: popl %esi @@ -132,12 +130,13 @@ ; X64: retq ; X86-LABEL: alloc_func_no_frameaddr: -; X86: subl $12, %esp -; X86: Lalloc_func_no_frameaddr$frame_escape_0 = 8 -; X86: Lalloc_func_no_frameaddr$frame_escape_1 = 4 -; X86: movl $42, 8(%esp) -; X86: movl $13, 4(%esp) -; X86: movl $0, (%esp) +; X86: subl $8, %esp +; X86: Lalloc_func_no_frameaddr$frame_escape_0 = 4 +; X86: Lalloc_func_no_frameaddr$frame_escape_1 = 0 +; X86: movl $42, 4(%esp) +; X86: movl $13, (%esp) +; X86: pushl $0 ; X86: calll _print_framealloc_from_fp -; X86: addl $12, %esp +; X86: addl $4, %esp +; X86: addl $8, %esp ; X86: retl Index: test/CodeGen/X86/mcu-abi.ll =================================================================== --- test/CodeGen/X86/mcu-abi.ll +++ test/CodeGen/X86/mcu-abi.ll @@ -93,14 +93,10 @@ } ; CHECK-LABEL: test_fp128: -; CHECK: movl (%eax), %e[[CX:..]] -; CHECK-NEXT: movl 4(%eax), %e[[DX:..]] -; CHECK-NEXT: movl 8(%eax), %e[[SI:..]] -; CHECK-NEXT: movl 12(%eax), %e[[AX:..]] -; CHECK-NEXT: movl %e[[AX]], 12(%esp) -; CHECK-NEXT: movl %e[[SI]], 8(%esp) -; CHECK-NEXT: movl %e[[DX]], 4(%esp) -; CHECK-NEXT: movl %e[[CX]], (%esp) +; CHECK: pushl 12(%eax) +; CHECK-NEXT: pushl 8(%eax) +; CHECK-NEXT: pushl 4(%eax) +; CHECK-NEXT: pushl (%eax) ; CHECK-NEXT: calll __fixtfsi define i32 @test_fp128(fp128* %ptr) #0 { %v = load fp128, fp128* %ptr Index: test/CodeGen/X86/mingw-alloca.ll =================================================================== --- test/CodeGen/X86/mingw-alloca.ll +++ test/CodeGen/X86/mingw-alloca.ll @@ -22,12 +22,12 @@ ; COFF: andl $-16, %esp ; COFF: pushl %eax ; COFF: calll __alloca -; COFF: movl 8028(%esp), %eax +; COFF: movl 8012(%esp), %eax ; ELF: foo2: ; ELF: andl $-16, %esp ; ELF: pushl %eax ; ELF: calll _alloca -; ELF: movl 8028(%esp), %eax +; ELF: movl 8012(%esp), %eax %A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1] %A2.sub = getelementptr [2000 x i32], [2000 x i32]* %A2, i32 0, i32 0 ; [#uses=1] call void @bar2( i32* %A2.sub, i32 %N ) Index: test/CodeGen/X86/movtopush.ll =================================================================== --- test/CodeGen/X86/movtopush.ll +++ test/CodeGen/X86/movtopush.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL +; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64 ; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED @@ -12,25 +13,9 @@ declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d) -; Here, we should have a reserved frame, so we don't expect pushes +; We should get pushes for x86, even though there is a reserved call frame. +; Make sure we don't touch x86-64, and that turning it off works. ; NORMAL-LABEL: test1: -; NORMAL: subl $16, %esp -; NORMAL-NEXT: movl $4, 12(%esp) -; NORMAL-NEXT: movl $3, 8(%esp) -; NORMAL-NEXT: movl $2, 4(%esp) -; NORMAL-NEXT: movl $1, (%esp) -; NORMAL-NEXT: call -; NORMAL-NEXT: addl $16, %esp -define void @test1() { -entry: - call void @good(i32 1, i32 2, i32 3, i32 4) - ret void -} - -; We're optimizing for code size, so we should get pushes for x86, -; even though there is a reserved call frame. -; Make sure we don't touch x86-64 -; NORMAL-LABEL: test1b: ; NORMAL-NOT: subl {{.*}} %esp ; NORMAL: pushl $4 ; NORMAL-NEXT: pushl $3 @@ -38,28 +23,21 @@ ; NORMAL-NEXT: pushl $1 ; NORMAL-NEXT: call ; NORMAL-NEXT: addl $16, %esp -; X64-LABEL: test1b: +; X64-LABEL: test1: ; X64: movl $1, %ecx ; X64-NEXT: movl $2, %edx ; X64-NEXT: movl $3, %r8d ; X64-NEXT: movl $4, %r9d ; X64-NEXT: callq good -define void @test1b() optsize { -entry: - call void @good(i32 1, i32 2, i32 3, i32 4) - ret void -} - -; Same as above, but for minsize -; NORMAL-LABEL: test1c: -; NORMAL-NOT: subl {{.*}} %esp -; NORMAL: pushl $4 -; NORMAL-NEXT: pushl $3 -; NORMAL-NEXT: pushl $2 -; NORMAL-NEXT: pushl $1 -; NORMAL-NEXT: call -; NORMAL-NEXT: addl $16, %esp -define void @test1c() minsize { +; NOPUSH-LABEL: test1: +; NOPUSH: subl $16, %esp +; NOPUSH-NEXT: movl $4, 12(%esp) +; NOPUSH-NEXT: movl $3, 8(%esp) +; NOPUSH-NEXT: movl $2, 4(%esp) +; NOPUSH-NEXT: movl $1, (%esp) +; NOPUSH-NEXT: call +; NOPUSH-NEXT: addl $16, %esp +define void @test1() { entry: call void @good(i32 1, i32 2, i32 3, i32 4) ret void Index: test/CodeGen/X86/phys-reg-local-regalloc.ll =================================================================== --- test/CodeGen/X86/phys-reg-local-regalloc.ll +++ test/CodeGen/X86/phys-reg-local-regalloc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s -; RUN: llc -O0 < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s -; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s +; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s +; RUN: llc -O0 < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -no-x86-call-frame-opt | FileCheck %s +; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck -check-prefix=ATOM %s ; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling. @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] Index: test/CodeGen/X86/segmented-stacks.ll =================================================================== --- test/CodeGen/X86/segmented-stacks.ll +++ test/CodeGen/X86/segmented-stacks.ll @@ -44,7 +44,7 @@ ; X32-Linux-NEXT: ja .LBB0_2 ; X32-Linux: pushl $0 -; X32-Linux-NEXT: pushl $60 +; X32-Linux-NEXT: pushl $44 ; X32-Linux-NEXT: calll __morestack ; X32-Linux-NEXT: ret @@ -105,7 +105,7 @@ ; X32-MinGW-NEXT: ja LBB0_2 ; X32-MinGW: pushl $0 -; X32-MinGW-NEXT: pushl $48 +; X32-MinGW-NEXT: pushl $40 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -135,7 +135,7 @@ ; X32-DFlyBSD-NEXT: ja .LBB0_2 ; X32-DFlyBSD: pushl $0 -; X32-DFlyBSD-NEXT: pushl $48 +; X32-DFlyBSD-NEXT: pushl $40 ; X32-DFlyBSD-NEXT: calll __morestack ; X32-DFlyBSD-NEXT: ret @@ -162,7 +162,7 @@ ; X32-Linux-NEXT: ja .LBB1_2 ; X32-Linux: pushl $4 -; X32-Linux-NEXT: pushl $60 +; X32-Linux-NEXT: pushl $44 ; X32-Linux-NEXT: calll __morestack ; X32-Linux-NEXT: ret @@ -209,7 +209,7 @@ ; X32-MinGW-NEXT: ja LBB1_2 ; X32-MinGW: pushl $4 -; X32-MinGW-NEXT: pushl $52 +; X32-MinGW-NEXT: pushl $44 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -238,7 +238,7 @@ ; X32-DFlyBSD-NEXT: ja .LBB1_2 ; X32-DFlyBSD: pushl $4 -; X32-DFlyBSD-NEXT: pushl $52 +; X32-DFlyBSD-NEXT: pushl $44 ; X32-DFlyBSD-NEXT: calll __morestack ; X32-DFlyBSD-NEXT: ret @@ -305,12 +305,12 @@ ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: ret -; X32-MinGW: leal -40008(%esp), %ecx +; X32-MinGW: leal -40000(%esp), %ecx ; X32-MinGW-NEXT: cmpl %fs:20, %ecx ; X32-MinGW-NEXT: ja LBB2_2 ; X32-MinGW: pushl $0 -; X32-MinGW-NEXT: pushl $40008 +; X32-MinGW-NEXT: pushl $40000 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -333,12 +333,12 @@ ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: ret -; X32-DFlyBSD: leal -40008(%esp), %ecx +; X32-DFlyBSD: leal -40000(%esp), %ecx ; X32-DFlyBSD-NEXT: cmpl %fs:16, %ecx ; X32-DFlyBSD-NEXT: ja .LBB2_2 ; X32-DFlyBSD: pushl $0 -; X32-DFlyBSD-NEXT: pushl $40008 +; X32-DFlyBSD-NEXT: pushl $40000 ; X32-DFlyBSD-NEXT: calll __morestack ; X32-DFlyBSD-NEXT: ret @@ -364,7 +364,7 @@ ; X32-Linux-NEXT: ja .LBB3_2 ; X32-Linux: pushl $0 -; X32-Linux-NEXT: pushl $60 +; X32-Linux-NEXT: pushl $44 ; X32-Linux-NEXT: calll __morestack ; X32-Linux-NEXT: ret @@ -415,7 +415,7 @@ ; X32-MinGW-NEXT: ja LBB3_2 ; X32-MinGW: pushl $0 -; X32-MinGW-NEXT: pushl $48 +; X32-MinGW-NEXT: pushl $40 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -445,7 +445,7 @@ ; X32-DFlyBSD-NEXT: ja .LBB3_2 ; X32-DFlyBSD: pushl $0 -; X32-DFlyBSD-NEXT: pushl $48 +; X32-DFlyBSD-NEXT: pushl $40 ; X32-DFlyBSD-NEXT: calll __morestack ; X32-DFlyBSD-NEXT: ret @@ -524,12 +524,12 @@ ; X32-MinGW-LABEL: test_fastcc_large: -; X32-MinGW: leal -40008(%esp), %eax +; X32-MinGW: leal -40000(%esp), %eax ; X32-MinGW-NEXT: cmpl %fs:20, %eax ; X32-MinGW-NEXT: ja LBB4_2 ; X32-MinGW: pushl $0 -; X32-MinGW-NEXT: pushl $40008 +; X32-MinGW-NEXT: pushl $40000 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -557,12 +557,12 @@ ; X32-DFlyBSD-LABEL: test_fastcc_large: -; X32-DFlyBSD: leal -40008(%esp), %eax +; X32-DFlyBSD: leal -40000(%esp), %eax ; X32-DFlyBSD-NEXT: cmpl %fs:16, %eax ; X32-DFlyBSD-NEXT: ja .LBB4_2 ; X32-DFlyBSD: pushl $0 -; X32-DFlyBSD-NEXT: pushl $40008 +; X32-DFlyBSD-NEXT: pushl $40000 ; X32-DFlyBSD-NEXT: calll __morestack ; X32-DFlyBSD-NEXT: ret Index: test/CodeGen/X86/seh-catch-all-win32.ll =================================================================== --- test/CodeGen/X86/seh-catch-all-win32.ll +++ test/CodeGen/X86/seh-catch-all-win32.ll @@ -75,8 +75,8 @@ ; CHECK: movl -24(%ebp), %esp ; EH state -1 ; CHECK: movl [[code_offs]](%ebp), %[[code:[a-z]+]] -; CHECK-DAG: movl %[[code]], 4(%esp) -; CHECK-DAG: movl $_str, (%esp) +; CHECK: pushl %[[code]] +; CHECK: pushl $_str ; CHECK: calll _printf ; CHECK: .section .xdata,"dr" Index: test/CodeGen/X86/seh-stack-realign.ll =================================================================== --- test/CodeGen/X86/seh-stack-realign.ll +++ test/CodeGen/X86/seh-stack-realign.ll @@ -57,19 +57,19 @@ ; CHECK: movl %esp, [[reg_offs:[-0-9]+]](%esi) ; CHECK: movl $L__ehtable$main, ; EH state 0 -; CHECK: movl $0, 40(%esi) +; CHECK: movl $0, 32(%esi) ; CHECK: calll _crash ; CHECK: retl ; CHECK: LBB0_[[lpbb:[0-9]+]]: # %__except ; Restore ESP ; CHECK: movl -24(%ebp), %esp ; Restore ESI -; CHECK: leal -44(%ebp), %esi +; CHECK: leal -36(%ebp), %esi ; Restore EBP -; CHECK: movl 12(%esi), %ebp +; CHECK: movl 4(%esi), %ebp ; CHECK: movl [[code_offs]](%esi), %[[code:[a-z]+]] -; CHECK-DAG: movl %[[code]], 4(%esp) -; CHECK-DAG: movl $_str, (%esp) +; CHECK: pushl %[[code]] +; CHECK: pushl $_str ; CHECK: calll _printf ; CHECK: .section .xdata,"dr" Index: test/CodeGen/X86/shrink-wrap-chkstk.ll =================================================================== --- test/CodeGen/X86/shrink-wrap-chkstk.ll +++ test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -64,9 +64,9 @@ ; CHECK: cmpl %edx, %eax ; CHECK: jge LBB1_2 ; CHECK: pushl %eax -; CHECK: movl $4100, %eax +; CHECK: movl $4092, %eax ; CHECK: calll __chkstk -; CHECK: movl 4100(%esp), %eax +; CHECK: movl 4092(%esp), %eax ; CHECK: calll _doSomething ; CHECK: LBB1_2: ; CHECK: retl Index: test/CodeGen/X86/sse-intel-ocl.ll =================================================================== --- test/CodeGen/X86/sse-intel-ocl.ll +++ test/CodeGen/X86/sse-intel-ocl.ll @@ -14,7 +14,7 @@ ; WIN64: ret ; WIN32: testf16_inp -; WIN32: movl %eax, (%esp) +; WIN32: pushl %eax ; WIN32: addps {{.*}}, {{%xmm[0-3]}} ; WIN32: addps {{.*}}, {{%xmm[0-3]}} ; WIN32: addps {{.*}}, {{%xmm[0-3]}} Index: test/CodeGen/X86/tailcall-stackalign.ll =================================================================== --- test/CodeGen/X86/tailcall-stackalign.ll +++ test/CodeGen/X86/tailcall-stackalign.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt -no-x86-call-frame-opt | FileCheck %s ; Linux has 8 byte alignment so the params cause stack size 20 when tailcallopt ; is enabled, ensure that a normal fastcc call has matching stack size Index: test/CodeGen/X86/twoaddr-coalesce.ll =================================================================== --- test/CodeGen/X86/twoaddr-coalesce.ll +++ test/CodeGen/X86/twoaddr-coalesce.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep mov | count 4 +; RUN: llc < %s -march=x86 | grep mov | count 2 ; rdar://6523745 @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] Index: test/CodeGen/X86/vararg-callee-cleanup.ll =================================================================== --- test/CodeGen/X86/vararg-callee-cleanup.ll +++ test/CodeGen/X86/vararg-callee-cleanup.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i686-pc-windows < %s | FileCheck %s +; RUN: llc -mtriple=i686-pc-windows -no-x86-call-frame-opt < %s | FileCheck %s target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" Index: test/CodeGen/X86/win-catchpad-csrs.ll =================================================================== --- test/CodeGen/X86/win-catchpad-csrs.ll +++ test/CodeGen/X86/win-catchpad-csrs.ll @@ -51,7 +51,7 @@ ; X86: calll _getint ; X86: calll _useints ; X86: movl $0, -{{[0-9]+}}(%ebp) -; X86: movl $1, (%esp) +; X86: pushl $1 ; X86: calll _f ; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont ; X86: popl %esi @@ -71,7 +71,7 @@ ; X86: subl $16, %esp ; X86: addl $12, %ebp ; X86: movl $1, -{{[0-9]+}}(%ebp) -; X86: movl $2, (%esp) +; X86: pushl $2 ; X86: calll _f ; X86: movl $[[restorebb]], %eax ; X86-NEXT: addl $16, %esp Index: test/CodeGen/X86/win-catchpad.ll =================================================================== --- test/CodeGen/X86/win-catchpad.ll +++ test/CodeGen/X86/win-catchpad.ll @@ -57,8 +57,8 @@ ; X86: movl %esp, -[[sp_offset:[0-9]+]](%ebp) ; X86: movl $0, -{{[0-9]+}}(%ebp) ; X86: leal -[[local_offs:[0-9]+]](%ebp), %[[addr_reg:[a-z]+]] -; X86-DAG: movl %[[addr_reg]], 4(%esp) -; X86-DAG: movl $1, (%esp) +; X86-DAG: pushl %[[addr_reg]] +; X86-DAG: pushl $1 ; X86: calll _f ; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont ; X86: retl @@ -83,13 +83,14 @@ ; X86-DAG: movl -32(%ebp), %[[e_reg:[a-z]+]] ; X86-DAG: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] ; X86-DAG: movl $1, -{{[0-9]+}}(%ebp) -; X86-DAG: movl %[[addr_reg]], 4(%esp) -; X86-DAG: movl %[[e_reg]], (%esp) +; X86: pushl %[[addr_reg]] +; X86: pushl %[[e_reg]] ; X86: calll _f -; X86-NEXT: movl $[[restorebb1]], %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86: addl $8, %esp +; X86: movl $[[restorebb1]], %eax +; X86: addl $8, %esp +; X86: popl %ebp +; X86: retl ; X86: "?catch$[[catch2bb:[0-9]+]]@?0?try_catch_catch@4HA": ; X86: LBB0_[[catch2bb]]: # %handler2{{$}} @@ -99,13 +100,14 @@ ; X86: movl %esp, -[[sp_offset]](%ebp) ; X86-DAG: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] ; X86-DAG: movl $1, -{{[0-9]+}}(%ebp) -; X86-DAG: movl %[[addr_reg]], 4(%esp) -; X86-DAG: movl $3, (%esp) +; X86: pushl %[[addr_reg]] +; X86: pushl $3 ; X86: calll _f -; X86-NEXT: movl $[[restorebb2]], %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86: addl $8, %esp +; X86: movl $[[restorebb2]], %eax +; X86: addl $8, %esp +; X86: popl %ebp +; X86: retl ; X86: L__ehtable$try_catch_catch: ; X86: $handlerMap$0$try_catch_catch: Index: test/CodeGen/X86/win-cleanuppad.ll =================================================================== --- test/CodeGen/X86/win-cleanuppad.ll +++ test/CodeGen/X86/win-cleanuppad.ll @@ -88,11 +88,11 @@ } ; X86-LABEL: _nested_cleanup: -; X86: movl $1, (%esp) +; X86: pushl $1 ; X86: calll _f -; X86: movl $2, (%esp) +; X86: pushl $2 ; X86: calll _f -; X86: movl $3, (%esp) +; X86: pushl $3 ; X86: calll _f ; X86: "?dtor$[[cleanup_inner:[0-9]+]]@?0?nested_cleanup@4HA": Index: test/CodeGen/X86/win32-eh-states.ll =================================================================== --- test/CodeGen/X86/win32-eh-states.ll +++ test/CodeGen/X86/win32-eh-states.ll @@ -68,19 +68,19 @@ ; X86: movl $___ehhandler$f, {{.*}} ; ; X86: movl $0, [[state]](%ebp) -; X86: movl $1, (%esp) +; X86: pushl $1 ; X86: calll _may_throw ; ; X86: movl $1, [[state]](%ebp) -; X86: movl $2, (%esp) +; X86: pushl $2 ; X86: calll _may_throw ; ; X86: movl $2, [[state]](%ebp) -; X86: movl $3, (%esp) +; X86: pushl $3 ; X86: calll _may_throw ; ; X86: movl $3, [[state]](%ebp) -; X86: movl $4, (%esp) +; X86: pushl $4 ; X86: calll _may_throw @@ -172,19 +172,19 @@ ; X86: movl $___ehhandler$g, {{.*}} ; ; X86: movl $1, [[state]](%ebp) -; X86: movl $-1, (%esp) +; X86: pushl $-1 ; X86: calll _may_throw ; ; X86: movl $2, [[state]](%ebp) -; X86: movl $0, (%esp) +; X86: pushl $0 ; X86: calll _may_throw ; ; X86: movl $3, [[state]](%ebp) -; X86: movl $1, (%esp) +; X86: pushl $1 ; X86: calll _may_throw ; ; X86: movl $2, [[state]](%ebp) -; X86: movl $2, (%esp) +; X86: pushl $2 ; X86: calll _may_throw ; X64-LABEL: g: Index: test/CodeGen/X86/win32-seh-catchpad.ll =================================================================== --- test/CodeGen/X86/win32-seh-catchpad.ll +++ test/CodeGen/X86/win32-seh-catchpad.ll @@ -32,16 +32,16 @@ ; CHECK-LABEL: _try_except: ; Store state #0 ; CHECK: movl $0, -[[state:[0-9]+]](%ebp) -; CHECK: movl $1, (%esp) +; CHECK: pushl $1 ; CHECK: calll _f ; CHECK: movl $-1, -[[state]](%ebp) -; CHECK: movl $3, (%esp) +; CHECK: pushl $3 ; CHECK: calll _f ; CHECK: retl ; __except ; CHECK: movl $-1, -[[state]](%ebp) -; CHECK: movl $2, (%esp) +; CHECK: pushl $2 ; CHECK: calll _f ; CHECK: .section .xdata,"dr" @@ -205,7 +205,7 @@ ; CHECK-NEXT: movl -24(%ebp), %esp ; CHECK-NEXT: addl $12, %ebp ; CHECK-NEXT: movl $-1, -16(%ebp) -; CHECK-NEXT: movl $2, (%esp) +; CHECK-NEXT: pushl $2 ; CHECK-NEXT: calll _f Index: test/CodeGen/X86/win32-seh-nested-finally.ll =================================================================== --- test/CodeGen/X86/win32-seh-nested-finally.ll +++ test/CodeGen/X86/win32-seh-nested-finally.ll @@ -43,20 +43,23 @@ ; CHECK: movl $-1, -[[state:[0-9]+]](%ebp) ; CHECK: movl {{.*}}, %fs:0 ; CHECK: movl $1, -[[state]](%ebp) -; CHECK: movl $1, (%esp) +; CHECK: pushl $1 ; CHECK: calll _f +; CHECK: addl $4, %esp ; CHECK: movl $0, -[[state]](%ebp) -; CHECK: movl $2, (%esp) +; CHECK: pushl $2 ; CHECK: calll _f +; CHECK: addl $4, %esp ; CHECK: movl $-1, -[[state]](%ebp) -; CHECK: movl $3, (%esp) +; CHECK: pushl $3 ; CHECK: calll _f +; CHECK: addl $4, %esp ; CHECK: retl ; CHECK: LBB0_[[inner:[0-9]+]]: # %ehcleanup ; CHECK: pushl %ebp ; CHECK: addl $12, %ebp -; CHECK: movl $2, (%esp) +; CHECK: pushl $2 ; CHECK: calll _f ; CHECK: popl %ebp ; CHECK: retl @@ -64,7 +67,7 @@ ; CHECK: LBB0_[[outer:[0-9]+]]: # %ehcleanup.3 ; CHECK: pushl %ebp ; CHECK: addl $12, %ebp -; CHECK: movl $3, (%esp) +; CHECK: pushl $3 ; CHECK: calll _f ; CHECK: popl %ebp ; CHECK: retl Index: test/CodeGen/X86/win32_sret.ll =================================================================== --- test/CodeGen/X86/win32_sret.ll +++ test/CodeGen/X86/win32_sret.ll @@ -135,12 +135,11 @@ ; Load the address of the result and put it onto stack -; (through %ecx in the -O0 build). -; WIN32: leal {{[0-9]+}}(%esp), %e{{[a-d]}}x -; WIN32: movl %e{{[a-d]}}x, (%e{{([a-d]x)|(sp)}}) - ; The this pointer goes to ECX. -; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; (through %ecx in the -O0 build). +; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x +; WIN32: leal {{[0-9]*}}(%esp), %ecx +; WIN32: pushl %e{{[a-d]}}x ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ" ; WIN32: retl ret void @@ -155,25 +154,21 @@ ; LINUX-LABEL: test6_f: ; The %x argument is moved to %ecx. It will be the this pointer. -; WIN32: movl 20(%esp), %ecx - -; The %x argument is moved to (%esp). It will be the this pointer. With -O0 -; we copy esp to ecx and use (ecx) instead of (esp). -; MINGW_X86: movl 20(%esp), %eax -; MINGW_X86: movl %eax, (%e{{([a-d]x)|(sp)}}) +; WIN32: movl 16(%esp), %ecx -; CYGWIN: movl 20(%esp), %eax -; CYGWIN: movl %eax, (%e{{([a-d]x)|(sp)}}) ; The sret pointer is (%esp) -; WIN32: leal 4(%esp), %[[REG:e[a-d]x]] -; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) +; WIN32: leal (%esp), %[[REG:e[a-d]x]] +; WIN32-NEXT: pushl %[[REG]] ; The sret pointer is %ecx -; MINGW_X86-NEXT: leal 4(%esp), %ecx +; The %x argument is moved to (%esp). It will be the this pointer. +; MINGW_X86: leal (%esp), %ecx +; MINGW_X86-NEXT: pushl 16(%esp) ; MINGW_X86-NEXT: calll _test6_g -; CYGWIN-NEXT: leal 4(%esp), %ecx +; CYGWIN: leal (%esp), %ecx +; CYGWIN-NEXT: pushl 16(%esp) ; CYGWIN-NEXT: calll _test6_g %tmp = alloca %struct.test6, align 4 @@ -191,17 +186,17 @@ ; LINUX-LABEL: test7_f: ; The %x argument is moved to %ecx on all OSs. It will be the this pointer. -; WIN32: movl 20(%esp), %ecx -; MINGW_X86: movl 20(%esp), %ecx -; CYGWIN: movl 20(%esp), %ecx +; WIN32: movl 16(%esp), %ecx +; MINGW_X86: movl 16(%esp), %ecx +; CYGWIN: movl 16(%esp), %ecx ; The sret pointer is (%esp) -; WIN32: leal 4(%esp), %[[REG:e[a-d]x]] -; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) -; MINGW_X86: leal 4(%esp), %[[REG:e[a-d]x]] -; MINGW_X86-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) -; CYGWIN: leal 4(%esp), %[[REG:e[a-d]x]] -; CYGWIN-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) +; WIN32: leal (%esp), %[[REG:e[a-d]x]] +; WIN32-NEXT: pushl %[[REG]] +; MINGW_X86: leal (%esp), %[[REG:e[a-d]x]] +; MINGW_X86-NEXT: pushl %[[REG]] +; CYGWIN: leal (%esp), %[[REG:e[a-d]x]] +; CYGWIN-NEXT: pushl %[[REG]] %tmp = alloca %struct.test7, align 4 call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp) Index: test/CodeGen/X86/xmulo.ll =================================================================== --- test/CodeGen/X86/xmulo.ll +++ test/CodeGen/X86/xmulo.ll @@ -9,9 +9,9 @@ define i32 @t1() nounwind { ; CHECK-LABEL: t1: -; CHECK: movl $0, 12(%esp) -; CHECK: movl $0, 8(%esp) -; CHECK: movl $72, 4(%esp) +; CHECK: pushl $0 +; CHECK: pushl $0 +; CHECK: pushl $72 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8) %2 = extractvalue {i64, i1} %1, 0 @@ -23,9 +23,9 @@ define i32 @t2() nounwind { ; CHECK-LABEL: t2: -; CHECK: movl $0, 12(%esp) -; CHECK: movl $0, 8(%esp) -; CHECK: movl $0, 4(%esp) +; CHECK: pushl $0 +; CHECK: pushl $0 +; CHECK: pushl $0 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0) %2 = extractvalue {i64, i1} %1, 0 @@ -37,9 +37,9 @@ define i32 @t3() nounwind { ; CHECK-LABEL: t3: -; CHECK: movl $1, 12(%esp) -; CHECK: movl $-1, 8(%esp) -; CHECK: movl $-9, 4(%esp) +; CHECK: pushl $1 +; CHECK: pushl $-1 +; CHECK: pushl $-9 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1) %2 = extractvalue {i64, i1} %1, 0 Index: test/CodeGen/X86/zext-fold.ll =================================================================== --- test/CodeGen/X86/zext-fold.ll +++ test/CodeGen/X86/zext-fold.ll @@ -35,7 +35,8 @@ } ; CHECK: test3 ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]] -; CHECK-NEXT: movl [[REGISTER]], 4(%esp) +; CHECK: subl $8, %esp +; CHECK-NEXT: pushl [[REGISTER]] ; CHECK-NEXT: andl $224, [[REGISTER]] -; CHECK-NEXT: movl [[REGISTER]], (%esp) +; CHECK-NEXT: pushl [[REGISTER]] ; CHECK-NEXT: call{{.*}}use