diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1102,7 +1102,7 @@ // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, - R8, R9, R10, RSP)>; + R8, R9, R10)>; // All registers - except r11 def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs, @@ -1160,17 +1160,16 @@ def CSR_64_HHVM : CalleeSavedRegs<(add R12)>; // Register calling convention preserves few GPR and XMM8-15 -def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>; +def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>; def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, (sequence "XMM%u", 4, 7))>; def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, ECX)>; def CSR_Win32_CFGuard_Check : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>; -def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP, +def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, (sequence "R%u", 10, 15))>; def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE, (sequence "XMM%u", 8, 15))>; -def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP, +def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, (sequence "R%u", 12, 15))>; def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, (sequence "XMM%u", 8, 15))>; - diff --git a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll --- a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll @@ -283,8 +283,7 @@ define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) { ; X32-LABEL: test_argv32i1: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esp -; X32-NEXT: subl $72, %esp +; X32-NEXT: subl $76, %esp ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill @@ -303,8 +302,7 @@ ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload -; X32-NEXT: addl $72, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $76, %esp ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; @@ -316,10 +314,8 @@ ; WIN64-NEXT: .seh_pushreg %r11 ; WIN64-NEXT: pushq %r10 ; WIN64-NEXT: .seh_pushreg %r10 -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $152, %rsp -; WIN64-NEXT: .seh_stackalloc 152 +; WIN64-NEXT: subq $128, %rsp +; WIN64-NEXT: .seh_stackalloc 128 ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp ; WIN64-NEXT: .seh_setframe %rbp, 128 ; WIN64-NEXT: .seh_endprologue @@ -339,8 +335,7 @@ ; WIN64-NEXT: vzeroupper ; WIN64-NEXT: callq test_argv32i1helper ; WIN64-NEXT: nop -; WIN64-NEXT: leaq 24(%rbp), %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: movq %rbp, %rsp ; WIN64-NEXT: popq %r10 ; WIN64-NEXT: popq %r11 ; WIN64-NEXT: popq %rbp @@ -349,9 +344,7 @@ ; ; LINUXOSX64-LABEL: test_argv32i1: ; LINUXOSX64: # %bb.0: # %entry -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: subq $136, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -361,7 +354,6 @@ ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 @@ -388,9 +380,7 @@ ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload -; LINUXOSX64-NEXT: addq $128, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $136, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: vzeroupper ; LINUXOSX64-NEXT: retq @@ -518,8 +508,7 @@ define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) { ; X32-LABEL: test_argv16i1: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $72, %esp +; X32-NEXT: subl $76, %esp ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill @@ -539,8 +528,7 @@ ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload -; X32-NEXT: addl $72, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $76, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_argv16i1: @@ -549,10 +537,8 @@ ; WIN64-NEXT: .seh_pushreg %r11 ; WIN64-NEXT: pushq %r10 ; WIN64-NEXT: .seh_pushreg %r10 -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $80, %rsp -; WIN64-NEXT: .seh_stackalloc 80 +; WIN64-NEXT: subq $88, %rsp +; WIN64-NEXT: .seh_stackalloc 88 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: kmovd %edx, %k0 ; WIN64-NEXT: kmovd %eax, %k1 @@ -569,8 +555,7 @@ ; WIN64-NEXT: vzeroupper ; WIN64-NEXT: callq test_argv16i1helper ; WIN64-NEXT: nop -; WIN64-NEXT: addq $80, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $88, %rsp ; WIN64-NEXT: popq %r10 ; WIN64-NEXT: popq %r11 ; WIN64-NEXT: retq @@ -578,9 +563,7 @@ ; ; LINUXOSX64-LABEL: test_argv16i1: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: subq $136, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -590,7 +573,6 @@ ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 @@ -618,9 +600,7 @@ ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload -; LINUXOSX64-NEXT: addq $128, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $136, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) @@ -752,8 +732,7 @@ define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) { ; X32-LABEL: test_argv8i1: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $72, %esp +; X32-NEXT: subl $76, %esp ; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill @@ -773,8 +752,7 @@ ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload -; X32-NEXT: addl $72, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $76, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_argv8i1: @@ -783,10 +761,8 @@ ; WIN64-NEXT: .seh_pushreg %r11 ; WIN64-NEXT: pushq %r10 ; WIN64-NEXT: .seh_pushreg %r10 -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $80, %rsp -; WIN64-NEXT: .seh_stackalloc 80 +; WIN64-NEXT: subq $88, %rsp +; WIN64-NEXT: .seh_stackalloc 88 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: kmovd %edx, %k0 ; WIN64-NEXT: kmovd %eax, %k1 @@ -803,8 +779,7 @@ ; WIN64-NEXT: vzeroupper ; WIN64-NEXT: callq test_argv8i1helper ; WIN64-NEXT: nop -; WIN64-NEXT: addq $80, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $88, %rsp ; WIN64-NEXT: popq %r10 ; WIN64-NEXT: popq %r11 ; WIN64-NEXT: retq @@ -812,9 +787,7 @@ ; ; LINUXOSX64-LABEL: test_argv8i1: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $128, %rsp +; LINUXOSX64-NEXT: subq $136, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -824,7 +797,6 @@ ; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144 ; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128 ; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112 @@ -852,9 +824,7 @@ ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload -; LINUXOSX64-NEXT: addq $128, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $136, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll --- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -30,37 +30,34 @@ define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; X32-LABEL: test_CallargReti1: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: incb %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: calll _test_argReti1 ; X32-NEXT: incb %al -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti1: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incb %al ; WIN64-NEXT: movzbl %al, %eax ; WIN64-NEXT: callq test_argReti1 ; WIN64-NEXT: incb %al -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargReti1: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: movzbl %al, %eax ; LINUXOSX64-NEXT: callq test_argReti1 ; LINUXOSX64-NEXT: incb %al -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = add i1 %a, 1 @@ -96,37 +93,34 @@ define dso_local x86_regcallcc i8 @test_CallargReti8(i8 %a) { ; X32-LABEL: test_CallargReti8: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: incb %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: calll _test_argReti8 ; X32-NEXT: incb %al -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti8: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incb %al ; WIN64-NEXT: movzbl %al, %eax ; WIN64-NEXT: callq test_argReti8 ; WIN64-NEXT: incb %al -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargReti8: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: movzbl %al, %eax ; LINUXOSX64-NEXT: callq test_argReti8 ; LINUXOSX64-NEXT: incb %al -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = add i8 %a, 1 @@ -162,40 +156,37 @@ define dso_local x86_regcallcc i16 @test_CallargReti16(i16 %a) { ; X32-LABEL: test_CallargReti16: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: incl %eax ; X32-NEXT: calll _test_argReti16 ; X32-NEXT: # kill: def $ax killed $ax def $eax ; X32-NEXT: incl %eax ; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti16: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incl %eax ; WIN64-NEXT: callq test_argReti16 ; WIN64-NEXT: # kill: def $ax killed $ax def $eax ; WIN64-NEXT: incl %eax ; WIN64-NEXT: # kill: def $ax killed $ax killed $eax -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargReti16: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: callq test_argReti16 ; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = add i16 %a, 1 @@ -228,34 +219,31 @@ define dso_local x86_regcallcc i32 @test_CallargReti32(i32 %a) { ; X32-LABEL: test_CallargReti32: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: incl %eax ; X32-NEXT: calll _test_argReti32 ; X32-NEXT: incl %eax -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti32: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incl %eax ; WIN64-NEXT: callq test_argReti32 ; WIN64-NEXT: incl %eax -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargReti32: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: callq test_argReti32 ; LINUXOSX64-NEXT: incl %eax -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = add i32 %a, 1 @@ -291,36 +279,33 @@ define dso_local x86_regcallcc i64 @test_CallargReti64(i64 %a) { ; X32-LABEL: test_CallargReti64: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: addl $1, %eax ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: calll _test_argReti64 ; X32-NEXT: addl $1, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti64: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incq %rax ; WIN64-NEXT: callq test_argReti64 ; WIN64-NEXT: incq %rax -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargReti64: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incq %rax ; LINUXOSX64-NEXT: callq test_argReti64 ; LINUXOSX64-NEXT: incq %rax -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = add i64 %a, 1 @@ -353,24 +338,20 @@ define dso_local x86_regcallcc float @test_CallargRetFloat(float %a) { ; X32-LABEL: test_CallargRetFloat: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $28, %esp ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero ; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0 ; X32-NEXT: calll _test_argRetFloat ; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: addl $24, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $28, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRetFloat: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $16, %rsp -; WIN64-NEXT: .seh_stackalloc 16 +; WIN64-NEXT: subq $24, %rsp +; WIN64-NEXT: .seh_stackalloc 24 ; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm %xmm8, 0 ; WIN64-NEXT: .seh_endprologue @@ -379,28 +360,22 @@ ; WIN64-NEXT: callq test_argRetFloat ; WIN64-NEXT: vaddss %xmm0, %xmm8, %xmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; WIN64-NEXT: addq $16, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $24, %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRetFloat: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $16, %rsp +; LINUXOSX64-NEXT: subq $24, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 ; LINUXOSX64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0 ; LINUXOSX64-NEXT: callq test_argRetFloat ; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: addq $16, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $24, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = fadd float 1.0, %a @@ -433,24 +408,20 @@ define dso_local x86_regcallcc double @test_CallargRetDouble(double %a) { ; X32-LABEL: test_CallargRetDouble: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $28, %esp ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero ; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0 ; X32-NEXT: calll _test_argRetDouble ; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: addl $24, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $28, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRetDouble: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $16, %rsp -; WIN64-NEXT: .seh_stackalloc 16 +; WIN64-NEXT: subq $24, %rsp +; WIN64-NEXT: .seh_stackalloc 24 ; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm %xmm8, 0 ; WIN64-NEXT: .seh_endprologue @@ -459,28 +430,22 @@ ; WIN64-NEXT: callq test_argRetDouble ; WIN64-NEXT: vaddsd %xmm0, %xmm8, %xmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; WIN64-NEXT: addq $16, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $24, %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRetDouble: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $16, %rsp +; LINUXOSX64-NEXT: subq $24, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 ; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero ; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0 ; LINUXOSX64-NEXT: callq test_argRetDouble ; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: addq $16, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $24, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = fadd double 1.0, %a @@ -544,34 +509,31 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { ; X32-LABEL: test_CallargRetf80: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: fadd %st, %st(0) ; X32-NEXT: calll _test_argRetf80 ; X32-NEXT: fadd %st, %st(0) -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRetf80: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: fadd %st, %st(0) ; WIN64-NEXT: callq test_argRetf80 ; WIN64-NEXT: fadd %st, %st(0) -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rax ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRetf80: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: fadd %st, %st(0) ; LINUXOSX64-NEXT: callq test_argRetf80 ; LINUXOSX64-NEXT: fadd %st, %st(0) -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = fadd x86_fp80 %a, %a @@ -583,34 +545,31 @@ define dso_local x86_regcallcc double @test_CallargParamf80(x86_fp80 %a) { ; X32-LABEL: test_CallargParamf80: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: fadd %st, %st(0) ; X32-NEXT: calll _test_argParamf80 ; X32-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargParamf80: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: fadd %st, %st(0) ; WIN64-NEXT: callq test_argParamf80 ; WIN64-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rax ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargParamf80: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: fadd %st, %st(0) ; LINUXOSX64-NEXT: callq test_argParamf80 ; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = fadd x86_fp80 %a, %a @@ -645,34 +604,31 @@ define dso_local x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) { ; X32-LABEL: test_CallargRetPointer: ; X32: # %bb.0: -; X32-NEXT: pushl %esp ; X32-NEXT: incl %eax ; X32-NEXT: calll _test_argRetPointer ; X32-NEXT: incl %eax -; X32-NEXT: popl %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRetPointer: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: incl %eax ; WIN64-NEXT: callq test_argRetPointer ; WIN64-NEXT: incl %eax -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: popq %rcx ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRetPointer: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp +; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: callq test_argRetPointer ; LINUXOSX64-NEXT: incl %eax -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = ptrtoint [4 x i32]* %a to i32 @@ -715,8 +671,7 @@ define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a) { ; X32-LABEL: test_CallargRet128Vector: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $40, %esp +; X32-NEXT: subl $44, %esp ; X32-NEXT: vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovdqa %xmm1, %xmm4 ; X32-NEXT: vpslld $31, %xmm0, %xmm1 @@ -728,16 +683,13 @@ ; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload ; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1} ; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload -; X32-NEXT: addl $40, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $44, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRet128Vector: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $32, %rsp -; WIN64-NEXT: .seh_stackalloc 32 +; WIN64-NEXT: subq $40, %rsp +; WIN64-NEXT: .seh_stackalloc 40 ; WIN64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm %xmm8, 16 ; WIN64-NEXT: .seh_endprologue @@ -751,19 +703,15 @@ ; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1} ; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload -; WIN64-NEXT: addq $32, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRet128Vector: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $32, %rsp +; LINUXOSX64-NEXT: subq $40, %rsp ; LINUXOSX64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32 ; LINUXOSX64-NEXT: vmovdqa %xmm1, %xmm8 ; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm1 @@ -775,9 +723,7 @@ ; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1} ; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: addq $32, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $40, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a) @@ -812,8 +758,7 @@ define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a) { ; X32-LABEL: test_CallargRet256Vector: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $88, %esp +; X32-NEXT: subl $92, %esp ; X32-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill ; X32-NEXT: kmovd %eax, %k1 ; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill @@ -822,16 +767,13 @@ ; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload ; X32-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload ; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} -; X32-NEXT: addl $88, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $92, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRet256Vector: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $80, %rsp -; WIN64-NEXT: .seh_stackalloc 80 +; WIN64-NEXT: subq $88, %rsp +; WIN64-NEXT: .seh_stackalloc 88 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; WIN64-NEXT: kmovd %eax, %k1 @@ -841,18 +783,14 @@ ; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; WIN64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload ; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} -; WIN64-NEXT: addq $80, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $88, %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRet256Vector: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $80, %rsp +; LINUXOSX64-NEXT: subq $88, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 96 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; LINUXOSX64-NEXT: kmovd %eax, %k1 ; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill @@ -861,9 +799,7 @@ ; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; LINUXOSX64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload ; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} -; LINUXOSX64-NEXT: addq $80, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $88, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a) @@ -898,8 +834,7 @@ define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a) { ; X32-LABEL: test_CallargRet512Vector: ; X32: # %bb.0: -; X32-NEXT: pushl %esp -; X32-NEXT: subl $184, %esp +; X32-NEXT: subl $188, %esp ; X32-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill ; X32-NEXT: kmovd %eax, %k1 ; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill @@ -908,16 +843,13 @@ ; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload ; X32-NEXT: vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload ; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; X32-NEXT: addl $184, %esp -; X32-NEXT: popl %esp +; X32-NEXT: addl $188, %esp ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargRet512Vector: ; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rsp -; WIN64-NEXT: .seh_pushreg %rsp -; WIN64-NEXT: subq $176, %rsp -; WIN64-NEXT: .seh_stackalloc 176 +; WIN64-NEXT: subq $184, %rsp +; WIN64-NEXT: .seh_stackalloc 184 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; WIN64-NEXT: kmovd %eax, %k1 @@ -927,18 +859,14 @@ ; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; WIN64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload ; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; WIN64-NEXT: addq $176, %rsp -; WIN64-NEXT: popq %rsp +; WIN64-NEXT: addq $184, %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_endproc ; ; LINUXOSX64-LABEL: test_CallargRet512Vector: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: pushq %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: subq $176, %rsp +; LINUXOSX64-NEXT: subq $184, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 192 -; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 ; LINUXOSX64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; LINUXOSX64-NEXT: kmovd %eax, %k1 ; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill @@ -947,9 +875,7 @@ ; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; LINUXOSX64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload ; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; LINUXOSX64-NEXT: addq $176, %rsp -; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: popq %rsp +; LINUXOSX64-NEXT: addq $184, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a) diff --git a/llvm/test/CodeGen/X86/dynamic-regmask.ll b/llvm/test/CodeGen/X86/dynamic-regmask.ll --- a/llvm/test/CodeGen/X86/dynamic-regmask.ll +++ b/llvm/test/CodeGen/X86/dynamic-regmask.ll @@ -11,7 +11,7 @@ ret i32 %b2 } ; CHECK: name: caller -; CHECK: CALL64pcrel32 @callee, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ebp,$ebx,$esp,$hbp,$hbx,$hsp,$rbp,$rbx,$rsp,$sp,$sph,$spl,$r10,$r11,$r12,$r13,$r14,$r15,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r10b,$r11b,$r12b,$r13b,$r14b,$r15b,$r10bh,$r11bh,$r12bh,$r13bh,$r14bh,$r15bh,$r10d,$r11d,$r12d,$r13d,$r14d,$r15d,$r10w,$r11w,$r12w,$r13w,$r14w,$r15w,$r10wh,$r11wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $eax, implicit $ecx, implicit $edx, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +; CHECK: CALL64pcrel32 @callee, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ebp,$ebx,$hbp,$hbx,$rbp,$rbx,$r10,$r11,$r12,$r13,$r14,$r15,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r10b,$r11b,$r12b,$r13b,$r14b,$r15b,$r10bh,$r11bh,$r12bh,$r13bh,$r14bh,$r15bh,$r10d,$r11d,$r12d,$r13d,$r14d,$r15d,$r10w,$r11w,$r12w,$r13w,$r14w,$r15w,$r10wh,$r11wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $eax, implicit $ecx, implicit $edx, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax ; CHECK: RET 0, $eax define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { @@ -24,7 +24,7 @@ ret {i32, i32, i32} %b6 } ; CHECK: name: test_callee -; CHECK: calleeSavedRegisters: [ '$rbx', '$rbp', '$rsp', '$r10', '$r11', '$r12', -; CHECK: '$r13', '$r14', '$r15', '$xmm8', '$xmm9', '$xmm10', -; CHECK: '$xmm11', '$xmm12', '$xmm13', '$xmm14', '$xmm15' ] +; CHECK: calleeSavedRegisters: [ '$rbx', '$rbp', '$r10', '$r11', '$r12', '$r13', +; CHECK: '$r14', '$r15', '$xmm8', '$xmm9', '$xmm10', '$xmm11', +; CHECK: '$xmm12', '$xmm13', '$xmm14', '$xmm15' ] ; CHECK: RET 0, $eax, $ecx, $edx diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7 +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $gs $hip $hsp $ip $mxcsr $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7 call void @bar1() call void @bar2() ret void