diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -366,8 +366,9 @@
   apply for values returned in callee-saved registers.
 - On X86-64 the callee preserves all general purpose registers, except for
-  R11. R11 can be used as a scratch register. Floating-point registers
-  (XMMs/YMMs) are not preserved and need to be saved by the caller.
+  R11 and return registers, if any. R11 can be used as a scratch register.
+  Floating-point registers (XMMs/YMMs) are not preserved and need to be
+  saved by the caller.
 
 The idea behind this convention is to support calls to runtime functions
 that have a hot path and a cold path. The hot path is usually a small piece
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1154,11 +1154,11 @@
 // CSRs that are handled explicitly via copies.
 def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;
 
-// All GPRs - except r11
+// All GPRs - except r11 and return registers.
 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
                                               R8, R9, R10)>;
 
-// All registers - except r11
+// All registers - except r11 and return registers.
 def CSR_64_RT_AllRegs     : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
                                                  (sequence "XMM%u", 0, 15))>;
 def CSR_64_RT_AllRegs_AVX : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -104,6 +104,20 @@
       DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
 }
 
+/// Returns true if a CC can dynamically exclude a register from the list of
+/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
+/// params/returns.
+static bool shouldDisableCalleeSavedRegisterCC(CallingConv::ID CC) {
+  switch (CC) {
+  default:
+    return false;
+  case CallingConv::X86_RegCall:
+  case CallingConv::PreserveMost:
+  case CallingConv::PreserveAll:
+    return true;
+  }
+}
+
 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                      const X86Subtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -3123,7 +3137,7 @@
   // In some cases we need to disable registers from the default CSR list.
   // For example, when they are used for argument passing.
   bool ShouldDisableCalleeSavedRegister =
-      CallConv == CallingConv::X86_RegCall ||
+      shouldDisableCalleeSavedRegisterCC(CallConv) ||
       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
 
   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
@@ -4275,7 +4289,7 @@
     }
   }
 
-  if (CallConv == CallingConv::X86_RegCall ||
+  if (shouldDisableCalleeSavedRegisterCC(CallConv) ||
      F.hasFnAttribute("no_caller_saved_registers")) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    for (std::pair Pair : MRI.liveins())
@@ -4836,7 +4850,7 @@
 
   // In some calling conventions we need to remove the used physical registers
   // from the reg mask.
-  if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
+  if (shouldDisableCalleeSavedRegisterCC(CallConv) || HasNCSR) {
     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
     // Allocate a new Reg Mask and copy Mask.
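To illustrate what the convention change means at the IR level, here is a minimal sketch (not part of the patch; runtime_helper and hot_path are hypothetical names): a preserve_mostcc callee that returns a value passes its result back in %rax, so after this change the caller must treat %rax, like %r11, as clobbered, while the remaining GPRs are still preserved across the call.

; Illustrative sketch only; the function names are hypothetical.
declare preserve_mostcc i32 @runtime_helper(i64)

define i32 @hot_path(i64 %x) {
entry:
  ; %rax carries the return value and %r11 is a scratch register, so the
  ; caller may not rely on either surviving this call; the other GPRs are
  ; preserved by the preserve_mostcc callee.
  %r = call preserve_mostcc i32 @runtime_helper(i64 %x)
  ret i32 %r
}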
diff --git a/llvm/test/CodeGen/X86/preserve_allcc64.ll b/llvm/test/CodeGen/X86/preserve_allcc64.ll
--- a/llvm/test/CodeGen/X86/preserve_allcc64.ll
+++ b/llvm/test/CodeGen/X86/preserve_allcc64.ll
@@ -1,82 +1,153 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT128 %s
+;
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT128 %s
 
-define preserve_allcc void @preserve_allcc1() nounwind {
+define preserve_allcc RETTYPE @preserve_allcc1() nounwind {
 entry:
-;SSE-LABEL: preserve_allcc1
-;SSE: pushq %r10
-;SSE-NEXT: pushq %r9
-;SSE-NEXT: pushq %r8
-;SSE-NEXT: pushq %rdi
-;SSE-NEXT: pushq %rsi
-;SSE-NEXT: pushq %rdx
-;SSE-NEXT: pushq %rcx
-;SSE-NEXT: pushq %rax
-;SSE-NEXT: pushq %rbp
-;SSE-NEXT: pushq %r15
-;SSE-NEXT: pushq %r14
-;SSE-NEXT: pushq %r13
-;SSE-NEXT: pushq %r12
-;SSE-NEXT: pushq %rbx
-;SSE: movaps %xmm15
-;SSE-NEXT: movaps %xmm14
-;SSE-NEXT: movaps %xmm13
-;SSE-NEXT: movaps %xmm12
-;SSE-NEXT: movaps %xmm11
-;SSE-NEXT: movaps %xmm10
-;SSE-NEXT: movaps %xmm9
-;SSE-NEXT: movaps %xmm8
-;SSE-NEXT: movaps %xmm7
-;SSE-NEXT: movaps %xmm6
-;SSE-NEXT: movaps %xmm5
-;SSE-NEXT: movaps %xmm4
-;SSE-NEXT: movaps %xmm3
-;SSE-NEXT: movaps %xmm2
-;SSE-NEXT: movaps %xmm1
-;SSE-NEXT: movaps %xmm0
-;AVX-LABEL: preserve_allcc1
-;AVX: pushq %r10
-;AVX-NEXT: pushq %r9
-;AVX-NEXT: pushq %r8
-;AVX-NEXT: pushq %rdi
-;AVX-NEXT: pushq %rsi
-;AVX-NEXT: pushq %rdx
-;AVX-NEXT: pushq %rcx
-;AVX-NEXT: pushq %rax
-;AVX-NEXT: pushq %rbp
-;AVX-NEXT: pushq %r15
-;AVX-NEXT: pushq %r14
-;AVX-NEXT: pushq %r13
-;AVX-NEXT: pushq %r12
-;AVX-NEXT: pushq %rbx
-;AVX: vmovups %ymm15
-;AVX-NEXT: vmovups %ymm14
-;AVX-NEXT: vmovups %ymm13
-;AVX-NEXT: vmovups %ymm12
-;AVX-NEXT: vmovups %ymm11
-;AVX-NEXT: vmovups %ymm10
-;AVX-NEXT: vmovups %ymm9
-;AVX-NEXT: vmovups %ymm8
-;AVX-NEXT: vmovups %ymm7
-;AVX-NEXT: vmovups %ymm6
-;AVX-NEXT: vmovups %ymm5
-;AVX-NEXT: vmovups %ymm4
-;AVX-NEXT: vmovups %ymm3
-;AVX-NEXT: vmovups %ymm2
-;AVX-NEXT: vmovups %ymm1
-;AVX-NEXT: vmovups %ymm0
+;ALL-LABEL: preserve_allcc1
+;ALL: pushq %r10
+;ALL-NEXT: pushq %r9
+;ALL-NEXT: pushq %r8
+;ALL-NEXT: pushq %rdi
+;ALL-NEXT: pushq %rsi
+;VOID-NEXT: pushq %rdx
+;INT-NEXT: pushq %rdx
+;INT128-NOT: pushq %rdx
+;ALL-NEXT: pushq %rcx
+;VOID-NEXT: pushq %rax
+;INT-NOT: pushq %rax
+;INT128-NOT: pushq %rax
+;ALL-NEXT: pushq %rbp
+;ALL-NEXT: pushq %r15
+;ALL-NEXT: pushq %r14
+;ALL-NEXT: pushq %r13
+;ALL-NEXT: pushq %r12
+;ALL-NEXT: pushq %rbx
+;SSE: movaps %xmm15
+;SSE-NEXT: movaps %xmm14
+;SSE-NEXT: movaps %xmm13
+;SSE-NEXT: movaps %xmm12
+;SSE-NEXT: movaps %xmm11
+;SSE-NEXT: movaps %xmm10
+;SSE-NEXT: movaps %xmm9
+;SSE-NEXT: movaps %xmm8
+;SSE-NEXT: movaps %xmm7
+;SSE-NEXT: movaps %xmm6
+;SSE-NEXT: movaps %xmm5
+;SSE-NEXT: movaps %xmm4
+;SSE-NEXT: movaps %xmm3
+;SSE-NEXT: movaps %xmm2
+;SSE-NEXT: movaps %xmm1
+;SSE-NEXT: movaps %xmm0
+;AVX: vmovups %ymm15
+;AVX-NEXT: vmovups %ymm14
+;AVX-NEXT: vmovups %ymm13
+;AVX-NEXT: vmovups %ymm12
+;AVX-NEXT: vmovups %ymm11
+;AVX-NEXT: vmovups %ymm10
+;AVX-NEXT: vmovups %ymm9
+;AVX-NEXT: vmovups %ymm8
+;AVX-NEXT: vmovups %ymm7
+;AVX-NEXT: vmovups %ymm6
+;AVX-NEXT: vmovups %ymm5
+;AVX-NEXT: vmovups %ymm4
+;AVX-NEXT: vmovups %ymm3
+;AVX-NEXT: vmovups %ymm2
+;AVX-NEXT: vmovups %ymm1
+;AVX-NEXT: vmovups %ymm0
+;SSE: movaps {{.*}} %xmm0
+;SSE-NEXT: movaps {{.*}} %xmm1
+;SSE-NEXT: movaps {{.*}} %xmm2
+;SSE-NEXT: movaps {{.*}} %xmm3
+;SSE-NEXT: movaps {{.*}} %xmm4
+;SSE-NEXT: movaps {{.*}} %xmm5
+;SSE-NEXT: movaps {{.*}} %xmm6
+;SSE-NEXT: movaps {{.*}} %xmm7
+;SSE-NEXT: movaps {{.*}} %xmm8
+;SSE-NEXT: movaps {{.*}} %xmm9
+;SSE-NEXT: movaps {{.*}} %xmm10
+;SSE-NEXT: movaps {{.*}} %xmm11
+;SSE-NEXT: movaps {{.*}} %xmm12
+;SSE-NEXT: movaps {{.*}} %xmm13
+;SSE-NEXT: movaps {{.*}} %xmm14
+;SSE-NEXT: movaps {{.*}} %xmm15
+;AVX: vmovups {{.*}} %ymm0
+;AVX-NEXT: vmovups {{.*}} %ymm1
+;AVX-NEXT: vmovups {{.*}} %ymm2
+;AVX-NEXT: vmovups {{.*}} %ymm3
+;AVX-NEXT: vmovups {{.*}} %ymm4
+;AVX-NEXT: vmovups {{.*}} %ymm5
+;AVX-NEXT: vmovups {{.*}} %ymm6
+;AVX-NEXT: vmovups {{.*}} %ymm7
+;AVX-NEXT: vmovups {{.*}} %ymm8
+;AVX-NEXT: vmovups {{.*}} %ymm9
+;AVX-NEXT: vmovups {{.*}} %ymm10
+;AVX-NEXT: vmovups {{.*}} %ymm11
+;AVX-NEXT: vmovups {{.*}} %ymm12
+;AVX-NEXT: vmovups {{.*}} %ymm13
+;AVX-NEXT: vmovups {{.*}} %ymm14
+;AVX-NEXT: vmovups {{.*}} %ymm15
+;ALL: popq %rbx
+;ALL-NEXT: popq %r12
+;ALL-NEXT: popq %r13
+;ALL-NEXT: popq %r14
+;ALL-NEXT: popq %r15
+;ALL-NEXT: popq %rbp
+;VOID-NEXT: popq %rax
+;INT-NOT: popq %rax
+;INT128-NOT: popq %rax
+;ALL-NEXT: popq %rcx
+;VOID-NEXT: popq %rdx
+;INT-NEXT: popq %rdx
+;INT128-NOT: popq %rdx
+;ALL-NEXT: popq %rsi
+;ALL-NEXT: popq %rdi
+;ALL-NEXT: popq %r8
+;ALL-NEXT: popq %r9
+;ALL-NEXT: popq %r10
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
-  ret void
+  ret RETTYPE RETVAL
+}
+
+define preserve_allcc double @preserve_allcc2() nounwind {
+entry:
+;ALL-LABEL: preserve_allcc2
+;SSE: movaps %xmm1
+;SSE-NOT: movaps %xmm0
+;AVX: vmovups %ymm1
+;AVX-NOT: vmovups %ymm0
+;SSE-NOT: movaps {{.*}} %xmm0
+;SSE: movaps {{.*}} %xmm1
+;AVX-NOT: vmovups {{.*}} %ymm0
+;AVX: vmovups {{.*}} %ymm1
+  call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
+  ret double 0.
 }
 
-; Make sure only R11 is saved before the call
-declare preserve_allcc void @bar(i64, i64, double, double)
-define void @preserve_allcc2() nounwind {
+; Make sure R11 and return registers are saved before the call
+declare preserve_allcc RETTYPE @bar(i64, i64, double, double)
+define void @preserve_allcc3() nounwind {
 entry:
-;SSE-LABEL: preserve_allcc2
-;SSE: movq %r11, [[REG:%[a-z0-9]+]]
-;SSE-NOT: movaps %xmm
-;SSE: movq [[REG]], %r11
+;ALL-LABEL: preserve_allcc3
+;VOID-NOT: movq %rax, [[REG1:%[a-z0-9]+]]
+;INT: movq %rax, [[REG1:%[a-z0-9]+]]
+;INT128: movq %rax, [[REG1:%[a-z0-9]+]]
+;VOID-NOT: movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT-NOT: movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT128: movq %rdx, [[REG2:%[a-z0-9]+]]
+;ALL: movq %r11, [[REG3:%[a-z0-9]+]]
+;ALL-NOT: movaps %xmm
+;VOID-NOT: movq {{.*}}, %rax
+;INT: movq [[REG1]], %rax
+;INT128: movq [[REG1]], %rax
+;VOID-NOT: movq {{.*}}, %rdx
+;INT-NOT: movq {{.*}}, %rdx
+;INT128: movq [[REG2]], %rdx
+;ALL: movq [[REG3]], %r11
   %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
   %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
   %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
@@ -98,7 +169,44 @@
   %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
   %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
   %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
-  call preserve_allcc void @bar(i64 1, i64 2, double 3.0, double 4.0)
+  call preserve_allcc RETTYPE @bar(i64 1, i64 2, double 3.0, double 4.0)
   call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23)
   ret void
 }
+
+; Make sure XMM0 (return register) and R11 are saved before the call
+declare preserve_allcc double @bar_double(i64, i64)
+define void @preserve_allcc6() nounwind {
+entry:
+;SSE-LABEL: preserve_allcc6
+;SSE: movq %r11, [[REG1:%[a-z0-9]+]]
+;SSE: movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]]
+;SSE: movq [[REG1]], %r11
+;SSE: movaps [[REG2]], %xmm0
+  %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
+  %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
+  %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
+  %a3 = call i64 asm sideeffect "", "={r8}"() nounwind
+  %a4 = call i64 asm sideeffect "", "={r9}"() nounwind
+  %a5 = call i64 asm sideeffect "", "={r10}"() nounwind
+  %a6 = call i64 asm sideeffect "", "={r11}"() nounwind
+  %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind
+  %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind
+  %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
+  %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind
+  %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind
+  %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind
+  %a16 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind
+  %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind
+  %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind
+  %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind
+  %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind
+  %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind
+  %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind
+  %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
+  %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
+  %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
+  call preserve_allcc double @bar_double(i64 1, i64 2)
+  call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/preserve_mostcc64.ll b/llvm/test/CodeGen/X86/preserve_mostcc64.ll
--- a/llvm/test/CodeGen/X86/preserve_mostcc64.ll
+++ b/llvm/test/CodeGen/X86/preserve_mostcc64.ll
@@ -1,64 +1,99 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT128 %s
 
-; Every GPR should be saved - except r11
-define preserve_mostcc void @preserve_mostcc1() nounwind {
+; Every GPR should be saved - except r11 and return registers
+define preserve_mostcc RETTYPE @preserve_mostcc1() nounwind {
 entry:
-;SSE-LABEL: preserve_mostcc1
-;SSE: pushq %r10
-;SSE-NEXT: pushq %r9
-;SSE-NEXT: pushq %r8
-;SSE-NEXT: pushq %rdi
-;SSE-NEXT: pushq %rsi
-;SSE-NEXT: pushq %rdx
-;SSE-NEXT: pushq %rcx
-;SSE-NEXT: pushq %rax
-;SSE-NEXT: pushq %rbp
-;SSE-NEXT: pushq %r15
-;SSE-NEXT: pushq %r14
-;SSE-NEXT: pushq %r13
-;SSE-NEXT: pushq %r12
-;SSE-NEXT: pushq %rbx
-;AVX-LABEL: preserve_mostcc1
-;AVX: pushq %r10
-;AVX-NEXT: pushq %r9
-;AVX-NEXT: pushq %r8
-;AVX-NEXT: pushq %rdi
-;AVX-NEXT: pushq %rsi
-;AVX-NEXT: pushq %rdx
-;AVX-NEXT: pushq %rcx
-;AVX-NEXT: pushq %rax
-;AVX-NEXT: pushq %rbp
-;AVX-NEXT: pushq %r15
-;AVX-NEXT: pushq %r14
-;AVX-NEXT: pushq %r13
-;AVX-NEXT: pushq %r12
-;AVX-NEXT: pushq %rbx
+;ALL-LABEL: preserve_mostcc1
+;ALL: pushq %r10
+;ALL-NEXT: pushq %r9
+;ALL-NEXT: pushq %r8
+;ALL-NEXT: pushq %rdi
+;ALL-NEXT: pushq %rsi
+;VOID-NEXT: pushq %rdx
+;INT-NEXT: pushq %rdx
+;INT128-NOT: pushq %rdx
+;ALL-NEXT: pushq %rcx
+;VOID-NEXT: pushq %rax
+;INT-NOT: pushq %rax
+;INT128-NOT: pushq %rax
+;ALL-NEXT: pushq %rbp
+;ALL-NEXT: pushq %r15
+;ALL-NEXT: pushq %r14
+;ALL-NEXT: pushq %r13
+;ALL-NEXT: pushq %r12
+;ALL-NEXT: pushq %rbx
+;ALL: popq %rbx
+;ALL-NEXT: popq %r12
+;ALL-NEXT: popq %r13
+;ALL-NEXT: popq %r14
+;ALL-NEXT: popq %r15
+;ALL-NEXT: popq %rbp
+;VOID-NEXT: popq %rax
+;INT-NOT: popq %rax
+;INT128-NOT: popq %rax
+;ALL-NEXT: popq %rcx
+;VOID-NEXT: popq %rdx
+;INT-NEXT: popq %rdx
+;INT128-NOT: popq %rdx
+;ALL-NEXT: popq %rsi
+;ALL-NEXT: popq %rdi
+;ALL-NEXT: popq %r8
+;ALL-NEXT: popq %r9
+;ALL-NEXT: popq %r10
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
-  ret void
+  ret RETTYPE RETVAL
 }
 
-; Make sure R11 and XMMs are saved before the call
-declare preserve_mostcc void @foo(i64, i64, double, double)
+; Make sure R11, return registers and XMMs are saved before the call
+declare preserve_mostcc RETTYPE @foo(i64, i64, double, double)
 define void @preserve_mostcc2() nounwind {
 entry:
-;SSE-LABEL: preserve_mostcc2
-;SSE: movq %r11, [[REG:%[a-z0-9]+]]
-;SSE: movaps %xmm2
-;SSE: movaps %xmm3
-;SSE: movaps %xmm4
-;SSE: movaps %xmm5
-;SSE: movaps %xmm6
-;SSE: movaps %xmm7
-;SSE: movaps %xmm8
-;SSE: movaps %xmm9
-;SSE: movaps %xmm10
-;SSE: movaps %xmm11
-;SSE: movaps %xmm12
-;SSE: movaps %xmm13
-;SSE: movaps %xmm14
-;SSE: movaps %xmm15
-;SSE: movq [[REG]], %r11
+;ALL-LABEL: preserve_mostcc2
+;VOID-NOT: movq %rax, [[REG1:%[a-z0-9]+]]
+;INT: movq %rax, [[REG1:%[a-z0-9]+]]
+;INT128: movq %rax, [[REG1:%[a-z0-9]+]]
+;VOID-NOT: movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT-NOT: movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT128: movq %rdx, [[REG2:%[a-z0-9]+]]
+;ALL: movq %r11, [[REG3:%[a-z0-9]+]]
+;ALL: movaps %xmm2
+;ALL: movaps %xmm3
+;ALL: movaps %xmm4
+;ALL: movaps %xmm5
+;ALL: movaps %xmm6
+;ALL: movaps %xmm7
+;ALL: movaps %xmm8
+;ALL: movaps %xmm9
+;ALL: movaps %xmm10
+;ALL: movaps %xmm11
+;ALL: movaps %xmm12
+;ALL: movaps %xmm13
+;ALL: movaps %xmm14
+;ALL: movaps %xmm15
+;ALL: call
+;VOID-NOT: movq {{.*}}, %rax
+;INT: movq [[REG1]], %rax
+;INT128: movq [[REG1]], %rax
+;VOID-NOT: movq {{.*}}, %rdx
+;INT-NOT: movq {{.*}}, %rdx
+;INT128: movq [[REG2]], %rdx
+;ALL: movq [[REG3]], %r11
+;ALL: movaps {{.*}} %xmm2
+;ALL: movaps {{.*}} %xmm3
+;ALL: movaps {{.*}} %xmm4
+;ALL: movaps {{.*}} %xmm5
+;ALL: movaps {{.*}} %xmm6
+;ALL: movaps {{.*}} %xmm7
+;ALL: movaps {{.*}} %xmm8
+;ALL: movaps {{.*}} %xmm9
+;ALL: movaps {{.*}} %xmm10
+;ALL: movaps {{.*}} %xmm11
+;ALL: movaps {{.*}} %xmm12
+;ALL: movaps {{.*}} %xmm13
+;ALL: movaps {{.*}} %xmm14
+;ALL: movaps {{.*}} %xmm15
   %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
   %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
   %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
@@ -80,7 +115,45 @@
   %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
   %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
   %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
-  call preserve_mostcc void @foo(i64 1, i64 2, double 3.0, double 4.0)
+  call preserve_mostcc RETTYPE @foo(i64 1, i64 2, double 3.0, double 4.0)
   call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23)
   ret void
 }
+
+; Make sure XMM0 (return register) and R11 are saved before the call
+declare preserve_mostcc double @foo_double(i64, i64)
+define void @preserve_mostcc3() nounwind {
+entry:
+;ALL-LABEL: preserve_mostcc3
+;ALL: movq %r11, [[REG1:%[a-z0-9]+]]
+;ALL: movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]]
+;ALL: call
+;ALL: movq [[REG1]], %r11
+;ALL: movaps [[REG2]], %xmm0
+  %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
+  %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
+  %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
+  %a3 = call i64 asm sideeffect "", "={r8}"() nounwind
+  %a4 = call i64 asm sideeffect "", "={r9}"() nounwind
+  %a5 = call i64 asm sideeffect "", "={r10}"() nounwind
+  %a6 = call i64 asm sideeffect "", "={r11}"() nounwind
+  %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind
+  %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind
+  %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
+  %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind
+  %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind
+  %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind
+  %a16 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind
+  %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind
+  %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind
+  %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind
+  %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind
+  %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind
+  %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind
+  %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
+  %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
+  %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
+  call preserve_mostcc double @foo_double(i64 1, i64 2)
+  call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25)
+  ret void
+}
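A matching callee-side sketch, again illustrative only (the function name is hypothetical, not from this patch): a preserve_allcc function returning {i64, i64} produces its result in %rax:%rdx, so with this change those two registers drop out of its callee-saved set and are no longer spilled and reloaded in the prologue/epilogue, while the remaining GPRs and the XMM/YMM registers are still saved, as the preserve_allcc1 checks above verify.

; Illustrative sketch only; the function name is hypothetical.
define preserve_allcc { i64, i64 } @returns_pair() nounwind {
entry:
  ; The {i64, i64} result occupies %rax and %rdx, which this callee may now
  ; clobber; every other GPR and all vector registers remain callee-saved
  ; under preserve_allcc.
  ret { i64, i64 } { i64 1, i64 2 }
}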