diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -366,8 +366,9 @@ apply for values returned in callee-saved registers. - On X86-64 the callee preserves all general purpose registers, except for - R11. R11 can be used as a scratch register. Floating-point registers - (XMMs/YMMs) are not preserved and need to be saved by the caller. + R11 and RAX. R11 can be used as a scratch register, whereas RAX is used as + a return register. Floating-point registers (XMMs/YMMs) are not preserved + and need to be saved by the caller. The idea behind this convention is to support calls to runtime functions that have a hot path and a cold path. The hot path is usually a small piece diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1154,11 +1154,11 @@ // CSRs that are handled explicitly via copies. def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>; -// All GPRs - except r11 -def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, - R8, R9, R10)>; +// All GPRs - except r11 and rax +def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, RDI, R8, + R9, R10)>; -// All registers - except r11 +// All registers - except r11 and rax def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs, (sequence "XMM%u", 0, 15))>; def CSR_64_RT_AllRegs_AVX : CalleeSavedRegs<(add CSR_64_RT_MostRegs, diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $gs $hip $hsp $ip $mxcsr $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7 +; CHECK: foo Clobbered Registers: $ah $al $ax $cs $df $ds $eax $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $gs $hax $hip $hsp $ip $mxcsr $rax $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/X86/machine-copy-prop.mir b/llvm/test/CodeGen/X86/machine-copy-prop.mir --- a/llvm/test/CodeGen/X86/machine-copy-prop.mir +++ b/llvm/test/CodeGen/X86/machine-copy-prop.mir @@ -67,12 +67,12 @@ NOOP implicit $rax, implicit $rdi ... --- -# The second copy is redundant; the call preserves the source and dest register. +# The first copy is redundant, since the call will clobber the dest register. # CHECK-LABEL: name: copyprop0 # CHECK: bb.0: -# CHECK-NEXT: $rax = COPY $rdi # CHECK-NEXT: CALL64pcrel32 @foo, csr_64_rt_mostregs -# CHECK-NEXT: NOOP implicit $edi +# CHECK-NEXT: NOOP implicit killed $edi +# CHECK-NEXT: $rdi = COPY $rax # CHECK-NOT: COPY # CHECK-NEXT: NOOP implicit $rax, implicit $rdi name: copyprop0 @@ -102,19 +102,19 @@ --- # CHECK-LABEL: name: copyprop2 # CHECK: bb.0: -# CHECK-NEXT: $rax = COPY $rdi -# CHECK-NEXT: NOOP implicit $ax +# CHECK-NEXT: $rcx = COPY $rdi +# CHECK-NEXT: NOOP implicit $cx # CHECK-NEXT: CALL64pcrel32 @foo, csr_64_rt_mostregs -# CHECK-NOT: $rax = COPY $rdi -# CHECK-NEXT: NOOP implicit $rax, implicit $rdi +# CHECK-NOT: $rcx = COPY $rdi +# CHECK-NEXT: NOOP implicit $rcx, implicit $rdi name: copyprop2 body: | bb.0: - $rax = COPY $rdi - NOOP implicit killed $ax + $rcx = COPY $rdi + NOOP implicit killed $cx CALL64pcrel32 @foo, csr_64_rt_mostregs - $rax = COPY $rdi - NOOP implicit $rax, implicit $rdi + $rcx = COPY $rdi + NOOP implicit $rcx, implicit $rdi ... --- # The second copy is not redundant if the source register ($rax) is clobbered diff --git a/llvm/test/CodeGen/X86/preserve_allcc64.ll b/llvm/test/CodeGen/X86/preserve_allcc64.ll --- a/llvm/test/CodeGen/X86/preserve_allcc64.ll +++ b/llvm/test/CodeGen/X86/preserve_allcc64.ll @@ -11,7 +11,6 @@ ;SSE-NEXT: pushq %rsi ;SSE-NEXT: pushq %rdx ;SSE-NEXT: pushq %rcx -;SSE-NEXT: pushq %rax ;SSE-NEXT: pushq %rbp ;SSE-NEXT: pushq %r15 ;SSE-NEXT: pushq %r14 @@ -34,6 +33,36 @@ ;SSE-NEXT: movaps %xmm2 ;SSE-NEXT: movaps %xmm1 ;SSE-NEXT: movaps %xmm0 +;SSE: movaps {{.*}} %xmm0 +;SSE-NEXT: movaps {{.*}} %xmm1 +;SSE-NEXT: movaps {{.*}} %xmm2 +;SSE-NEXT: movaps {{.*}} %xmm3 +;SSE-NEXT: movaps {{.*}} %xmm4 +;SSE-NEXT: movaps {{.*}} %xmm5 +;SSE-NEXT: movaps {{.*}} %xmm6 +;SSE-NEXT: movaps {{.*}} %xmm7 +;SSE-NEXT: movaps {{.*}} %xmm8 +;SSE-NEXT: movaps {{.*}} %xmm9 +;SSE-NEXT: movaps {{.*}} %xmm10 +;SSE-NEXT: movaps {{.*}} %xmm11 +;SSE-NEXT: movaps {{.*}} %xmm12 +;SSE-NEXT: movaps {{.*}} %xmm13 +;SSE-NEXT: movaps {{.*}} %xmm14 +;SSE-NEXT: movaps {{.*}} %xmm15 +;SSE: popq %rbx +;SSE-NEXT: popq %r12 +;SSE-NEXT: popq %r13 +;SSE-NEXT: popq %r14 +;SSE-NEXT: popq %r15 +;SSE-NEXT: popq %rbp +;SSE-NEXT: popq %rcx +;SSE-NEXT: popq %rdx +;SSE-NEXT: popq %rsi +;SSE-NEXT: popq %rdi +;SSE-NEXT: popq %r8 +;SSE-NEXT: popq %r9 +;SSE-NEXT: popq %r10 + ;AVX-LABEL: preserve_allcc1 ;AVX: pushq %r10 ;AVX-NEXT: pushq %r9 @@ -42,7 +71,6 @@ ;AVX-NEXT: pushq %rsi ;AVX-NEXT: pushq %rdx ;AVX-NEXT: pushq %rcx -;AVX-NEXT: pushq %rax ;AVX-NEXT: pushq %rbp ;AVX-NEXT: pushq %r15 ;AVX-NEXT: pushq %r14 @@ -65,18 +93,49 @@ ;AVX-NEXT: vmovups %ymm2 ;AVX-NEXT: vmovups %ymm1 ;AVX-NEXT: vmovups %ymm0 +;AVX: vmovups {{.*}} %ymm0 +;AVX-NEXT: vmovups {{.*}} %ymm1 +;AVX-NEXT: vmovups {{.*}} %ymm2 +;AVX-NEXT: vmovups {{.*}} %ymm3 +;AVX-NEXT: vmovups {{.*}} %ymm4 +;AVX-NEXT: vmovups {{.*}} %ymm5 +;AVX-NEXT: vmovups {{.*}} %ymm6 +;AVX-NEXT: vmovups {{.*}} %ymm7 +;AVX-NEXT: vmovups {{.*}} %ymm8 +;AVX-NEXT: vmovups {{.*}} %ymm9 +;AVX-NEXT: vmovups {{.*}} %ymm10 +;AVX-NEXT: vmovups {{.*}} %ymm11 +;AVX-NEXT: vmovups {{.*}} %ymm12 +;AVX-NEXT: vmovups {{.*}} %ymm13 +;AVX-NEXT: vmovups {{.*}} %ymm14 +;AVX-NEXT: vmovups {{.*}} %ymm15 +;AVX: popq %rbx +;AVX-NEXT: popq %r12 +;AVX-NEXT: popq %r13 +;AVX-NEXT: popq %r14 +;AVX-NEXT: popq %r15 +;AVX-NEXT: popq %rbp +;AVX-NEXT: popq %rcx +;AVX-NEXT: popq %rdx +;AVX-NEXT: popq %rsi +;AVX-NEXT: popq %rdi +;AVX-NEXT: popq %r8 +;AVX-NEXT: popq %r9 +;AVX-NEXT: popq %r10 call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() ret void } -; Make sure only R11 is saved before the call +; Make sure RAX and R11 are saved before the call declare preserve_allcc void @bar(i64, i64, double, double) define void @preserve_allcc2() nounwind { entry: ;SSE-LABEL: preserve_allcc2 -;SSE: movq %r11, [[REG:%[a-z0-9]+]] +;SSE: movq %rax, [[REG1:%[a-z0-9]+]] +;SSE: movq %r11, [[REG2:%[a-z0-9]+]] ;SSE-NOT: movaps %xmm -;SSE: movq [[REG]], %r11 +;SSE: movq [[REG1]], %rax +;SSE: movq [[REG2]], %r11 %a0 = call i64 asm sideeffect "", "={rax}"() nounwind %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind diff --git a/llvm/test/CodeGen/X86/preserve_mostcc64.ll b/llvm/test/CodeGen/X86/preserve_mostcc64.ll --- a/llvm/test/CodeGen/X86/preserve_mostcc64.ll +++ b/llvm/test/CodeGen/X86/preserve_mostcc64.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s -; Every GPR should be saved - except r11 +; Every GPR should be saved - except r11 and rax define preserve_mostcc void @preserve_mostcc1() nounwind { entry: ;SSE-LABEL: preserve_mostcc1 @@ -12,13 +12,25 @@ ;SSE-NEXT: pushq %rsi ;SSE-NEXT: pushq %rdx ;SSE-NEXT: pushq %rcx -;SSE-NEXT: pushq %rax ;SSE-NEXT: pushq %rbp ;SSE-NEXT: pushq %r15 ;SSE-NEXT: pushq %r14 ;SSE-NEXT: pushq %r13 ;SSE-NEXT: pushq %r12 ;SSE-NEXT: pushq %rbx +;SSE: popq %rbx +;SSE-NEXT: popq %r12 +;SSE-NEXT: popq %r13 +;SSE-NEXT: popq %r14 +;SSE-NEXT: popq %r15 +;SSE-NEXT: popq %rbp +;SSE-NEXT: popq %rcx +;SSE-NEXT: popq %rdx +;SSE-NEXT: popq %rsi +;SSE-NEXT: popq %rdi +;SSE-NEXT: popq %r8 +;SSE-NEXT: popq %r9 +;SSE-NEXT: popq %r10 ;AVX-LABEL: preserve_mostcc1 ;AVX: pushq %r10 ;AVX-NEXT: pushq %r9 @@ -27,23 +39,36 @@ ;AVX-NEXT: pushq %rsi ;AVX-NEXT: pushq %rdx ;AVX-NEXT: pushq %rcx -;AVX-NEXT: pushq %rax ;AVX-NEXT: pushq %rbp ;AVX-NEXT: pushq %r15 ;AVX-NEXT: pushq %r14 ;AVX-NEXT: pushq %r13 ;AVX-NEXT: pushq %r12 ;AVX-NEXT: pushq %rbx +;AVX: popq %rbx +;AVX-NEXT: popq %r12 +;AVX-NEXT: popq %r13 +;AVX-NEXT: popq %r14 +;AVX-NEXT: popq %r15 +;AVX-NEXT: popq %rbp +;AVX-NEXT: popq %rcx +;AVX-NEXT: popq %rdx +;AVX-NEXT: popq %rsi +;AVX-NEXT: popq %rdi +;AVX-NEXT: popq %r8 +;AVX-NEXT: popq %r9 +;AVX-NEXT: popq %r10 call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() ret void } -; Make sure R11 and XMMs are saved before the call +; Make sure RAX, R11 and XMMs are saved before the call declare preserve_mostcc void @foo(i64, i64, double, double) define void @preserve_mostcc2() nounwind { entry: ;SSE-LABEL: preserve_mostcc2 -;SSE: movq %r11, [[REG:%[a-z0-9]+]] +;SSE: movq %rax, [[REG1:%[a-z0-9]+]] +;SSE: movq %r11, [[REG2:%[a-z0-9]+]] ;SSE: movaps %xmm2 ;SSE: movaps %xmm3 ;SSE: movaps %xmm4 @@ -58,7 +83,23 @@ ;SSE: movaps %xmm13 ;SSE: movaps %xmm14 ;SSE: movaps %xmm15 -;SSE: movq [[REG]], %r11 +;SSE: call +;SSE: movq [[REG1]], %rax +;SSE: movq [[REG2]], %r11 +;SSE: movaps {{.*}} %xmm2 +;SSE: movaps {{.*}} %xmm3 +;SSE: movaps {{.*}} %xmm4 +;SSE: movaps {{.*}} %xmm5 +;SSE: movaps {{.*}} %xmm6 +;SSE: movaps {{.*}} %xmm7 +;SSE: movaps {{.*}} %xmm8 +;SSE: movaps {{.*}} %xmm9 +;SSE: movaps {{.*}} %xmm10 +;SSE: movaps {{.*}} %xmm11 +;SSE: movaps {{.*}} %xmm12 +;SSE: movaps {{.*}} %xmm13 +;SSE: movaps {{.*}} %xmm14 +;SSE: movaps {{.*}} %xmm15 %a0 = call i64 asm sideeffect "", "={rax}"() nounwind %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind