Index: include/llvm/Target/TargetRegisterInfo.h =================================================================== --- include/llvm/Target/TargetRegisterInfo.h +++ include/llvm/Target/TargetRegisterInfo.h @@ -635,6 +635,13 @@ return RC; } + /// Returns a register class with registers that can be used in forming + /// tail calls (roughly the caller-saved GPRs). The default is unreachable. + virtual const TargetRegisterClass * + getGPRsForTailCall(const MachineFunction &MF) const { + llvm_unreachable("Target didn't implement getGPRsForTailCall!"); + } + /// Returns the largest super class of RC that is legal to use in the current /// sub-target and has the same spill size. /// The returned register class can be used to create virtual registers which Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -153,14 +153,7 @@ if (!F || MF->getMMI().callsEHReturn()) return 0; - static const uint16_t CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX, 0 - }; - - static const uint16_t CallerSavedRegs64Bit[] = { - X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11, 0 - }; + const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF); unsigned Opc = MBBI->getOpcode(); switch (Opc) { @@ -189,10 +182,10 @@ Uses.insert(*AI); } - const uint16_t *CS = Is64Bit ? 
CallerSavedRegs64Bit : CallerSavedRegs32Bit; - for (; *CS; ++CS) - if (!Uses.count(*CS)) - return *CS; + // The instruction and stack pointers must never be used as scratch. + for (auto CS : AvailableRegs) + if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP) + return CS; } } Index: lib/Target/X86/X86RegisterInfo.h =================================================================== --- lib/Target/X86/X86RegisterInfo.h +++ lib/Target/X86/X86RegisterInfo.h @@ -87,6 +87,11 @@ const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + /// getGPRsForTailCall - Returns a register class with registers that can be + /// used in forming tail calls. + const TargetRegisterClass * + getGPRsForTailCall(const MachineFunction &MF) const override; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -177,20 +177,25 @@ return &X86::GR64_NOREX_NOSPRegClass; return &X86::GR32_NOREX_NOSPRegClass; case 4: // Available for tailcall (not callee-saved GPRs). - const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) - return &X86::GR64_TCW64RegClass; - else if (Is64Bit) - return &X86::GR64_TCRegClass; - - bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); - if (hasHipeCC) - return &X86::GR32RegClass; - return &X86::GR32_TCRegClass; + return getGPRsForTailCall(MF); } } const TargetRegisterClass * +X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { + const Function *F = MF.getFunction(); + if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) + return &X86::GR64_TCW64RegClass; + else if (Is64Bit) + return &X86::GR64_TCRegClass; + + bool hasHipeCC = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); + if (hasHipeCC) + return &X86::GR32RegClass; + return &X86::GR32_TCRegClass; +} + +const TargetRegisterClass * X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &X86::CCRRegClass) { if (Is64Bit) Index: lib/Target/X86/X86RegisterInfo.td =================================================================== --- lib/Target/X86/X86RegisterInfo.td +++ lib/Target/X86/X86RegisterInfo.td @@ -375,7 +375,7 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, - R8, R9, R11)>; + R8, R9, R10, R11)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, Index: test/CodeGen/X86/emutls-pie.ll =================================================================== --- test/CodeGen/X86/emutls-pie.ll +++ test/CodeGen/X86/emutls-pie.ll @@ -24,7 +24,7 @@ ; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi ; X64-NEXT: callq my_emutls_get_address@PLT ; X64-NEXT: movl (%rax), %eax -; X64-NEXT: popq %rdx +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: @@ -50,7 +50,7 @@ ; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi ; X64-NEXT: callq __emutls_get_address@PLT ; X64-NEXT: movl (%rax), %eax -; X64-NEXT: popq %rdx +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: Index: test/CodeGen/X86/emutls.ll =================================================================== --- test/CodeGen/X86/emutls.ll +++ test/CodeGen/X86/emutls.ll @@ -21,7 +21,7 @@ ; X64: movl $my_emutls_v_xyz, %edi ; X64-NEXT: callq my_emutls_get_address ; X64-NEXT: movl (%rax), %eax -; X64-NEXT: popq %rdx +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: @@ -50,7 +50,7 @@ ; X64: movl $__emutls_v.i1, %edi ; X64-NEXT: callq __emutls_get_address ; X64-NEXT: movl (%rax), %eax -; X64-NEXT: popq %rdx +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: @@ -67,7 +67,7 @@ ; X64-LABEL: f2: ; X64: movl 
$__emutls_v.i1, %edi ; X64-NEXT: callq __emutls_get_address -; X64-NEXT: popq %rdx +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: Index: test/CodeGen/X86/half.ll =================================================================== --- test/CodeGen/X86/half.ll +++ test/CodeGen/X86/half.ll @@ -77,7 +77,7 @@ ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax -; CHECK-LIBCALL-NEXT: popq %rdx +; CHECK-LIBCALL-NEXT: popq %rcx ; CHECK-LIBCALL-NEXT: retq ; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] @@ -127,7 +127,7 @@ ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]] ; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0 ; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]] -; CHECK-LIBCALL-NEXT: popq %rdx +; CHECK-LIBCALL-NEXT: popq %rcx ; CHECK-LIBCALL-NEXT: retq ; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] Index: test/CodeGen/X86/statepoint-allocas.ll =================================================================== --- test/CodeGen/X86/statepoint-allocas.ll +++ test/CodeGen/X86/statepoint-allocas.ll @@ -16,7 +16,7 @@ ; CHECK: movq %rdi, (%rsp) ; CHECK: callq return_i1 ; CHECK: movq (%rsp), %rax -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %alloca = alloca i32 addrspace(1)*, align 8 @@ -33,7 +33,7 @@ ; CHECK: movq %rdi, (%rsp) ; CHECK: callq return_i1 ; CHECK: xorl %eax, %eax -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %alloca = alloca i32 addrspace(1)*, align 8 Index: test/CodeGen/X86/statepoint-call-lowering.ll =================================================================== --- test/CodeGen/X86/statepoint-call-lowering.ll +++ test/CodeGen/X86/statepoint-call-lowering.ll @@ -20,7 +20,7 @@ ; state arguments to the statepoint ; CHECK: pushq %rax ; CHECK: callq return_i1 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) @@ -32,7 +32,7 @@ ; CHECK-LABEL: test_i32_return ; CHECK: pushq %rax ; CHECK: callq return_i32 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0) @@ -44,7 +44,7 @@ ; CHECK-LABEL: test_i32ptr_return ; CHECK: pushq %rax ; CHECK: callq return_i32ptr -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0) @@ -82,7 +82,7 @@ ; CHECK: pushq %rax ; CHECK: callq return_i1 ; CHECK-NEXT: .Ltmp11: -; CHECK-NEXT: popq %rdx +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq entry: %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a) @@ -107,7 +107,7 @@ ; A patchable variant of test_i1_return ; CHECK: pushq %rax ; CHECK: nopl -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0) Index: test/CodeGen/X86/statepoint-gctransition-call-lowering.ll =================================================================== --- test/CodeGen/X86/statepoint-gctransition-call-lowering.ll +++ test/CodeGen/X86/statepoint-gctransition-call-lowering.ll @@ -18,7 +18,7 @@ ; state arguments to the statepoint ; CHECK: pushq %rax ; CHECK: callq return_i1 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0) @@ -30,7 +30,7 @@ ; CHECK-LABEL: test_i32_return ; CHECK: pushq %rax ; CHECK: callq return_i32 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 0, i32 0) @@ -42,7 +42,7 @@ ; CHECK-LABEL: test_i32ptr_return ; CHECK: pushq %rax ; CHECK: callq return_i32ptr -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 1, i32 0, i32 0) @@ -68,7 +68,7 @@ ; CHECK: pushq %rax ; CHECK: callq return_i1 ; CHECK-NEXT: .Ltmp9: -; CHECK-NEXT: popq %rdx +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq entry: %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a) @@ -92,7 +92,7 @@ ; CHECK-LABEL: test_transition_args ; CHECK: pushq %rax ; CHECK: callq return_i32 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %val = alloca i32 @@ -105,7 +105,7 @@ ; CHECK-LABEL: test_transition_args_2 ; CHECK: pushq %rax ; CHECK: callq return_i32 -; CHECK: popq %rdx +; CHECK: popq %rcx ; CHECK: retq entry: %val = alloca i32 Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -3695,7 +3695,7 @@ ; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 ; X32-SSE41-NEXT: movd %xmm0, %eax -; X32-SSE41-NEXT: popl %edx +; X32-SSE41-NEXT: popl %ecx ; X32-SSE41-NEXT: retl entry: %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> Index: test/CodeGen/X86/win64_sibcall.ll 
=================================================================== --- /dev/null +++ test/CodeGen/X86/win64_sibcall.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64 +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX + +%Object = type <{ [0 x i64*]* }> + +define void @C1(%Object addrspace(1)* %param0) gc "coreclr" { +entry: + +; WIN_X64: # BB#0: +; WIN_X64: pushq %rax +; LINUX: # BB#0: # %entry +; LINUX: movq $0, -8(%rsp) + + %this = alloca %Object addrspace(1)* + store %Object addrspace(1)* null, %Object addrspace(1)** %this + store %Object addrspace(1)* %param0, %Object addrspace(1)** %this + br label %0 + +;