diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -366,8 +366,9 @@ apply for values returned in callee-saved registers. - On X86-64 the callee preserves all general purpose registers, except for - R11. R11 can be used as a scratch register. Floating-point registers - (XMMs/YMMs) are not preserved and need to be saved by the caller. + R11 and return registers, if any. R11 can be used as a scratch register. + Floating-point registers (XMMs/YMMs) are not preserved and need to be + saved by the caller. The idea behind this convention is to support calls to runtime functions that have a hot path and a cold path. The hot path is usually a small piece diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1154,11 +1154,11 @@ // CSRs that are handled explicitly via copies. def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>; -// All GPRs - except r11 +// All GPRs - except r11 and return registers. def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, R8, R9, R10)>; -// All registers - except r11 +// All registers - except r11 and return registers. def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs, (sequence "XMM%u", 0, 15))>; def CSR_64_RT_AllRegs_AVX : CalleeSavedRegs<(add CSR_64_RT_MostRegs, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -104,6 +104,27 @@ DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); } +/// Returns true if a CC can dynamically exclude a register from the list of +/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on +/// the return registers.
+static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) { + switch (CC) { + default: + return false; + case CallingConv::X86_RegCall: + case CallingConv::PreserveMost: + case CallingConv::PreserveAll: + return true; + } +} + +/// Returns true if a CC can dynamically exclude a register from the list of +/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on +/// the parameters. +static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) { + return CC == CallingConv::X86_RegCall; +} + X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -3193,9 +3214,10 @@ X86MachineFunctionInfo *FuncInfo = MF.getInfo(); // In some cases we need to disable registers from the default CSR list. - // For example, when they are used for argument passing. + // For example, when they are used as return registers (preserve_* and X86's + // regcall) or for argument passing (X86's regcall). bool ShouldDisableCalleeSavedRegister = - CallConv == CallingConv::X86_RegCall || + shouldDisableRetRegFromCSR(CallConv) || MF.getFunction().hasFnAttribute("no_caller_saved_registers"); if (CallConv == CallingConv::X86_INTR && !Outs.empty()) @@ -3358,8 +3380,12 @@ RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); - // Add the returned register to the CalleeSaveDisableRegs list. - if (ShouldDisableCalleeSavedRegister) + // Add the returned register to the CalleeSaveDisableRegs list. Don't do + // this however for preserve_most/preserve_all to minimize the number of + // caller-saved registers for these CCs.
+ if (ShouldDisableCalleeSavedRegister && + CallConv != CallingConv::PreserveAll && + CallConv != CallingConv::PreserveMost) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } @@ -4347,7 +4373,7 @@ } } - if (CallConv == CallingConv::X86_RegCall || + if (shouldDisableArgRegFromCSR(CallConv) || F.hasFnAttribute("no_caller_saved_registers")) { MachineRegisterInfo &MRI = MF.getRegInfo(); for (std::pair Pair : MRI.liveins()) @@ -4907,8 +4933,11 @@ uint32_t *RegMask = nullptr; // In some calling conventions we need to remove the used physical registers - // from the reg mask. - if (CallConv == CallingConv::X86_RegCall || HasNCSR) { + // from the reg mask. Create a new RegMask for such calling conventions. + // RegMask for calling conventions that disable only return registers (e.g. + // preserve_most) will be modified later in LowerCallResult. + bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR; + if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. @@ -4918,10 +4947,12 @@ // Make sure all sub registers of the argument registers are reset // in the RegMask. - for (auto const &RegPair : RegsToPass) - for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + if (ShouldDisableArgRegs) { + for (auto const &RegPair : RegsToPass) + for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + } // Create the RegMask Operand according to our updated mask. 
Ops.push_back(DAG.getRegisterMask(RegMask)); diff --git a/llvm/test/CodeGen/X86/dynamic-regmask-preserve-all.ll b/llvm/test/CodeGen/X86/dynamic-regmask-preserve-all.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/dynamic-regmask-preserve-all.ll @@ -0,0 +1,107 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -stop-after finalize-isel <%s | FileCheck %s + +; Check that the callee excludes the return register (%rax) from the list of +; callee-saved-registers. +define preserve_allcc i64 @callee1(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind { + %a1 = mul i64 %a0, %b0 + %a2 = mul i64 %a1, %c0 + %a3 = mul i64 %a2, %d0 + %a4 = mul i64 %a3, %e0 + ret i64 %a4 +} +; CHECK: name: callee1 +; CHECK: calleeSavedRegisters: [ '$rbx', '$r12', '$r13', '$r14', '$r15', '$rbp', +; CHECK: '$rcx', '$rdx', '$rsi', '$rdi', '$r8', '$r9', '$r10', +; CHECK: '$xmm0', '$xmm1', '$xmm2', '$xmm3', '$xmm4', '$xmm5', +; CHECK: '$xmm6', '$xmm7', '$xmm8', '$xmm9', '$xmm10', '$xmm11', +; CHECK: '$xmm12', '$xmm13', '$xmm14', '$xmm15' ] +; CHECK: RET 0, $rax + +; Check that RegMask contains parameter registers (%rdi, %rsi, %rdx, %rcx, +; %r8), but doesn't contain the return register (%rax). 
+define i64 @caller1(i64 %a0) nounwind { + %b1 = call preserve_allcc i64 @callee1(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0) + %b2 = add i64 %b1, %a0 + ret i64 %b2 +} +; CHECK: name: caller1 +; CHECK: CALL64pcrel32 @callee1, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$dh,$di,$dih,$dil,$dl,$dx,$ebp,$ebx,$ecx,$edi,$edx,$esi,$hbp,$hbx,$hcx,$hdi,$hdx,$hsi,$rbp,$rbx,$rcx,$rdi,$rdx,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$xmm0,$xmm1,$xmm2,$xmm3,$xmm4,$xmm5,$xmm6,$xmm7,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp, implicit-def $rax +; CHECK: RET 0, $rax + + +; Check that the callee excludes the return registers (%rax, %rdx) from the list +; of callee-saved-registers. +define preserve_allcc {i64, i64} @callee2(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind { + %a1 = mul i64 %a0, %b0 + %a2 = mul i64 %a1, %c0 + %a3 = mul i64 %a2, %d0 + %a4 = mul i64 %a3, %e0 + %b4 = insertvalue {i64, i64} undef, i64 %a3, 0 + %b5 = insertvalue {i64, i64} %b4, i64 %a4, 1 + ret {i64, i64} %b5 +} +; CHECK: name: callee2 +; CHECK: calleeSavedRegisters: [ '$rbx', '$r12', '$r13', '$r14', '$r15', '$rbp', +; CHECK: '$rcx', '$rsi', '$rdi', '$r8', '$r9', '$r10', '$xmm0', +; CHECK: '$xmm1', '$xmm2', '$xmm3', '$xmm4', '$xmm5', '$xmm6', +; CHECK: '$xmm7', '$xmm8', '$xmm9', '$xmm10', '$xmm11', +; CHECK: '$xmm12', '$xmm13', '$xmm14', '$xmm15' ] +; CHECK: RET 0, $rax, $rdx + + +; Check that RegMask contains parameter registers (%rdi, %rsi, %rdx, %rcx, +; %r8), but doesn't contain the return registers (%rax, %rdx). 
+define {i64, i64} @caller2(i64 %a0) nounwind { + %b1 = call preserve_allcc {i64, i64} @callee2(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0) + ret {i64, i64} %b1 +} +; CHECK: name: caller2 +; CHECK: CALL64pcrel32 @callee2, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$di,$dih,$dil,$ebp,$ebx,$ecx,$edi,$esi,$hbp,$hbx,$hcx,$hdi,$hsi,$rbp,$rbx,$rcx,$rdi,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$xmm0,$xmm1,$xmm2,$xmm3,$xmm4,$xmm5,$xmm6,$xmm7,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, implicit-def $rdx +; CHECK: RET 0, $rax, $rdx + + +%struct.Large = type { i64, double, double } + +; Declare the callee with a sret parameter. +declare preserve_allcc void @callee3(ptr noalias nocapture writeonly sret(%struct.Large) align 4 %a0, i64 %b0) nounwind; + +; Check that RegMask contains %rax and subregisters.
+define void @caller3(i64 %a0) nounwind { + %a1 = alloca %struct.Large, align 8 + call preserve_allcc void @callee3(ptr nonnull sret(%struct.Large) align 8 %a1, i64 %a0) + ret void +} +; CHECK: name: caller3 +; CHECK: CALL64pcrel32 @callee3, CustomRegMask($ah,$al,$ax,$bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$dh,$di,$dih,$dil,$dl,$dx,$eax,$ebp,$ebx,$ecx,$edi,$edx,$esi,$hax,$hbp,$hbx,$hcx,$hdi,$hdx,$hsi,$rax,$rbp,$rbx,$rcx,$rdi,$rdx,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$xmm0,$xmm1,$xmm2,$xmm3,$xmm4,$xmm5,$xmm6,$xmm7,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp +; CHECK: RET 0 + + +; Check that the callee excludes the return registers (%rax, %xmm0) from the list +; of callee-saved-registers. +define preserve_allcc {i64, double} @callee4(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind { + %a1 = mul i64 %a0, %b0 + %a2 = mul i64 %a1, %c0 + %a3 = mul i64 %a2, %d0 + %a4 = mul i64 %a3, %e0 + %b4 = insertvalue {i64, double} undef, i64 %a3, 0 + %b5 = insertvalue {i64, double} %b4, double 1.2, 1 + ret {i64, double} %b5 +} +; CHECK: name: callee4 +; CHECK: calleeSavedRegisters: [ '$rbx', '$r12', '$r13', '$r14', '$r15', '$rbp', +; CHECK: '$rcx', '$rdx', '$rsi', '$rdi', '$r8', '$r9', '$r10', +; CHECK: '$xmm1', '$xmm2', '$xmm3', '$xmm4', '$xmm5', '$xmm6', +; CHECK: '$xmm7', '$xmm8', '$xmm9', '$xmm10', '$xmm11', +; CHECK: '$xmm12', '$xmm13', '$xmm14', '$xmm15' ] +; CHECK: RET 0, $rax, $xmm0 + +; Check that RegMask contains parameter registers (%rdi, %rsi, %rdx, %rcx, +; %r8), but doesn't contain the return registers (%rax, %xmm0). 
+define {i64, double} @caller4(i64 %a0) nounwind { + %b1 = call preserve_allcc {i64, double} @callee4(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0) + ret {i64, double} %b1 +} +; CHECK: name: caller4 +; CHECK: CALL64pcrel32 @callee4, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$dh,$di,$dih,$dil,$dl,$dx,$ebp,$ebx,$ecx,$edi,$edx,$esi,$hbp,$hbx,$hcx,$hdi,$hdx,$hsi,$rbp,$rbx,$rcx,$rdi,$rdx,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$xmm1,$xmm2,$xmm3,$xmm4,$xmm5,$xmm6,$xmm7,$xmm8,$xmm9,$xmm10,$xmm11,$xmm12,$xmm13,$xmm14,$xmm15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, implicit-def $xmm0 + +; CHECK: RET 0, $rax, $xmm0 diff --git a/llvm/test/CodeGen/X86/dynamic-regmask-preserve-most.ll b/llvm/test/CodeGen/X86/dynamic-regmask-preserve-most.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/dynamic-regmask-preserve-most.ll @@ -0,0 +1,70 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -stop-after finalize-isel <%s | FileCheck %s + +; Check that the callee excludes the return register (%rax) from the list of +; callee-saved-registers. +define preserve_mostcc i64 @callee1(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind { + %a1 = mul i64 %a0, %b0 + %a2 = mul i64 %a1, %c0 + %a3 = mul i64 %a2, %d0 + %a4 = mul i64 %a3, %e0 + ret i64 %a4 +} +; CHECK: name: callee1 +; CHECK: calleeSavedRegisters: [ '$rbx', '$r12', '$r13', '$r14', '$r15', '$rbp', +; CHECK: '$rcx', '$rdx', '$rsi', '$rdi', '$r8', '$r9', '$r10' ] +; CHECK: RET 0, $rax + +; Check that RegMask contains parameter registers (%rdi, %rsi, %rdx, %rcx, +; %r8), but doesn't contain the return register (%rax). 
+define i64 @caller1(i64 %a0) nounwind { + %b1 = call preserve_mostcc i64 @callee1(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0) + %b2 = add i64 %b1, %a0 + ret i64 %b2 +} +; CHECK: name: caller1 +; CHECK: CALL64pcrel32 @callee1, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$dh,$di,$dih,$dil,$dl,$dx,$ebp,$ebx,$ecx,$edi,$edx,$esi,$hbp,$hbx,$hcx,$hdi,$hdx,$hsi,$rbp,$rbx,$rcx,$rdi,$rdx,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp, implicit-def $rax +; CHECK: RET 0, $rax + + +; Check that the callee excludes the return registers (%rax, %rdx) from the list +; of callee-saved-registers. +define preserve_mostcc {i64, i64} @callee2(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind { + %a1 = mul i64 %a0, %b0 + %a2 = mul i64 %a1, %c0 + %a3 = mul i64 %a2, %d0 + %a4 = mul i64 %a3, %e0 + %b4 = insertvalue {i64, i64} undef, i64 %a3, 0 + %b5 = insertvalue {i64, i64} %b4, i64 %a4, 1 + ret {i64, i64} %b5 +} +; CHECK: name: callee2 +; CHECK: calleeSavedRegisters: [ '$rbx', '$r12', '$r13', '$r14', '$r15', '$rbp', +; CHECK: '$rcx', '$rsi', '$rdi', '$r8', '$r9', '$r10' ] +; CHECK: RET 0, $rax, $rdx + + +; Check that RegMask contains parameter registers (%rdi, %rsi, %rdx, %rcx, +; %r8), but doesn't contain the return registers (%rax, %rdx). 
+define {i64, i64} @caller2(i64 %a0) nounwind { + %b1 = call preserve_mostcc {i64, i64} @callee2(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0) + ret {i64, i64} %b1 +} +; CHECK: name: caller2 +; CHECK: CALL64pcrel32 @callee2, CustomRegMask($bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$di,$dih,$dil,$ebp,$ebx,$ecx,$edi,$esi,$hbp,$hbx,$hcx,$hdi,$hsi,$rbp,$rbx,$rcx,$rdi,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, implicit-def $rdx +; CHECK: RET 0, $rax, $rdx + + +%struct.Large = type { i64, double, double } + +; Declare the callee with a sret parameter. +declare preserve_mostcc void @callee3(ptr noalias nocapture writeonly sret(%struct.Large) align 4 %a0, i64 %b0) nounwind; + +; Check that RegMask contains %rax and subregisters. 
+define void @caller3(i64 %a0) nounwind { + %a1 = alloca %struct.Large, align 8 + call preserve_mostcc void @callee3(ptr nonnull sret(%struct.Large) align 8 %a1, i64 %a0) + ret void +} +; CHECK: name: caller3 +; CHECK: CALL64pcrel32 @callee3, CustomRegMask($ah,$al,$ax,$bh,$bl,$bp,$bph,$bpl,$bx,$ch,$cl,$cx,$dh,$di,$dih,$dil,$dl,$dx,$eax,$ebp,$ebx,$ecx,$edi,$edx,$esi,$hax,$hbp,$hbx,$hcx,$hdi,$hdx,$hsi,$rax,$rbp,$rbx,$rcx,$rdi,$rdx,$rsi,$si,$sih,$sil,$r8,$r9,$r10,$r12,$r13,$r14,$r15,$r8b,$r9b,$r10b,$r12b,$r13b,$r14b,$r15b,$r8bh,$r9bh,$r10bh,$r12bh,$r13bh,$r14bh,$r15bh,$r8d,$r9d,$r10d,$r12d,$r13d,$r14d,$r15d,$r8w,$r9w,$r10w,$r12w,$r13w,$r14w,$r15w,$r8wh,$r9wh,$r10wh,$r12wh,$r13wh,$r14wh,$r15wh), implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp +; CHECK: RET 0 diff --git a/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll b/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX %s + +define preserve_allcc double @preserve_allcc1() nounwind { +entry: +;ALL-LABEL: preserve_allcc1 +;SSE: movaps %xmm1 +;SSE-NOT: movaps %xmm0 +;AVX: vmovups %ymm1 +;AVX-NOT: vmovups %ymm0 +;SSE-NOT: movaps {{.*}} %xmm0 +;SSE: movaps {{.*}} %xmm1 +;AVX-NOT: vmovups {{.*}} %ymm0 +;AVX: vmovups {{.*}} %ymm1 + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() + ret double 0. 
+} + +; Make sure XMM0 (return register) and R11 are saved before the call +declare preserve_allcc double @bar_double(i64, i64) +define void @preserve_allcc2() nounwind { +entry: +;SSE-LABEL: preserve_allcc2 +;SSE: movq %r11, [[REG1:%[a-z0-9]+]] +;SSE: movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]] +;SSE: movq [[REG1]], %r11 +;SSE: movaps [[REG2]], %xmm0 + %a0 = call i64 asm sideeffect "", "={rax}"() nounwind + %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind + %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind + %a3 = call i64 asm sideeffect "", "={r8}"() nounwind + %a4 = call i64 asm sideeffect "", "={r9}"() nounwind + %a5 = call i64 asm sideeffect "", "={r10}"() nounwind + %a6 = call i64 asm sideeffect "", "={r11}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a16 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind + %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind + %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call preserve_allcc double @bar_double(i64 1, i64 2) + call void asm sideeffect "", 
"{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25) + ret void +} diff --git a/llvm/test/CodeGen/X86/preserve_allcc64.ll b/llvm/test/CodeGen/X86/preserve_allcc64.ll --- a/llvm/test/CodeGen/X86/preserve_allcc64.ll +++ b/llvm/test/CodeGen/X86/preserve_allcc64.ll @@ -1,82 +1,138 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s +; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,VOID %s +; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT %s +; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT128 %s +; +; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,VOID %s +; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT %s +; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT128 %s -define preserve_allcc void @preserve_allcc1() nounwind { +define preserve_allcc RETTYPE @preserve_allcc1(i64, i64, double, double) nounwind { entry: -;SSE-LABEL: 
preserve_allcc1 -;SSE: pushq %r10 -;SSE-NEXT: pushq %r9 -;SSE-NEXT: pushq %r8 -;SSE-NEXT: pushq %rdi -;SSE-NEXT: pushq %rsi -;SSE-NEXT: pushq %rdx -;SSE-NEXT: pushq %rcx -;SSE-NEXT: pushq %rax -;SSE-NEXT: pushq %rbp -;SSE-NEXT: pushq %r15 -;SSE-NEXT: pushq %r14 -;SSE-NEXT: pushq %r13 -;SSE-NEXT: pushq %r12 -;SSE-NEXT: pushq %rbx -;SSE: movaps %xmm15 -;SSE-NEXT: movaps %xmm14 -;SSE-NEXT: movaps %xmm13 -;SSE-NEXT: movaps %xmm12 -;SSE-NEXT: movaps %xmm11 -;SSE-NEXT: movaps %xmm10 -;SSE-NEXT: movaps %xmm9 -;SSE-NEXT: movaps %xmm8 -;SSE-NEXT: movaps %xmm7 -;SSE-NEXT: movaps %xmm6 -;SSE-NEXT: movaps %xmm5 -;SSE-NEXT: movaps %xmm4 -;SSE-NEXT: movaps %xmm3 -;SSE-NEXT: movaps %xmm2 -;SSE-NEXT: movaps %xmm1 -;SSE-NEXT: movaps %xmm0 -;AVX-LABEL: preserve_allcc1 -;AVX: pushq %r10 -;AVX-NEXT: pushq %r9 -;AVX-NEXT: pushq %r8 -;AVX-NEXT: pushq %rdi -;AVX-NEXT: pushq %rsi -;AVX-NEXT: pushq %rdx -;AVX-NEXT: pushq %rcx -;AVX-NEXT: pushq %rax -;AVX-NEXT: pushq %rbp -;AVX-NEXT: pushq %r15 -;AVX-NEXT: pushq %r14 -;AVX-NEXT: pushq %r13 -;AVX-NEXT: pushq %r12 -;AVX-NEXT: pushq %rbx -;AVX: vmovups %ymm15 -;AVX-NEXT: vmovups %ymm14 -;AVX-NEXT: vmovups %ymm13 -;AVX-NEXT: vmovups %ymm12 -;AVX-NEXT: vmovups %ymm11 -;AVX-NEXT: vmovups %ymm10 -;AVX-NEXT: vmovups %ymm9 -;AVX-NEXT: vmovups %ymm8 -;AVX-NEXT: vmovups %ymm7 -;AVX-NEXT: vmovups %ymm6 -;AVX-NEXT: vmovups %ymm5 -;AVX-NEXT: vmovups %ymm4 -;AVX-NEXT: vmovups %ymm3 -;AVX-NEXT: vmovups %ymm2 -;AVX-NEXT: vmovups %ymm1 -;AVX-NEXT: vmovups %ymm0 +;ALL-LABEL: preserve_allcc1 +;ALL: pushq %r10 +;ALL-NEXT: pushq %r9 +;ALL-NEXT: pushq %r8 +;ALL-NEXT: pushq %rdi +;ALL-NEXT: pushq %rsi +;VOID-NEXT: pushq %rdx +;INT-NEXT: pushq %rdx +;INT128-NOT: pushq %rdx +;ALL-NEXT: pushq %rcx +;VOID-NEXT: pushq %rax +;INT-NOT: pushq %rax +;INT128-NOT: pushq %rax +;ALL-NEXT: pushq %rbp +;ALL-NEXT: pushq %r15 +;ALL-NEXT: pushq %r14 +;ALL-NEXT: pushq %r13 +;ALL-NEXT: pushq %r12 +;ALL-NEXT: pushq %rbx +;SSE: movaps %xmm15 +;SSE-NEXT: movaps %xmm14 +;SSE-NEXT: movaps 
%xmm13 +;SSE-NEXT: movaps %xmm12 +;SSE-NEXT: movaps %xmm11 +;SSE-NEXT: movaps %xmm10 +;SSE-NEXT: movaps %xmm9 +;SSE-NEXT: movaps %xmm8 +;SSE-NEXT: movaps %xmm7 +;SSE-NEXT: movaps %xmm6 +;SSE-NEXT: movaps %xmm5 +;SSE-NEXT: movaps %xmm4 +;SSE-NEXT: movaps %xmm3 +;SSE-NEXT: movaps %xmm2 +;SSE-NEXT: movaps %xmm1 +;SSE-NEXT: movaps %xmm0 +;AVX: vmovups %ymm15 +;AVX-NEXT: vmovups %ymm14 +;AVX-NEXT: vmovups %ymm13 +;AVX-NEXT: vmovups %ymm12 +;AVX-NEXT: vmovups %ymm11 +;AVX-NEXT: vmovups %ymm10 +;AVX-NEXT: vmovups %ymm9 +;AVX-NEXT: vmovups %ymm8 +;AVX-NEXT: vmovups %ymm7 +;AVX-NEXT: vmovups %ymm6 +;AVX-NEXT: vmovups %ymm5 +;AVX-NEXT: vmovups %ymm4 +;AVX-NEXT: vmovups %ymm3 +;AVX-NEXT: vmovups %ymm2 +;AVX-NEXT: vmovups %ymm1 +;AVX-NEXT: vmovups %ymm0 +;SSE: movaps {{.*}} %xmm0 +;SSE-NEXT: movaps {{.*}} %xmm1 +;SSE-NEXT: movaps {{.*}} %xmm2 +;SSE-NEXT: movaps {{.*}} %xmm3 +;SSE-NEXT: movaps {{.*}} %xmm4 +;SSE-NEXT: movaps {{.*}} %xmm5 +;SSE-NEXT: movaps {{.*}} %xmm6 +;SSE-NEXT: movaps {{.*}} %xmm7 +;SSE-NEXT: movaps {{.*}} %xmm8 +;SSE-NEXT: movaps {{.*}} %xmm9 +;SSE-NEXT: movaps {{.*}} %xmm10 +;SSE-NEXT: movaps {{.*}} %xmm11 +;SSE-NEXT: movaps {{.*}} %xmm12 +;SSE-NEXT: movaps {{.*}} %xmm13 +;SSE-NEXT: movaps {{.*}} %xmm14 +;SSE-NEXT: movaps {{.*}} %xmm15 +;AVX: vmovups {{.*}} %ymm0 +;AVX-NEXT: vmovups {{.*}} %ymm1 +;AVX-NEXT: vmovups {{.*}} %ymm2 +;AVX-NEXT: vmovups {{.*}} %ymm3 +;AVX-NEXT: vmovups {{.*}} %ymm4 +;AVX-NEXT: vmovups {{.*}} %ymm5 +;AVX-NEXT: vmovups {{.*}} %ymm6 +;AVX-NEXT: vmovups {{.*}} %ymm7 +;AVX-NEXT: vmovups {{.*}} %ymm8 +;AVX-NEXT: vmovups {{.*}} %ymm9 +;AVX-NEXT: vmovups {{.*}} %ymm10 +;AVX-NEXT: vmovups {{.*}} %ymm11 +;AVX-NEXT: vmovups {{.*}} %ymm12 +;AVX-NEXT: vmovups {{.*}} %ymm13 +;AVX-NEXT: vmovups {{.*}} %ymm14 +;AVX-NEXT: vmovups {{.*}} %ymm15 +;ALL: popq %rbx +;ALL-NEXT: popq %r12 +;ALL-NEXT: popq %r13 +;ALL-NEXT: popq %r14 +;ALL-NEXT: popq %r15 +;ALL-NEXT: popq %rbp +;VOID-NEXT: popq %rax +;INT-NOT: popq %rax +;INT128-NOT: popq %rax 
+;ALL-NEXT: popq %rcx +;VOID-NEXT: popq %rdx +;INT-NEXT: popq %rdx +;INT128-NOT: popq %rdx +;ALL-NEXT: popq %rsi +;ALL-NEXT: popq %rdi +;ALL-NEXT: popq %r8 +;ALL-NEXT: popq %r9 +;ALL-NEXT: popq %r10 call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() - ret void + ret RETTYPE RETVAL } -; Make sure only R11 is saved before the call -declare preserve_allcc void @bar(i64, i64, double, double) +; Make sure R11 and return registers are saved before the call +declare preserve_allcc RETTYPE @bar(i64, i64, double, double) define void @preserve_allcc2() nounwind { entry: -;SSE-LABEL: preserve_allcc2 -;SSE: movq %r11, [[REG:%[a-z0-9]+]] -;SSE-NOT: movaps %xmm -;SSE: movq [[REG]], %r11 +;ALL-LABEL: preserve_allcc2 +;VOID-NOT: movq %rax, [[REG1:%[a-z0-9]+]] +;INT: movq %rax, [[REG1:%[a-z0-9]+]] +;INT128: movq %rax, [[REG1:%[a-z0-9]+]] +;VOID-NOT: movq %rdx, [[REG2:%[a-z0-9]+]] +;INT-NOT: movq %rdx, [[REG2:%[a-z0-9]+]] +;INT128: movq %rdx, [[REG2:%[a-z0-9]+]] +;ALL: movq %r11, [[REG3:%[a-z0-9]+]] +;ALL-NOT: movaps %xmm +;VOID-NOT: movq {{.*}}, %rax +;INT: movq [[REG1]], %rax +;INT128: movq [[REG1]], %rax +;VOID-NOT: movq {{.*}}, %rdx +;INT-NOT: movq {{.*}}, %rdx +;INT128: movq [[REG2]], %rdx +;ALL: movq [[REG3]], %r11 %a0 = call i64 asm sideeffect "", "={rax}"() nounwind %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind @@ -98,7 +154,7 @@ %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind - call preserve_allcc void @bar(i64 1, i64 2, double 3.0, double 4.0) + call preserve_allcc RETTYPE @bar(i64 1, i64 2, double 3.0, double 4.0) call void asm sideeffect "", 
"{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23) ret void } diff --git a/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll b/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s + +; Make sure XMM0 (return register) and R11 are saved before the call +declare preserve_mostcc double @foo_double(i64, i64) +define void @preserve_mostcc1() nounwind { +entry: +;CHECK-LABEL: preserve_mostcc1 +;CHECK: movq %r11, [[REG1:%[a-z0-9]+]] +;CHECK: movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]] +;CHECK: call +;CHECK: movq [[REG1]], %r11 +;CHECK: movaps [[REG2]], %xmm0 + %a0 = call i64 asm sideeffect "", "={rax}"() nounwind + %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind + %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind + %a3 = call i64 asm sideeffect "", "={r8}"() nounwind + %a4 = call i64 asm sideeffect "", "={r9}"() nounwind + %a5 = call i64 asm sideeffect "", "={r10}"() nounwind + %a6 = call i64 asm sideeffect "", "={r11}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a16 = call <2 x double> asm 
sideeffect "", "={xmm6}"() nounwind + %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind + %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call preserve_mostcc double @foo_double(i64 1, i64 2) + call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25) + ret void +} diff --git a/llvm/test/CodeGen/X86/preserve_mostcc64-sret.ll b/llvm/test/CodeGen/X86/preserve_mostcc64-sret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/preserve_mostcc64-sret.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +%struct.Large = type { [64 x i32] } + +; Check that %rax is also preserved for a function with sret parameter, +define preserve_mostcc void @sret_foo(ptr noalias nocapture writeonly sret(%struct.Large) align 4, i32 noundef) nounwind { +entry: +;CHECK: pushq %r10 +;CHECK-NEXT: pushq %r9 +;CHECK-NEXT: pushq %r8 +;CHECK-NEXT: pushq %rdi +;CHECK-NEXT: pushq %rsi +;CHECK-NEXT: pushq %rdx +;CHECK-NEXT: pushq %rcx +;CHECK-NEXT: pushq %rax 
+;CHECK-NEXT: pushq %rbp +;CHECK-NEXT: pushq %r15 +;CHECK-NEXT: pushq %r14 +;CHECK-NEXT: pushq %r13 +;CHECK-NEXT: pushq %r12 +;CHECK-NEXT: pushq %rbx +;CHECK: popq %rbx +;CHECK-NEXT: popq %r12 +;CHECK-NEXT: popq %r13 +;CHECK-NEXT: popq %r14 +;CHECK-NEXT: popq %r15 +;CHECK-NEXT: popq %rbp +;CHECK-NEXT: popq %rax +;CHECK-NEXT: popq %rcx +;CHECK-NEXT: popq %rdx +;CHECK-NEXT: popq %rsi +;CHECK-NEXT: popq %rdi +;CHECK-NEXT: popq %r8 +;CHECK-NEXT: popq %r9 +;CHECK-NEXT: popq %r10 + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() + ret void +} + +; Check that neither %rax nor %rdi are caller-saved. +define void @foo(ptr noalias nocapture writeonly sret(%struct.Large) align 4 %0, i32 noundef %1) nounwind { +entry: +;CHECK-NOT: movq %rax, [[REG1:%[a-z0-9]+]] +;CHECK-NOT: movq %rdi, [[REG2:%[a-z0-9]+]] +;CHECK: movq %r11, [[REG3:%[a-z0-9]+]] +;CHECK: movaps %xmm2 +;CHECK: movaps %xmm3 +;CHECK: movaps %xmm4 +;CHECK: movaps %xmm5 +;CHECK: movaps %xmm6 +;CHECK: movaps %xmm7 +;CHECK: movaps %xmm8 +;CHECK: movaps %xmm9 +;CHECK: movaps %xmm10 +;CHECK: movaps %xmm11 +;CHECK: movaps %xmm12 +;CHECK: movaps %xmm13 +;CHECK: movaps %xmm14 +;CHECK: movaps %xmm15 +;CHECK: call +;CHECK-NOT: movq {{.*}}, %rax +;CHECK-NOT: movq {{.*}}, %rdi +;CHECK: movq [[REG3]], %r11 +;CHECK: movaps {{.*}} %xmm2 +;CHECK: movaps {{.*}} %xmm3 +;CHECK: movaps {{.*}} %xmm4 +;CHECK: movaps {{.*}} %xmm5 +;CHECK: movaps {{.*}} %xmm6 +;CHECK: movaps {{.*}} %xmm7 +;CHECK: movaps {{.*}} %xmm8 +;CHECK: movaps {{.*}} %xmm9 +;CHECK: movaps {{.*}} %xmm10 +;CHECK: movaps {{.*}} %xmm11 +;CHECK: movaps {{.*}} %xmm12 +;CHECK: movaps {{.*}} %xmm13 +;CHECK: movaps {{.*}} %xmm14 +;CHECK: movaps {{.*}} %xmm15 + %a0 = call i64 asm sideeffect "", "={rax}"() nounwind + %a1 = call i64 asm sideeffect "", "={rcx}"()
nounwind + %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind + %a3 = call i64 asm sideeffect "", "={r8}"() nounwind + %a4 = call i64 asm sideeffect "", "={r9}"() nounwind + %a5 = call i64 asm sideeffect "", "={r10}"() nounwind + %a6 = call i64 asm sideeffect "", "={r11}"() nounwind + %a10 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind + %a11 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind + %a12 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind + %a13 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind + %a14 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind + %a15 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind + %a16 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind + %a17 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind + %a18 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind + %a19 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind + %a20 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind + %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind + %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind + %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind + call preserve_mostcc void @sret_foo(ptr sret(%struct.Large) align 4 %0, i32 noundef %1) + call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23) + ret void +} diff --git a/llvm/test/CodeGen/X86/preserve_mostcc64.ll b/llvm/test/CodeGen/X86/preserve_mostcc64.ll --- a/llvm/test/CodeGen/X86/preserve_mostcc64.ll +++ 
b/llvm/test/CodeGen/X86/preserve_mostcc64.ll @@ -1,64 +1,99 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefix=SSE %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s +; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,VOID %s +; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT %s +; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT128 %s -; Every GPR should be saved - except r11 -define preserve_mostcc void @preserve_mostcc1() nounwind { +; Every GPR should be saved - except r11 and return registers +define preserve_mostcc RETTYPE @preserve_mostcc1(i64, i64, double, double) nounwind { entry: -;SSE-LABEL: preserve_mostcc1 -;SSE: pushq %r10 -;SSE-NEXT: pushq %r9 -;SSE-NEXT: pushq %r8 -;SSE-NEXT: pushq %rdi -;SSE-NEXT: pushq %rsi -;SSE-NEXT: pushq %rdx -;SSE-NEXT: pushq %rcx -;SSE-NEXT: pushq %rax -;SSE-NEXT: pushq %rbp -;SSE-NEXT: pushq %r15 -;SSE-NEXT: pushq %r14 -;SSE-NEXT: pushq %r13 -;SSE-NEXT: pushq %r12 -;SSE-NEXT: pushq %rbx -;AVX-LABEL: preserve_mostcc1 -;AVX: pushq %r10 -;AVX-NEXT: pushq %r9 -;AVX-NEXT: pushq %r8 -;AVX-NEXT: pushq %rdi -;AVX-NEXT: pushq %rsi -;AVX-NEXT: pushq %rdx -;AVX-NEXT: pushq %rcx -;AVX-NEXT: pushq %rax -;AVX-NEXT: pushq %rbp -;AVX-NEXT: pushq %r15 -;AVX-NEXT: pushq %r14 -;AVX-NEXT: pushq %r13 -;AVX-NEXT: pushq %r12 -;AVX-NEXT: pushq %rbx +;ALL-LABEL: preserve_mostcc1 +;ALL: pushq %r10 +;ALL-NEXT: pushq %r9 +;ALL-NEXT: pushq %r8 +;ALL-NEXT: pushq %rdi +;ALL-NEXT: pushq %rsi +;VOID-NEXT: pushq %rdx +;INT-NEXT: pushq %rdx +;INT128-NOT: pushq %rdx +;ALL-NEXT: pushq %rcx +;VOID-NEXT: pushq %rax +;INT-NOT: pushq %rax +;INT128-NOT: pushq %rax +;ALL-NEXT: pushq %rbp +;ALL-NEXT: pushq %r15 +;ALL-NEXT: pushq 
%r14 +;ALL-NEXT: pushq %r13 +;ALL-NEXT: pushq %r12 +;ALL-NEXT: pushq %rbx +;ALL: popq %rbx +;ALL-NEXT: popq %r12 +;ALL-NEXT: popq %r13 +;ALL-NEXT: popq %r14 +;ALL-NEXT: popq %r15 +;ALL-NEXT: popq %rbp +;VOID-NEXT: popq %rax +;INT-NOT: popq %rax +;INT128-NOT: popq %rax +;ALL-NEXT: popq %rcx +;VOID-NEXT: popq %rdx +;INT-NEXT: popq %rdx +;INT128-NOT: popq %rdx +;ALL-NEXT: popq %rsi +;ALL-NEXT: popq %rdi +;ALL-NEXT: popq %r8 +;ALL-NEXT: popq %r9 +;ALL-NEXT: popq %r10 call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() - ret void + ret RETTYPE RETVAL } -; Make sure R11 and XMMs are saved before the call -declare preserve_mostcc void @foo(i64, i64, double, double) +; Make sure R11, return registers and XMMs are saved before the call +declare preserve_mostcc RETTYPE @foo(i64, i64, double, double) define void @preserve_mostcc2() nounwind { entry: -;SSE-LABEL: preserve_mostcc2 -;SSE: movq %r11, [[REG:%[a-z0-9]+]] -;SSE: movaps %xmm2 -;SSE: movaps %xmm3 -;SSE: movaps %xmm4 -;SSE: movaps %xmm5 -;SSE: movaps %xmm6 -;SSE: movaps %xmm7 -;SSE: movaps %xmm8 -;SSE: movaps %xmm9 -;SSE: movaps %xmm10 -;SSE: movaps %xmm11 -;SSE: movaps %xmm12 -;SSE: movaps %xmm13 -;SSE: movaps %xmm14 -;SSE: movaps %xmm15 -;SSE: movq [[REG]], %r11 +;ALL-LABEL: preserve_mostcc2 +;VOID-NOT: movq %rax, [[REG1:%[a-z0-9]+]] +;INT: movq %rax, [[REG1:%[a-z0-9]+]] +;INT128: movq %rax, [[REG1:%[a-z0-9]+]] +;VOID-NOT: movq %rdx, [[REG2:%[a-z0-9]+]] +;INT-NOT: movq %rdx, [[REG2:%[a-z0-9]+]] +;INT128: movq %rdx, [[REG2:%[a-z0-9]+]] +;ALL: movq %r11, [[REG3:%[a-z0-9]+]] +;ALL: movaps %xmm2 +;ALL: movaps %xmm3 +;ALL: movaps %xmm4 +;ALL: movaps %xmm5 +;ALL: movaps %xmm6 +;ALL: movaps %xmm7 +;ALL: movaps %xmm8 +;ALL: movaps %xmm9 +;ALL: movaps %xmm10 +;ALL: movaps %xmm11 +;ALL: movaps %xmm12 +;ALL: 
movaps %xmm13 +;ALL: movaps %xmm14 +;ALL: movaps %xmm15 +;ALL: call +;VOID-NOT: movq {{.*}}, %rax +;INT: movq [[REG1]], %rax +;INT128: movq [[REG1]], %rax +;VOID-NOT: movq {{.*}}, %rdx +;INT-NOT: movq {{.*}}, %rdx +;INT128: movq [[REG2]], %rdx +;ALL: movq [[REG3]], %r11 +;ALL: movaps {{.*}} %xmm2 +;ALL: movaps {{.*}} %xmm3 +;ALL: movaps {{.*}} %xmm4 +;ALL: movaps {{.*}} %xmm5 +;ALL: movaps {{.*}} %xmm6 +;ALL: movaps {{.*}} %xmm7 +;ALL: movaps {{.*}} %xmm8 +;ALL: movaps {{.*}} %xmm9 +;ALL: movaps {{.*}} %xmm10 +;ALL: movaps {{.*}} %xmm11 +;ALL: movaps {{.*}} %xmm12 +;ALL: movaps {{.*}} %xmm13 +;ALL: movaps {{.*}} %xmm14 +;ALL: movaps {{.*}} %xmm15 %a0 = call i64 asm sideeffect "", "={rax}"() nounwind %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind @@ -80,7 +115,7 @@ %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind - call preserve_mostcc void @foo(i64 1, i64 2, double 3.0, double 4.0) + call preserve_mostcc RETTYPE @foo(i64 1, i64 2, double 3.0, double 4.0) call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23) ret void }