Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -3179,6 +3179,18 @@ bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); + const CallInst *CI = + CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; + const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; + const Function *CallerFn = CI ? CI->getParent()->getParent() : nullptr; + bool HasNCSR = + (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")) || + (CallerFn && CallerFn->hasFnAttribute("no_caller_saved_registers")); + + if (HasNCSR) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1091,8 +1091,8 @@ CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - uint32_t *RegMask) const; + SmallVectorImpl &InVals, uint32_t *RegMask, + bool RemoveReturnedRegs) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &ArgInfo, const SDLoc &dl, SelectionDAG &DAG, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2257,7 +2257,8 @@ assert(VA.isRegLoc() && "Can only return in registers!"); // Add the register to the CalleeSaveDisableRegs list. 
- if (CallConv == CallingConv::X86_RegCall) + if (CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers")) MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); SDValue ValToCopy = OutVals[OutsIndex]; @@ -2336,7 +2337,8 @@ "Expecting two registers after Pass64BitArgInRegs"); // Add the second register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers")) MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); @@ -2396,7 +2398,8 @@ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); // Add the returned register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers")) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } @@ -2577,8 +2580,8 @@ SDValue X86TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals, - uint32_t *RegMask) const { + SelectionDAG &DAG, SmallVectorImpl &InVals, uint32_t *RegMask, + bool RemoveReturnedRegs) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Assign locations to each value returned by this call. @@ -2596,7 +2599,7 @@ // In some calling conventions we need to remove the used registers // from the register mask. 
- if (RegMask && CallConv == CallingConv::X86_RegCall) { + if (RegMask && RemoveReturnedRegs) { for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); @@ -3293,7 +3296,8 @@ } } - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || + Fn->hasFnAttribute("no_caller_saved_registers")) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) MF.getRegInfo().disableCalleeSavedRegister(Pair.first); @@ -3385,6 +3389,11 @@ bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + const CallInst *CI = + CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; + const Function *Fn = CI ? CI->getCalledFunction() : nullptr; + bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); @@ -3797,7 +3806,8 @@ RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); + const uint32_t *Mask = RegInfo->getCallPreservedMask( + MF, HasNCSR ? CallingConv::X86_INTR : CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based @@ -3820,7 +3830,9 @@ // In some calling conventions we need to remove the used physical registers // from the reg mask. - if (CallConv == CallingConv::X86_RegCall) { + bool RemovePassedReturnedRegs = + (CallConv == CallingConv::X86_RegCall || HasNCSR); + if (RemovePassedReturnedRegs) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. 
@@ -3894,7 +3906,7 @@ // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, - InVals, RegMask); + InVals, RegMask, RemovePassedReturnedRegs); } //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -272,7 +272,10 @@ bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); - switch (MF->getFunction()->getCallingConv()) { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) + CC = CallingConv::X86_INTR; + switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; Index: test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-32-intrcc.ll +++ test/CodeGen/X86/x86-32-intrcc.ll @@ -57,23 +57,23 @@ define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) { call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushl %ebp - ; CHECK-SSE-NEXT: pushl %ebx - ; CHECK-SSE-NEXT; pushl %eax - ; CHECK-SSE-NEXT: popl %eax - ; CHECK-SSE-NEXT: popl %ebx - ; CHECK-SSE-NEXT: popl %ebp - ; CHECK-SSE-NEXT: addl $4, %esp - ; CHECK-SSE-NEXT: iretl + ; CHECK: pushl %ebp + ; CHECK: pushl %ebx + ; CHECK: pushl %eax + ; CHECK: popl %eax + ; CHECK: popl %ebx + ; CHECK: popl %ebp + ; CHECK: addl $4, %esp + ; CHECK: iretl ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushl %ebp - ; CHECK0-SSE-NEXT: pushl %ebx - ; CHECK0-SSE-NEXT; pushl %eax - ; CHECK0-SSE-NEXT: popl %eax - ; CHECK0-SSE-NEXT: popl %ebx - ; CHECK0-SSE-NEXT: popl %ebp - ; CHECK0-SSE-NEXT: addl $4, %esp - ; CHECK0-SSE-NEXT: iretl + 
; CHECK0: pushl %ebp + ; CHECK0: pushl %ebx + ; CHECK0: pushl %eax + ; CHECK0: popl %eax + ; CHECK0: popl %ebx + ; CHECK0: popl %ebp + ; CHECK0: addl $4, %esp + ; CHECK0: iretl ret void } Index: test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-64-intrcc.ll +++ test/CodeGen/X86/x86-64-intrcc.ll @@ -59,32 +59,33 @@ define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) { call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushq %rax - ; CHECK-SSE-NEXT: pushq %rax - ; CHECK-SSE-NEXT; pushq %r11 - ; CHECK-SSE-NEXT: pushq %rbp - ; CHECK-SSE-NEXT: pushq %rbx - ; CHECK-SSE-NEXT: movaps %xmm0 - ; CHECK-SSE-NEXT: movaps %xmm0 - ; CHECK-SSE-NEXT: popq %rbx - ; CHECK-SSE-NEXT: popq %rbp - ; CHECK-SSE-NEXT: popq %r11 - ; CHECK-SSE-NEXT: popq %rax - ; CHECK-SSE-NEXT: addq $8, %rsp - ; CHECK-SSE-NEXT: iretq + + ; CHECK: pushq %rax + ; CHECK: pushq %rbp + ; CHECK: pushq %r11 + ; CHECK: pushq %rbx + ; CHECK: movaps %xmm0 + ; CHECK: movaps {{.*}}, %xmm0 + ; CHECK: popq %rbx + ; CHECK: popq %r11 + ; CHECK: popq %rbp + ; CHECK: popq %rax + ; CHECK: addq $16, %rsp + ; CHECK: iretq ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushq %rax - ; CHECK0-SSE-NEXT; pushq %r11 - ; CHECK0-SSE-NEXT: pushq %rbp - ; CHECK0-SSE-NEXT: pushq %rbx - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: popq %rbx - ; CHECK0-SSE-NEXT: popq %rbp - ; CHECK0-SSE-NEXT: popq %r11 - ; CHECK0-SSE-NEXT: popq %rax - ; CHECK0-SSE-NEXT: addq $16, %rsp - ; CHECK0-SSE-NEXT: iretq + + ; CHECK0: pushq %rax + ; CHECK0: pushq %rbp + ; CHECK0: pushq %r11 + ; CHECK0: pushq %rbx + ; CHECK0: movaps %xmm0 + ; CHECK0: movaps {{.*}}, %xmm0 + ; CHECK0: popq %rbx + ; CHECK0: popq %r11 + ; CHECK0: popq %rbp + ; CHECK0: popq %rax + ; CHECK0: addq $16, %rsp + ; CHECK0: iretq ret void } Index: 
test/CodeGen/X86/x86-no_caller_saved_registers.ll
===================================================================
--- test/CodeGen/X86/x86-no_caller_saved_registers.ll
+++ test/CodeGen/X86/x86-no_caller_saved_registers.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -O0 -mattr=+sse2 < %s | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Checks that caller function "foo" does not store registers for callee "bar".
+;; Where:
+;;   bar0 is marked as "no_caller_saved_registers" on call-site only.
+;;   bar1 is marked as "no_caller_saved_registers" on function definition only.
+;; For example, there is no store/load/access to xmm registers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Command line: -S -o - -emit-llvm -O2
+;; Source:
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;extern void bar (int, int, int, int, int, int, int, int, int)
+;;  __attribute__ ((no_caller_saved_registers));
+;;
+;;void
+;;foo (void* c) __attribute__((interrupt))
+;;{
+;;  bar (0, 1, 2, 3, 4, 5, 6, 7, 8);
+;;}
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Reference both interrupt handlers (foo0 and foo1) from llvm.used.
+@llvm.used = appending global [2 x i8*] [i8* bitcast (void (i8*)* @foo0 to i8*), i8* bitcast (void (i8*)* @foo1 to i8*)], section "llvm.metadata"
+
+; CHECK-NOT: xmm
+
+define x86_intrcc void @foo0(i8* nocapture readnone %c) #0 {
+entry:
+  tail call void @bar0(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8) #1
+  ret void
+}
+
+define x86_intrcc void @foo1(i8* nocapture readnone %c) #0 {
+entry:
+  tail call i32 @bar1(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
+
+declare void @bar0(i32, i32, i32, i32, i32, i32, i32, i32, i32)
+define i32 @bar1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) local_unnamed_addr #1 {
+; Match the label definition rather than the call to bar1 emitted in foo1, so
+; that the xmm exclusion above spans all of foo0 and foo1.
+; CHECK-LABEL: bar1:
+; CHECK: mov{{.*}} %xmm0
+; CHECK: mov{{.*}} {{.*}}, %xmm0
+; CHECK: ret
+entry:
+  %div = sdiv i32 %a0, %a1
+  %conv = sitofp i32 %div to float
+  %conv1 = fptosi float %conv to i32
+  ret i32 %conv1
+}
+
+attributes #0 = { "disable-tail-calls"="true" }
+attributes #1 = { "no_caller_saved_registers" }