Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2980,6 +2980,13 @@ bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + CallingConv::ID CallerCallConv = MF.getFunction()->getCallingConv(); + const CallInst *CI = + CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; + const Function *Fn = CI ? CI->getCalledFunction() : nullptr; + bool HasNCSR = + (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); @@ -3361,7 +3368,10 @@ RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); + const uint32_t *Mask = RegInfo->getCallPreservedMask( + // FIXME: NoCallerSavedRegisters has same preserved registers as + // X86_INTR calling convention. Is there a better way to do this? + MF, HasNCSR ? 
CallingConv::X86_INTR: CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based Index: test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-32-intrcc.ll +++ test/CodeGen/X86/x86-32-intrcc.ll @@ -57,23 +57,23 @@ define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) { call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushl %ebp - ; CHECK-SSE-NEXT: pushl %ebx - ; CHECK-SSE-NEXT; pushl %eax - ; CHECK-SSE-NEXT: popl %eax - ; CHECK-SSE-NEXT: popl %ebx - ; CHECK-SSE-NEXT: popl %ebp - ; CHECK-SSE-NEXT: addl $4, %esp - ; CHECK-SSE-NEXT: iretl + ; CHECK: pushl %ebp + ; CHECK: pushl %ebx + ; CHECK: pushl %eax + ; CHECK: popl %eax + ; CHECK: popl %ebx + ; CHECK: popl %ebp + ; CHECK: addl $4, %esp + ; CHECK: iretl ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushl %ebp - ; CHECK0-SSE-NEXT: pushl %ebx - ; CHECK0-SSE-NEXT; pushl %eax - ; CHECK0-SSE-NEXT: popl %eax - ; CHECK0-SSE-NEXT: popl %ebx - ; CHECK0-SSE-NEXT: popl %ebp - ; CHECK0-SSE-NEXT: addl $4, %esp - ; CHECK0-SSE-NEXT: iretl + ; CHECK0: pushl %ebp + ; CHECK0: pushl %ebx + ; CHECK0: pushl %eax + ; CHECK0: popl %eax + ; CHECK0: popl %ebx + ; CHECK0: popl %ebp + ; CHECK0: addl $4, %esp + ; CHECK0: iretl ret void } Index: test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-64-intrcc.ll +++ test/CodeGen/X86/x86-64-intrcc.ll @@ -57,31 +57,31 @@ define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) { call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushq %rax - ; CHECK-SSE-NEXT; pushq %r11 - ; CHECK-SSE-NEXT: pushq %rbp - ; CHECK-SSE-NEXT: pushq %rbx - ; CHECK-SSE-NEXT: movaps %xmm0 - ; 
CHECK-SSE-NEXT: movaps %xmm0 - ; CHECK-SSE-NEXT: popq %rbx - ; CHECK-SSE-NEXT: popq %rbp - ; CHECK-SSE-NEXT: popq %r11 - ; CHECK-SSE-NEXT: popq %rax - ; CHECK-SSE-NEXT: addq $8, %rsp - ; CHECK-SSE-NEXT: iretq + ; CHECK: pushq %rax + ; CHECK: pushq %rbp + ; CHECK: pushq %r11 + ; CHECK: pushq %rbx + ; CHECK: movaps %xmm0 + ; CHECK: movaps {{.*}}, %xmm0 + ; CHECK: popq %rbx + ; CHECK: popq %r11 + ; CHECK: popq %rbp + ; CHECK: popq %rax + ; CHECK: addq $8, %rsp + ; CHECK: iretq ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushq %rax - ; CHECK0-SSE-NEXT; pushq %r11 - ; CHECK0-SSE-NEXT: pushq %rbp - ; CHECK0-SSE-NEXT: pushq %rbx - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: popq %rbx - ; CHECK0-SSE-NEXT: popq %rbp - ; CHECK0-SSE-NEXT: popq %r11 - ; CHECK0-SSE-NEXT: popq %rax - ; CHECK0-SSE-NEXT: addq $8, %rsp - ; CHECK0-SSE-NEXT: iretq + ; CHECK0: pushq %rax + ; CHECK0: pushq %rbp + ; CHECK0: pushq %r11 + ; CHECK0: pushq %rbx + ; CHECK0: movaps %xmm0 + ; CHECK0: movaps {{.*}}, %xmm0 + ; CHECK0: popq %rbx + ; CHECK0: popq %r11 + ; CHECK0: popq %rbp + ; CHECK0: popq %rax + ; CHECK0: addq $8, %rsp + ; CHECK0: iretq ret void } Index: test/CodeGen/X86/x86-no_caller_saved_registers.ll =================================================================== --- test/CodeGen/X86/x86-no_caller_saved_registers.ll +++ test/CodeGen/X86/x86-no_caller_saved_registers.ll @@ -0,0 +1,50 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s +; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s +; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Checks that caller function "foo" does not store registers for callee "bar". +;; Where: +;; bar0 is marked as "no_caller_saved_registers" on call-site only. 
+;; bar1 is marked as "no_caller_saved_registers" on function declaration only.
+;; For example, there is no store/load/access to xmm registers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Command line: -S -o - -emit-llvm -O2
+;; Source:
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;extern void bar (int, int, int, int, int, int, int, int, int)
+;;  __attribute__ ((no_caller_saved_registers));
+;;
+;;void
+;;foo (void* c) __attribute__((interrupt))
+;;{
+;;  bar (0, 1, 2, 3, 4, 5, 6, 7, 8);
+;;}
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; CHECK-NOT: xmm
+
+@llvm.used = appending global [2 x i8*] [i8* bitcast (void (i8*)* @foo0 to i8*), i8* bitcast (void (i8*)* @foo1 to i8*)], section "llvm.metadata"
+
+define x86_intrcc void @foo0(i8* nocapture readnone %c) #0 {
+entry:
+  tail call void @bar0(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8) #1
+  ret void
+}
+
+define x86_intrcc void @foo1(i8* nocapture readnone %c) #0 {
+entry:
+  tail call void @bar1(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
+
+declare void @bar0(i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @bar1(i32, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+attributes #0 = { "disable-tail-calls"="true" }
+attributes #1 = { "no_caller_saved_registers" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.9.0 (trunk 274564)"}