Index: llvm/test/CodeGen/X86/sibcall.ll =================================================================== --- llvm/test/CodeGen/X86/sibcall.ll +++ llvm/test/CodeGen/X86/sibcall.ll @@ -2,8 +2,13 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X86 ; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X64 ; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-linux-gnux32 -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-win64 -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X64 +;; Can tailcall: incoming arg is discarded (and maybe popped by caller) define dso_local void @t1(i32 %x) nounwind ssp { + tail call void @foo() nounwind + ret void +} ; X86-LABEL: t1: ; X86: # %bb.0: ; X86-NEXT: jmp foo # TAILCALL @@ -15,13 +20,14 @@ ; X32-LABEL: t1: ; X32: # %bb.0: ; X32-NEXT: jmp foo # TAILCALL - tail call void @foo() nounwind - ret void -} declare dso_local void @foo() +;; Can tailcall, caller returns nothing and callee's result is discarded define dso_local void @t2() nounwind ssp { + %t0 = tail call i32 @foo2() nounwind + ret void +} ; X86-LABEL: t2: ; X86: # %bb.0: ; X86-NEXT: jmp foo2 # TAILCALL @@ -33,13 +39,14 @@ ; X32-LABEL: t2: ; X32: # %bb.0: ; X32-NEXT: jmp foo2 # TAILCALL - %t0 = tail call i32 @foo2() nounwind - ret void -} declare dso_local i32 @foo2() +;; Can tailcall, callee's result is discarded define dso_local void @t3() nounwind ssp { + %t0 = tail call i32 @foo3() nounwind + ret void +} ; X86-LABEL: t3: ; X86: # %bb.0: ; X86-NEXT: jmp foo3 # TAILCALL @@ -51,13 +58,18 @@ ; X32-LABEL: t3: ; X32: # %bb.0: ; X32-NEXT: jmp foo3 # TAILCALL - %t0 = tail call i32 @foo3() nounwind - ret void -} declare dso_local i32 @foo3() +;; Can tailcall unless we pass args on the stack (i686) +;; QOI: that might be a missed optimization, as we could overwrite our +;; incoming arg, or does the ABI 
specify that that's not +;; callee-clobberable? Of course, such rewriting can get hairy due +;; to RAW conflicts define dso_local void @t4(void (i32)* nocapture %x) nounwind ssp { + tail call void %x(i32 0) nounwind + ret void +} ; X86-LABEL: t4: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp @@ -77,11 +89,12 @@ ; X32-NEXT: movq %rdi, %rax ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: jmpq *%rax # TAILCALL - tail call void %x(i32 0) nounwind - ret void -} +;; Can tailcall through an incoming function pointer define dso_local void @t5(void ()* nocapture %x) nounwind ssp { + tail call void %x() nounwind + ret void +} ; X86-LABEL: t5: ; X86: # %bb.0: ; X86-NEXT: jmpl *{{[0-9]+}}(%esp) # TAILCALL @@ -93,14 +106,13 @@ ; X32-LABEL: t5: ; X32: # %bb.0: ; X32-NEXT: jmpq *%rdi # TAILCALL + +; Can tailcall, basically the same test as t5, except pass the +; function pointer on the stack for x86_64. +define dso_local void @t5_x64(i32, i32, i32, i32, i32, i32, void ()* nocapture %x) nounwind ssp { tail call void %x() nounwind ret void } - -; Basically the same test as t5, except pass the function pointer on the stack -; for x86_64. 
- -define dso_local void @t5_x64(i32, i32, i32, i32, i32, i32, void ()* nocapture %x) nounwind ssp { ; X86-LABEL: t5_x64: ; X86: # %bb.0: ; X86-NEXT: jmpl *{{[0-9]+}}(%esp) # TAILCALL @@ -113,12 +125,22 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: jmpq *%rax # TAILCALL - tail call void %x() nounwind - ret void -} - +;; Can tailcall @bar, but @t6 can only be tailcalled if args in regs +;; (so not on i686) define dso_local i32 @t6(i32 %x) nounwind ssp { + %t0 = icmp slt i32 %x, 10 + br i1 %t0, label %bb, label %bb1 + +bb: + %t1 = add nsw i32 %x, -1 + %t2 = tail call i32 @t6(i32 %t1) nounwind ssp + ret i32 %t2 + +bb1: + %t3 = tail call i32 @bar(i32 %x) nounwind + ret i32 %t3 +} ; X86-LABEL: t6: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp @@ -154,22 +176,14 @@ ; X32-NEXT: jmp t6 # TAILCALL ; X32-NEXT: .LBB6_2: # %bb1 ; X32-NEXT: jmp bar # TAILCALL - %t0 = icmp slt i32 %x, 10 - br i1 %t0, label %bb, label %bb1 - -bb: - %t1 = add nsw i32 %x, -1 - %t2 = tail call i32 @t6(i32 %t1) nounwind ssp - ret i32 %t2 - -bb1: - %t3 = tail call i32 @bar(i32 %x) nounwind - ret i32 %t3 -} declare dso_local i32 @bar(i32) +;; Can tailcall, we're just a forwarding function define dso_local i32 @t7(i32 %a, i32 %b, i32 %c) nounwind ssp { + %t0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind + ret i32 %t0 +} ; X86-LABEL: t7: ; X86: # %bb.0: ; X86-NEXT: jmp bar2 # TAILCALL @@ -181,13 +195,15 @@ ; X32-LABEL: t7: ; X32: # %bb.0: ; X32-NEXT: jmp bar2 # TAILCALL - %t0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind - ret i32 %t0 -} declare dso_local i32 @bar2(i32, i32, i32) +;; Can tailcall, still forwarding define signext i16 @t8() nounwind ssp { +entry: + %0 = tail call signext i16 @bar3() nounwind ; [#uses=1] + ret i16 %0 +} ; X86-LABEL: t8: ; X86: # %bb.0: # %entry ; X86-NEXT: jmp bar3 # TAILCALL @@ -199,14 +215,16 @@ ; X32-LABEL: t8: ; X32: # %bb.0: # %entry ; X32-NEXT: jmp bar3 # TAILCALL -entry: - %0 = tail call signext i16 @bar3() nounwind ; [#uses=1] 
- ret i16 %0 -} declare dso_local signext i16 @bar3() +;; Can tailcall except when stack used for args (i686) define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp { +entry: + %0 = bitcast i32 (i32)* %x to i16 (i32)* + %1 = tail call signext i16 %0(i32 0) nounwind + ret i16 %1 +} ; X86-LABEL: t9: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $12, %esp @@ -226,13 +244,13 @@ ; X32-NEXT: movq %rdi, %rax ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: jmpq *%rax # TAILCALL -entry: - %0 = bitcast i32 (i32)* %x to i16 (i32)* - %1 = tail call signext i16 %0(i32 0) nounwind - ret i16 %1 -} +;; QOI: We can't sibcall this, but we should define dso_local void @t10() nounwind ssp { +entry: + %0 = tail call i32 @foo4() noreturn nounwind + unreachable +} ; X86-LABEL: t10: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $12, %esp @@ -247,17 +265,22 @@ ; X32: # %bb.0: # %entry ; X32-NEXT: pushq %rax ; X32-NEXT: callq foo4 -entry: - %0 = tail call i32 @foo4() noreturn nounwind - unreachable -} declare dso_local i32 @foo4() -; In 32-bit mode, it's emitting a bunch of dead loads that are not being -; eliminated currently. 
- +;; Can tailcall, we're a conditional forwarding function define dso_local i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp { +entry: + %0 = icmp eq i32 %x, 0 + br i1 %0, label %bb6, label %bb + +bb: + %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind + ret i32 %1 + +bb6: + ret i32 0 +} ; X86-LABEL: t11: ; X86: # %bb.0: # %entry ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) @@ -287,23 +310,24 @@ ; X32-NEXT: .LBB11_1: # %bb6 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retq + +declare dso_local i32 @foo5(i32, i32, i32, i32, i32) + +%struct.t = type { i32, i32, i32, i32, i32 } + +;; Can tailcall, still a conditional forwarding function +define dso_local i32 @t12(i32 %x, i32 %y, %struct.t* byval(%struct.t) align 4 %z) nounwind ssp { entry: %0 = icmp eq i32 %x, 0 - br i1 %0, label %bb6, label %bb + br i1 %0, label %bb2, label %bb bb: - %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind + %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval(%struct.t) align 4 %z) nounwind ret i32 %1 -bb6: +bb2: ret i32 0 } - -declare dso_local i32 @foo5(i32, i32, i32, i32, i32) - -%struct.t = type { i32, i32, i32, i32, i32 } - -define dso_local i32 @t12(i32 %x, i32 %y, %struct.t* byval(%struct.t) align 4 %z) nounwind ssp { ; X86-LABEL: t12: ; X86: # %bb.0: # %entry ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) @@ -333,17 +357,6 @@ ; X32-NEXT: .LBB12_1: # %bb2 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retq -entry: - %0 = icmp eq i32 %x, 0 - br i1 %0, label %bb2, label %bb - -bb: - %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval(%struct.t) align 4 %z) nounwind - ret i32 %1 - -bb2: - ret i32 0 -} declare dso_local i32 @foo6(i32, i32, %struct.t* byval(%struct.t) align 4) @@ -351,7 +364,12 @@ %struct.ns = type { i32, i32 } %struct.cp = type { float, float, float, float, float } +;; Cannot tailcall, need to copy incoming struct to stack-based outgoing define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp { +entry: + %0 = 
tail call fastcc %struct.ns* @foo7(%struct.cp* byval(%struct.cp) align 4 %yy, i8 signext 0) nounwind + ret %struct.ns* %0 +} ; X86-LABEL: t13: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $28, %esp @@ -399,13 +417,9 @@ ; X32-NEXT: movl %eax, %eax ; X32-NEXT: popq %rcx ; X32-NEXT: retq -entry: - %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval(%struct.cp) align 4 %yy, i8 signext 0) nounwind - ret %struct.ns* %0 -} ; rdar://6195379 -; llvm can't do sibcall for this in 32-bit mode (yet). +; QOI: llvm can't do sibcall for this in 32-bit mode (yet). declare dso_local fastcc %struct.ns* @foo7(%struct.cp* byval(%struct.cp) align 4, i8 signext) nounwind ssp %struct.__block_descriptor = type { i64, i64 } @@ -414,6 +428,17 @@ %struct.__block_literal_2 = type { i8*, i32, i32, i8*, %struct.__block_descriptor_withcopydispose*, void ()* } define dso_local void @t14(%struct.__block_literal_2* nocapture %.block_descriptor) nounwind ssp { +entry: + %0 = getelementptr inbounds %struct.__block_literal_2, %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; [#uses=1] + %1 = load void ()*, void ()** %0, align 8 ; [#uses=2] + %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1] + %3 = getelementptr inbounds %struct.__block_literal_1, %struct.__block_literal_1* %2, i64 0, i32 3 ; [#uses=1] + %4 = load i8*, i8** %3, align 8 ; [#uses=1] + %5 = bitcast i8* %4 to void (i8*)* ; [#uses=1] + %6 = bitcast void ()* %1 to i8* ; [#uses=1] + tail call void %5(i8* %6) nounwind + ret void +} ; X86-LABEL: t14: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $12, %esp @@ -434,22 +459,15 @@ ; X32-NEXT: movl 20(%edi), %edi ; X32-NEXT: movl 12(%edi), %eax ; X32-NEXT: jmpq *%rax # TAILCALL -entry: - %0 = getelementptr inbounds %struct.__block_literal_2, %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; [#uses=1] - %1 = load void ()*, void ()** %0, align 8 ; [#uses=2] - %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; 
<%struct.__block_literal_1*> [#uses=1] - %3 = getelementptr inbounds %struct.__block_literal_1, %struct.__block_literal_1* %2, i64 0, i32 3 ; [#uses=1] - %4 = load i8*, i8** %3, align 8 ; [#uses=1] - %5 = bitcast i8* %4 to void (i8*)* ; [#uses=1] - %6 = bitcast void ()* %1 to i8* ; [#uses=1] - tail call void %5(i8* %6) nounwind - ret void -} ; rdar://7726868 %struct.foo = type { [4 x i32] } +;; QOI: we can't sibcall this due to sret define dso_local void @t15(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind { + tail call fastcc void @f(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind + ret void +} ; X86-LABEL: t15: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -479,9 +497,6 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq - tail call fastcc void @f(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind - ret void -} declare dso_local void @f(%struct.foo* noalias sret(%struct.foo)) nounwind @@ -699,6 +714,9 @@ } define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind { + tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind + ret void +} ; X86-LABEL: t21_sret_to_sret_more_args: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -729,9 +747,6 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq - tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind - ret void -} define fastcc void @t21_sret_to_sret_second_arg_sret(%struct.foo* noalias %agg.result, %struct.foo* noalias sret(%struct.foo) %ret) nounwind { ; X86-LABEL: t21_sret_to_sret_second_arg_sret: @@ -769,7 +784,11 @@ ret void } +;; Can be tailcalled, we're just a forwarding function define fastcc void @t21_sret_to_sret_more_args2(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind { + tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, 
i32 %b, i32 %a) nounwind + ret void +} ; X86-LABEL: t21_sret_to_sret_more_args2: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -807,12 +826,12 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq - tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %b, i32 %a) nounwind - ret void -} - +;; Cannot tailcall as callee doesn't return our sret define fastcc void @t21_sret_to_sret_args_mismatch(%struct.foo* noalias sret(%struct.foo) %agg.result, %struct.foo* noalias %ret) nounwind { + tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %ret) nounwind + ret void +} ; X86-LABEL: t21_sret_to_sret_args_mismatch: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -844,11 +863,12 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq + +;; Cannot tailcall as callee doesn't return our sret +define fastcc void @t21_sret_to_sret_args_mismatch2(%struct.foo* noalias sret(%struct.foo) %agg.result, %struct.foo* noalias %ret) nounwind { tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %ret) nounwind ret void } - -define fastcc void @t21_sret_to_sret_args_mismatch2(%struct.foo* noalias sret(%struct.foo) %agg.result, %struct.foo* noalias %ret) nounwind { ; X86-LABEL: t21_sret_to_sret_args_mismatch2: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -880,11 +900,13 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq - tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %ret) nounwind - ret void -} +;; Cannot tailcall as callee doesn't return our sret define fastcc void @t21_sret_to_sret_arg_mismatch(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind { + %a = call fastcc %struct.foo* @ret_struct() + tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %a) nounwind + ret void +} ; X86-LABEL: t21_sret_to_sret_arg_mismatch: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -919,12 +941,13 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; 
X32-NEXT: retq - %a = call fastcc %struct.foo* @ret_struct() - tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %a) nounwind - ret void -} +;; Cannot tailcall due to sret difference define fastcc void @t21_sret_to_sret_structs_mismatch(%struct.foo* noalias sret(%struct.foo) %agg.result, %struct.foo* noalias %a) nounwind { + %b = call fastcc %struct.foo* @ret_struct() + tail call fastcc void @t21_f_sret2(%struct.foo* noalias sret(%struct.foo) %a, %struct.foo* noalias %b) nounwind + ret void +} ; X86-LABEL: t21_sret_to_sret_structs_mismatch: ; X86: # %bb.0: ; X86-NEXT: pushl %edi @@ -975,15 +998,14 @@ ; X32-NEXT: popq %rbx ; X32-NEXT: popq %r14 ; X32-NEXT: retq - %b = call fastcc %struct.foo* @ret_struct() - tail call fastcc void @t21_f_sret2(%struct.foo* noalias sret(%struct.foo) %a, %struct.foo* noalias %b) nounwind - ret void -} declare ccc %struct.foo* @ret_struct() nounwind - +;; Cannot tailcall as callee doesn't return our sret define fastcc void @t21_sret_to_non_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind { + tail call fastcc void @t21_f_non_sret(%struct.foo* %agg.result) nounwind + ret void +} ; X86-LABEL: t21_sret_to_non_sret: ; X86: # %bb.0: ; X86-NEXT: pushl %esi @@ -1012,12 +1034,12 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: popq %rbx ; X32-NEXT: retq - tail call fastcc void @t21_f_non_sret(%struct.foo* %agg.result) nounwind - ret void -} - +;; QOI: should be tailcallable, as we don't care about callee's sret define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind { + tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind + ret void +} ; X86-LABEL: t22_non_sret_to_sret: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp @@ -1040,9 +1062,6 @@ ; X32-NEXT: callq t22_f_sret@PLT ; X32-NEXT: popq %rax ; X32-NEXT: retq - tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind - ret void -} declare dso_local fastcc void 
@t21_f_sret(%struct.foo* noalias sret(%struct.foo)) nounwind declare dso_local fastcc void @t21_f_sret2(%struct.foo* noalias sret(%struct.foo), %struct.foo* noalias) nounwind