Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5588,6 +5588,11 @@ // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. + if (Entry.isSRet && isa<Instruction>(V)) + isTailCall = false; } // Check if target-independent constraints permit a tail call here. @@ -7225,6 +7230,10 @@ Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the caller's stack frame. + CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; Index: test/CodeGen/AArch64/tailcall-sret.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/tailcall-sret.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Check that we don't try to tail-call with a non-forwarded sret parameter. 
+
+declare i1024 @test_sret() #0
+declare void @test_explicit_sret(i1024* sret) #0
+
+; CHECK-LABEL: _test_call_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_call_sret() #0 {
+  %a = call i1024 @test_sret()
+  ret i1024 %a
+}
+
+; CHECK-LABEL: _test_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_sret() #0 {
+  %a = tail call i1024 @test_sret()
+  ret i1024 %a
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
+  %a = tail call i1024 %f()
+  ret i1024 %a
+}
+
+; This is the only OK case, where we forward the explicit sret pointer.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret:
+; CHECK-NEXT: b _test_explicit_sret
+define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
+  tail call void @test_explicit_sret(i1024* %arg)
+  ret void
+}
+
+; CHECK-LABEL: _test_call_explicit_sret:
+; CHECK-NOT: mov x8
+; CHECK: bl _test_explicit_sret
+; CHECK: ret
+define void @test_call_explicit_sret(i1024* sret %arg) #0 {
+  call void @test_explicit_sret(i1024* %arg)
+  ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_unused() #0 {
+  %l = alloca i1024, align 8
+  tail call void @test_explicit_sret(i1024* %l)
+  ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
+; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
+; CHECK: str [[PTRLOAD1]], [sp]
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
+  %l = alloca i1024, align 8
+  %r = load i1024, i1024* %ptr, align 8
+  store i1024 %r, i1024* %l, align 8
+  tail call void @test_explicit_sret(i1024* %l)
+  ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
+; CHECK: add x8, x0, #128
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
+  %ptr2 = getelementptr i1024, i1024* %ptr, i32 1
+  tail call void @test_explicit_sret(i1024* %ptr2)
+  ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
+  %l = alloca i1024, align 8
+  tail call void @test_explicit_sret(i1024* %l)
+  %r = load i1024, i1024* %l, align 8
+  ret i1024 %r
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
+; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
+; CHECK: mov x0, sp
+; CHECK-NEXT: blr [[FPTR]]
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
+  %l = alloca i1024, align 8
+  tail call void %f(i1024* %l)
+  %r = load i1024, i1024* %l, align 8
+  store i1024 %r, i1024* %arg, align 8
+  ret void
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
+  %ret = tail call i1024 %f()
+  store i1024 %ret, i1024* %arg, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }