This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Don't attempt a tail-call with a non-forwarded sret.
ClosedPublic

Authored by ab on Mar 20 2015, 7:08 PM.

Download Raw Diff

Details

Reviewers

Summary

We used to do that, and it makes no sense. I haven't looked at other targets, but: ARM and X86 already disable tail-calls whenever there's sret (caller or callee, explicit or implicit), but AArch64 bravely tries to continue. In some cases, it generated broken code such as:

_test_tailcall_sret:
        sub     sp, sp, #128
        mov      x8, sp
        add     sp, sp, #128
        b       _test_sret

for:

declare i1024 @test_sret() #0
define i1024 @test_tailcall_sret() #0 {
  %a = tail call i1024 @test_sret()
  ret i1024 %a
}

There are two parts to this (I'll commit separately, but it makes sense to review together):

implicit sret: this will be part of the stack frame, so there's no way we can tail-call
explicit sret: a good enough approximation is: if the sret pointer is an Instruction, it might be function-local (alloca, usually).

In practice, neither of these happens with well-behaved frontends such as clang, which will have an explicit sret and forward it across tail calls.

Also, I say approximation because there's one case we pessimize (the GEP testcase), but that's a really weird situation. Having a GetUnderlyingObject around the sret pointer origin check does the trick though, so I can add it if desired.

-Ahmed

Diff Detail

Event Timeline

ab updated this revision to Diff 22404. Mar 20 2015, 7:08 PM

ab retitled this revision from to [CodeGen] Don't attempt a tail-call with a non-forwarded sret..

ab updated this object.

ab edited the test plan for this revision. (Show Details)

ab added subscribers: Unknown Object (MLST), atrick, t.p.northover.

Herald added a subscriber: aemerson. · View Herald Transcript · Mar 20 2015, 7:08 PM

Ping!

-Ahmed

LGTM. I can't immediately think of a better hack than checking isa<Instruction>, and I can't think of a way to acquire a local stack address without an instruction.

This revision is now accepted and ready to land. Mar 27 2015, 12:29 PM

This went in already, r233409/r233410.

Revision Contents

Path

Size

		lib/	CodeGen/	SelectionDAG/
	c/	lib/	CodeGen/	SelectionDAG/

SelectionDAGBuilder.cpp

9 lines

test/

CodeGen/

AArch64/

tailcall-sret.ll

142 lines

Diff 22404

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,582 Lines • ▼ Show 20 Lines	if (V->getType()->isEmptyTy())
continue;		continue;

SDValue ArgNode = getValue(V);		SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();		Entry.Node = ArgNode; Entry.Ty = V->getType();

// Skip the first return-type Attribute to get to params.		// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);		Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);		Args.push_back(Entry);

		// If we have an explicit sret argument that is an Instruction, (i.e., it
		// might point to function-local memory), we can't meaningfully tail-call.
		if (Entry.isSRet && isa<Instruction>(V))
		isTailCall = false;
}		}

// Check if target-independent constraints permit a tail call here.		// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.		// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))		if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
isTailCall = false;		isTailCall = false;

TargetLowering::CallLoweringInfo CLI(DAG);		TargetLowering::CallLoweringInfo CLI(DAG);
▲ Show 20 Lines • Show All 1,621 Lines • ▼ Show 20 Lines	if (!CanLowerReturn) {
Entry.isInReg = false;		Entry.isInReg = false;
Entry.isSRet = true;		Entry.isSRet = true;
Entry.isNest = false;		Entry.isNest = false;
Entry.isByVal = false;		Entry.isByVal = false;
Entry.isReturned = false;		Entry.isReturned = false;
Entry.Alignment = Align;		Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);		CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());		CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());

		// sret demotion isn't compatible with tail-calls, since the sret argument
		// points into the caller's stack frame.
		CLI.IsTailCall = false;
} else {		} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {		for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];		EVT VT = RetTys[I];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);		MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);		unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {		for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;		ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;		MyFlags.VT = RegisterVT;
▲ Show 20 Lines • Show All 592 Lines • Show Last 20 Lines

test/CodeGen/AArch64/tailcall-sret.ll

This file was added.

				; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
				; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.

				target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

				; Check that we don't try to tail-call with a non-forwarded sret parameter.

				declare i1024 @test_sret() #0
				declare void @test_explicit_sret(i1024* sret) #0

				; CHECK-LABEL: _test_call_sret:
				; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK: mov x8, sp
				; CHECK-NEXT: bl _test_sret
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define i1024 @test_call_sret() #0 {
				%a = call i1024 @test_sret()
				ret i1024 %a
				}

				; CHECK-LABEL: _test_tailcall_sret:
				; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK: mov x8, sp
				; CHECK-NEXT: bl _test_sret
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define i1024 @test_tailcall_sret() #0 {
				%a = tail call i1024 @test_sret()
				ret i1024 %a
				}

				; CHECK-LABEL: _test_indirect_tailcall_sret:
				; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK: mov x8, sp
				; CHECK-NEXT: blr x0
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
				%a = tail call i1024 %f()
				ret i1024 %a
				}

				; This is the only OK case, where we forward the explicit sret pointer.

				; CHECK-LABEL: _test_tailcall_explicit_sret:
				; CHECK-NEXT: b _test_explicit_sret
				define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
				tail call void @test_explicit_sret(i1024* %arg)
				ret void
				}

				; CHECK-LABEL: _test_call_explicit_sret:
				; CHECK-NOT: mov x8
				; CHECK: bl _test_explicit_sret
				; CHECK: ret
				define void @test_call_explicit_sret(i1024* sret %arg) #0 {
				call void @test_explicit_sret(i1024* %arg)
				ret void
				}

				; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
				; CHECK: mov x8, sp
				; CHECK-NEXT: bl _test_explicit_sret
				; CHECK: ret
				define void @test_tailcall_explicit_sret_alloca_unused() #0 {
				%l = alloca i1024, align 8
				tail call void @test_explicit_sret(i1024* %l)
				ret void
				}

				; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
				; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
				; CHECK: str [[PTRLOAD1]], [sp]
				; CHECK: mov x8, sp
				; CHECK-NEXT: bl _test_explicit_sret
				; CHECK: ret
				define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
				%l = alloca i1024, align 8
				%r = load i1024, i1024* %ptr, align 8
				store i1024 %r, i1024* %l, align 8
				tail call void @test_explicit_sret(i1024* %l)
				ret void
				}

				; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
				; CHECK: add x8, x0, #128
				; CHECK-NEXT: bl _test_explicit_sret
				; CHECK: ret
				define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
				%ptr2 = getelementptr i1024, i1024* %ptr, i32 1
				tail call void @test_explicit_sret(i1024* %ptr2)
				ret void
				}

				; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
				; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK: mov x8, sp
				; CHECK-NEXT: bl _test_explicit_sret
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
				%l = alloca i1024, align 8
				tail call void @test_explicit_sret(i1024* %l)
				%r = load i1024, i1024* %l, align 8
				ret i1024 %r
				}

				; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
				; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
				; CHECK: mov x0, sp
				; CHECK-NEXT: blr [[FPTR]]
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
				%l = alloca i1024, align 8
				tail call void %f(i1024* %l)
				%r = load i1024, i1024* %l, align 8
				store i1024 %r, i1024* %arg, align 8
				ret void
				}

				; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
				; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
				; CHECK: mov x8, sp
				; CHECK-NEXT: blr x0
				; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
				; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
				; CHECK: ret
				define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
				%ret = tail call i1024 %f()
				store i1024 %ret, i1024* %arg, align 8
				ret void
				}

				attributes #0 = { nounwind }