Index: lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- lib/Transforms/Scalar/TailRecursionElimination.cpp +++ lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -127,6 +127,12 @@ case Instruction::Call: case Instruction::Invoke: { CallSite CS(I); + // If the alloca-derived argument is passed byval, it is neither an escape + // point nor a use of the alloca. Calling with byval copies the contents + // of the alloca into argument registers or stack slots, which exist + // beyond the lifetime of the current frame. + if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U))) + continue; bool IsNocapture = CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); callUsesLocalStack(CS, IsNocapture); Index: test/Transforms/Inline/byval-tail-call.ll =================================================================== --- test/Transforms/Inline/byval-tail-call.ll +++ test/Transforms/Inline/byval-tail-call.ll @@ -40,3 +40,36 @@ tail call void @qux(i32* byval %x) ret void } + +; A byval parameter that a function passes on as a byval argument does not +; block the call from being marked as tail. 
+ +declare void @ext2(i32* byval) + +define void @bar2(i32* byval %x) { + call void @ext2(i32* byval %x) + ret void +} + +define void @foobar(i32* %x) { +; CHECK-LABEL: define void @foobar( +; CHECK: %[[POS:.*]] = alloca i32 +; CHECK: %[[VAL:.*]] = load i32, i32* %x +; CHECK: store i32 %[[VAL]], i32* %[[POS]] +; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]] +; CHECK: ret void + tail call void @bar2(i32* byval %x) + ret void +} + +define void @barfoo() { +; CHECK-LABEL: define void @barfoo( +; CHECK: %[[POS:.*]] = alloca i32 +; CHECK: %[[VAL:.*]] = load i32, i32* %x +; CHECK: store i32 %[[VAL]], i32* %[[POS]] +; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]] +; CHECK: ret void + %x = alloca i32 + tail call void @bar2(i32* byval %x) + ret void +} Index: test/Transforms/TailCallElim/basic.ll =================================================================== --- test/Transforms/TailCallElim/basic.ll +++ test/Transforms/TailCallElim/basic.ll @@ -198,3 +198,44 @@ call void undef(i8* undef) [ "foo"(i8* %e) ] unreachable } + +%struct.foo = type { [10 x i32] } + +; If an alloca is passed byval, that is neither a use of the alloca nor an +; escape point, so both calls below can be marked tail. +define void @test13() { +; CHECK-LABEL: @test13 +; CHECK: tail call void @bar(%struct.foo* byval %f) +; CHECK: tail call void @bar(%struct.foo* null) +entry: + %f = alloca %struct.foo + call void @bar(%struct.foo* byval %f) + call void @bar(%struct.foo* null) + ret void +} + +; A call which passes a byval parameter using byval can be marked tail. +define void @test14(%struct.foo* byval %f) { +; CHECK-LABEL: @test14 +; CHECK: tail call void @bar +entry: + call void @bar(%struct.foo* byval %f) + ret void +} + +; If a byval parameter is copied into an alloca that is then passed byval, the +; call can be marked tail. 
+define void @test15(%struct.foo* byval %f) { +; CHECK-LABEL: @test15 +; CHECK: tail call void @bar +entry: + %agg.tmp = alloca %struct.foo + %0 = bitcast %struct.foo* %agg.tmp to i8* + %1 = bitcast %struct.foo* %f to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 40, i1 false) ; 40 bytes = 10 x i32, i.e. the whole %struct.foo + call void @bar(%struct.foo* byval %agg.tmp) + ret void +} + +declare void @bar(%struct.foo* byval) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)