diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -83,14 +83,8 @@ Function &F = *Frame->getFunction(); for (Instruction &I : instructions(F)) if (auto *Call = dyn_cast(&I)) - if (Call->isTailCall() && operandReferences(Call, Frame, AA)) { - // FIXME: If we ever hit this check. Evaluate whether it is more - // appropriate to retain musttail and allow the code to compile. - if (Call->isMustTailCall()) - report_fatal_error("Call referring to the coroutine frame cannot be " - "marked as musttail"); + if (Call->isTailCall() && operandReferences(Call, Frame, AA)) Call->setTailCall(false); - } } // Given a resume function @f.resume(%f.frame* %frame), returns the size @@ -252,7 +246,21 @@ // If size of the set is the same as total number of coro.begin, that means we // found a coro.free or coro.destroy referencing each coro.begin, so we can // perform heap elision. - return ReferencedCoroBegins.size() == CoroBegins.size(); + if (ReferencedCoroBegins.size() != CoroBegins.size()) + return false; + + // If any call in the function is a musttail call, it usually won't work + // because we cannot drop the tailcall attribute, and a tail call will reuse + // the entire stack where we are going to put the new frame. In theory a more + // precise analysis can be done to check whether the new frame aliases with + // the call, however it's challenging to do so before the elision actually + // happened. + for (Instruction &I : instructions(F)) + if (auto *Call = dyn_cast(&I)) + if (Call->isMustTailCall()) + return false; + + return true; } void Lowerer::collectPostSplitCoroIds(Function *F) { diff --git a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Only run with new pass manager since old pass manager's alias analysis isn't +; powerful enough to tell that the tailcall's arguments don't alias the frame. +; +; RUN: opt < %s -passes='coro-elide' -S | FileCheck %s + +%"bar.Frame" = type { void (%"bar.Frame"*)*, void (%"bar.Frame"*)*, %"struct.coroutine::promise_type", i1 } +%"struct.coroutine::promise_type" = type { i32 } +%"foo.Frame" = type { void (%"foo.Frame"*)*, void (%"foo.Frame"*)*, %"struct.coroutine::promise_type", i1, %"struct.coroutine::promise_type::final_awaitable" } +%"struct.coroutine::promise_type" = type { i32 } +%"struct.coroutine::promise_type::final_awaitable" = type { i8 } +@"bar.resumers" = private constant [3 x void (%"bar.Frame"*)*] [void (%"bar.Frame"*)* @"bar.resume", void (%"bar.Frame"*)* undef, void (%"bar.Frame"*)* undef] + +declare dso_local void @"bar"() align 2 +declare dso_local fastcc void @"bar.resume"(%"bar.Frame"*) align 2 + +define internal fastcc void @foo.resume_musttail(%"foo.Frame"* %FramePtr) { +; CHECK-LABEL: @foo.resume_musttail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.coro.id +; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.coro.alloc(token [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @llvm.coro.begin(token [[TMP0]], i8* null) +; CHECK-NEXT: [[CALL34:%.*]] = call i8* undef() +; CHECK-NEXT: musttail call fastcc void undef(i8* [[CALL34]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call token @llvm.coro.id(i32 16, i8* null, i8* bitcast (void ()* @"bar" to i8*), i8* bitcast ([3 x void (%"bar.Frame"*)*]* @"bar.resumers" to i8*)) + %1 = tail call i1 @llvm.coro.alloc(token %0) + %2 = tail call i8* @llvm.coro.begin(token %0, i8* null) + call i8* @llvm.coro.subfn.addr(i8* %2, i8 1) + %call34 = call i8* undef() + musttail call fastcc void undef(i8* %call34) + ret void +} + +define internal fastcc void @foo.resume_no_musttail(%"foo.Frame"* %FramePtr) { +; CHECK-LABEL: @foo.resume_no_musttail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [24 x i8], align 8 +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast [24 x i8]* [[TMP0]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.coro.id +; CHECK-NEXT: [[CALL34:%.*]] = call i8* undef() +; CHECK-NEXT: call fastcc void undef(i8* [[CALL34]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call token @llvm.coro.id(i32 16, i8* null, i8* bitcast (void ()* @"bar" to i8*), i8* bitcast ([3 x void (%"bar.Frame"*)*]* @"bar.resumers" to i8*)) + %1 = tail call i1 @llvm.coro.alloc(token %0) + %2 = tail call i8* @llvm.coro.begin(token %0, i8* null) + call i8* @llvm.coro.subfn.addr(i8* %2, i8 1) + %call34 = call i8* undef() + tail call fastcc void undef(i8* %call34) + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 + +; Function Attrs: argmemonly nounwind readonly +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 + +; Function Attrs: nounwind +declare i1 @llvm.coro.alloc(token) #2 + +; Function Attrs: nounwind +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 + +attributes #0 = { argmemonly nofree nosync nounwind willreturn } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind }