diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -4678,27 +4678,46 @@ /// If the coroutine is not suspended, or when it is resumed, the 'resume' /// expression is evaluated, and its result is the result of the overall /// expression. +/// When there is a symmetric transfer, i.e. await_suspend call returns a +/// coroutine handler, AwaitSuspendCall will be the AST that represents +/// the call to await_suspend; OVESuspend will be a OpaqueValueExpr wrapping +/// around AwaitSuspendCall, and Suspend will be the transfer call built +/// on top of OVESuspend. That is, we used a OpaqueValueExpr to divide Suspend +/// at the end of await_suspend call, so that we can emit instructions right +/// after the await_suspend call. Specifically, we emit coro.forcestack.begin +/// intrinsic to indicate that from that point on, any data accessed must not +/// be put on the coroutine frame, but on the stack. This is a critical hint to +/// the CoroSplit pass that any alloca used here untill coro.forcestack.end +/// shall remain on the stack. This is necessary because the await_suspend call +/// could potentially destroy the current frame, and there is no stable way +/// to guanarantee that the compiler can always put the temporaries used +/// afterwards on the stack. class CoroutineSuspendExpr : public Expr { friend class ASTStmtReader; SourceLocation KeywordLoc; - enum SubExpr { Common, Ready, Suspend, Resume, Count }; + enum SubExpr { Common, Ready, AwaitSuspendCall, Suspend, Resume, Count }; Stmt *SubExprs[SubExpr::Count]; - OpaqueValueExpr *OpaqueValue = nullptr; + OpaqueValueExpr *OVECommon = nullptr; + OpaqueValueExpr *OVESuspend = nullptr; public: CoroutineSuspendExpr(StmtClass SC, SourceLocation KeywordLoc, Expr *Common, - Expr *Ready, Expr *Suspend, Expr *Resume, - OpaqueValueExpr *OpaqueValue) + Expr *Ready, Expr *AwaitSuspendCall, Expr *Suspend, + Expr *Resume, OpaqueValueExpr *OVECommon, + OpaqueValueExpr *OVESuspend) : Expr(SC, Resume->getType(), Resume->getValueKind(), Resume->getObjectKind()), - KeywordLoc(KeywordLoc), OpaqueValue(OpaqueValue) { + KeywordLoc(KeywordLoc), OVECommon(OVECommon), OVESuspend(OVESuspend) { SubExprs[SubExpr::Common] = Common; SubExprs[SubExpr::Ready] = Ready; + SubExprs[SubExpr::AwaitSuspendCall] = AwaitSuspendCall; SubExprs[SubExpr::Suspend] = Suspend; SubExprs[SubExpr::Resume] = Resume; + assert((!AwaitSuspendCall) == (!OVESuspend) && + "AwaitSuspendCall and OVESuspend must be provided together"); setDependence(computeDependence(this)); } @@ -4709,6 +4728,7 @@ "wrong constructor for non-dependent co_await/co_yield expression"); SubExprs[SubExpr::Common] = Common; SubExprs[SubExpr::Ready] = nullptr; + SubExprs[SubExpr::AwaitSuspendCall] = nullptr; SubExprs[SubExpr::Suspend] = nullptr; SubExprs[SubExpr::Resume] = nullptr; setDependence(computeDependence(this)); @@ -4717,6 +4737,7 @@ CoroutineSuspendExpr(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) { SubExprs[SubExpr::Common] = nullptr; SubExprs[SubExpr::Ready] = nullptr; + SubExprs[SubExpr::AwaitSuspendCall] = nullptr; SubExprs[SubExpr::Suspend] = nullptr; SubExprs[SubExpr::Resume] = nullptr; } @@ -4728,16 +4749,22 @@ } /// getOpaqueValue - Return the opaque value placeholder. - OpaqueValueExpr *getOpaqueValue() const { return OpaqueValue; } + OpaqueValueExpr *getOpaqueValueCommon() const { return OVECommon; } Expr *getReadyExpr() const { return static_cast(SubExprs[SubExpr::Ready]); } + Expr *getAwaitSuspendCallExpr() const { + return static_cast(SubExprs[SubExpr::AwaitSuspendCall]); + } + Expr *getSuspendExpr() const { return static_cast(SubExprs[SubExpr::Suspend]); } + OpaqueValueExpr *getOpaqueValueSuspend() const { return OVESuspend; } + Expr *getResumeExpr() const { return static_cast(SubExprs[SubExpr::Resume]); } @@ -4768,10 +4795,12 @@ public: CoawaitExpr(SourceLocation CoawaitLoc, Expr *Operand, Expr *Ready, - Expr *Suspend, Expr *Resume, OpaqueValueExpr *OpaqueValue, + Expr *AwaitSuspendCall, Expr *Suspend, Expr *Resume, + OpaqueValueExpr *OVECommon, OpaqueValueExpr *OVESuspend, bool IsImplicit = false) : CoroutineSuspendExpr(CoawaitExprClass, CoawaitLoc, Operand, Ready, - Suspend, Resume, OpaqueValue) { + AwaitSuspendCall, Suspend, Resume, OVECommon, + OVESuspend) { CoawaitBits.IsImplicit = IsImplicit; } @@ -4853,9 +4882,11 @@ public: CoyieldExpr(SourceLocation CoyieldLoc, Expr *Operand, Expr *Ready, - Expr *Suspend, Expr *Resume, OpaqueValueExpr *OpaqueValue) + Expr *AwaitSuspendCall, Expr *Suspend, Expr *Resume, + OpaqueValueExpr *OVECommon, OpaqueValueExpr *OVESuspend) : CoroutineSuspendExpr(CoyieldExprClass, CoyieldLoc, Operand, Ready, - Suspend, Resume, OpaqueValue) {} + AwaitSuspendCall, Suspend, Resume, OVECommon, + OVESuspend) {} CoyieldExpr(SourceLocation CoyieldLoc, QualType Ty, Expr *Operand) : CoroutineSuspendExpr(CoyieldExprClass, CoyieldLoc, Ty, Operand) {} CoyieldExpr(EmptyShell Empty) diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -178,8 +178,8 @@ bool ignoreResult, bool forLValue) { auto *E = S.getCommonExpr(); - auto Binder = - CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); + auto Binder = CodeGenFunction::OpaqueValueMappingData::bind( + CGF, S.getOpaqueValueCommon(), E); auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); auto Prefix = buildSuspendPrefixStr(Coro, Kind); @@ -198,6 +198,19 @@ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + CodeGenFunction::OpaqueValueMappingData SuspendOVEBinder; + auto UnbindSuspendOVEOnExit = llvm::make_scope_exit([&] { + if (SuspendOVEBinder.isValid()) + SuspendOVEBinder.unbind(CGF); + }); + + llvm::CallInst *ForcestackStart = nullptr; + if (auto *OVESuspend = S.getOpaqueValueSuspend()) { + SuspendOVEBinder = CodeGenFunction::OpaqueValueMappingData::bind( + CGF, OVESuspend, S.getAwaitSuspendCallExpr()); + ForcestackStart = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_forcestack_begin)); + } auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. @@ -205,6 +218,10 @@ CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); CGF.EmitBlock(RealSuspendBlock); + } else if (ForcestackStart) { + Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_forcestack_end), + {ForcestackStart}); } // Emit the suspend point. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -339,11 +339,19 @@ } struct ReadySuspendResumeResult { - enum AwaitCallType { ACT_Ready, ACT_Suspend, ACT_Resume }; - Expr *Results[3]; - OpaqueValueExpr *OpaqueValue; + enum AwaitCallType { + ACT_Ready = 0, + ACT_AwaitSuspendCall, + ACT_Suspend, + ACT_Resume, + ACT_Count + }; + Expr *Results[ACT_Count]; + OpaqueValueExpr *OVECommon; + OpaqueValueExpr *OVESuspend; bool IsInvalid; }; +using ACT = ReadySuspendResumeResult::AwaitCallType; static ExprResult buildMemberCall(Sema &S, Expr *Base, SourceLocation Loc, StringRef Name, MultiExprArg Args) { @@ -427,9 +435,7 @@ // Assume valid until we see otherwise. // Further operations are responsible for setting IsInalid to true. - ReadySuspendResumeResult Calls = {{}, Operand, /*IsInvalid=*/false}; - - using ACT = ReadySuspendResumeResult::AwaitCallType; + ReadySuspendResumeResult Calls = {{}, Operand, nullptr, /*IsInvalid=*/false}; auto BuildSubExpr = [&](ACT CallType, StringRef Func, MultiExprArg Arg) -> Expr * { @@ -478,17 +484,21 @@ // a prvalue of type void, bool, or std::coroutine_handle for some // type Z. QualType RetType = AwaitSuspend->getCallReturnType(S.Context); + OpaqueValueExpr *OVESuspend = new (S.Context) OpaqueValueExpr( + Loc, AwaitSuspend->getType(), AwaitSuspend->getValueKind(), + AwaitSuspend->getObjectKind(), AwaitSuspend); // Experimental support for coroutine_handle returning await_suspend. - if (Expr *TailCallSuspend = - maybeTailCall(S, RetType, AwaitSuspend, Loc)) + if (Expr *TailCallSuspend = maybeTailCall(S, RetType, OVESuspend, Loc)) { // Note that we don't wrap the expression with ExprWithCleanups here // because that might interfere with tailcall contract (e.g. inserting // clean up instructions in-between tailcall and return). Instead // ExprWithCleanups is wrapped within maybeTailCall() prior to the resume // call. + Calls.OVESuspend = OVESuspend; + Calls.Results[ACT::ACT_AwaitSuspendCall] = AwaitSuspend; Calls.Results[ACT::ACT_Suspend] = TailCallSuspend; - else { + } else { // non-class prvalues always have cv-unqualified types if (RetType->isReferenceType() || (!RetType->isBooleanType() && !RetType->isVoidType())) { @@ -498,9 +508,12 @@ S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required) << AwaitSuspend->getDirectCallee(); Calls.IsInvalid = true; - } else + } else { + Calls.OVESuspend = nullptr; + Calls.Results[ACT::ACT_AwaitSuspendCall] = nullptr; Calls.Results[ACT::ACT_Suspend] = S.MaybeCreateExprWithCleanups(AwaitSuspend); + } } } @@ -911,9 +924,10 @@ if (RSS.IsInvalid) return ExprError(); - Expr *Res = - new (Context) CoawaitExpr(Loc, E, RSS.Results[0], RSS.Results[1], - RSS.Results[2], RSS.OpaqueValue, IsImplicit); + Expr *Res = new (Context) CoawaitExpr( + Loc, E, RSS.Results[ACT::ACT_Ready], + RSS.Results[ACT::ACT_AwaitSuspendCall], RSS.Results[ACT::ACT_Suspend], + RSS.Results[ACT::ACT_Resume], RSS.OVECommon, RSS.OVESuspend, IsImplicit); return Res; } @@ -965,10 +979,10 @@ *this, Coroutine->CoroutinePromise, Loc, E); if (RSS.IsInvalid) return ExprError(); - - Expr *Res = - new (Context) CoyieldExpr(Loc, E, RSS.Results[0], RSS.Results[1], - RSS.Results[2], RSS.OpaqueValue); + Expr *Res = new (Context) CoyieldExpr( + Loc, E, RSS.Results[ACT::ACT_Ready], + RSS.Results[ACT::ACT_AwaitSuspendCall], RSS.Results[ACT::ACT_Suspend], + RSS.Results[ACT::ACT_Resume], RSS.OVECommon, RSS.OVESuspend); return Res; } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -471,7 +471,8 @@ E->KeywordLoc = readSourceLocation(); for (auto &SubExpr: E->SubExprs) SubExpr = Record.readSubStmt(); - E->OpaqueValue = cast_or_null(Record.readSubStmt()); + E->OVECommon = cast_or_null(Record.readSubStmt()); + E->OVESuspend = cast_or_null(Record.readSubStmt()); E->setIsImplicit(Record.readInt() != 0); } @@ -480,7 +481,8 @@ E->KeywordLoc = readSourceLocation(); for (auto &SubExpr: E->SubExprs) SubExpr = Record.readSubStmt(); - E->OpaqueValue = cast_or_null(Record.readSubStmt()); + E->OVECommon = cast_or_null(Record.readSubStmt()); + E->OVESuspend = cast_or_null(Record.readSubStmt()); } void ASTStmtReader::VisitDependentCoawaitExpr(DependentCoawaitExpr *E) { diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -373,7 +373,8 @@ Record.AddSourceLocation(E->getKeywordLoc()); for (Stmt *S : E->children()) Record.AddStmt(S); - Record.AddStmt(E->getOpaqueValue()); + Record.AddStmt(E->getOpaqueValueCommon()); + Record.AddStmt(E->getOpaqueValueSuspend()); } void ASTStmtWriter::VisitCoawaitExpr(CoawaitExpr *E) { diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O1 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck --check-prefix=CHECK-PRESPLIT %s +// RUN: %clang_cc1 -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-POSTSPLIT %s #include "Inputs/coroutine.h" @@ -48,10 +50,48 @@ co_return; } -// check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points. -// CHECK-LABEL: final.suspend: -// CHECK: %[[PTR1:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[ADDR_TMP:.+]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[PTR1]]) -// CHECK: call i8* @{{.*address.*}}(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* {{[^,]*}} %[[ADDR_TMP]]) -// CHECK-NEXT: %[[PTR2:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[ADDR_TMP]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[PTR2]]) +// CHECK-LABEL: |-FunctionDecl {{.*}} foo 'detached_task ()' +// first ExprWithCleanups is the initial await +// CHECK: | |-ExprWithCleanups {{.*}} 'void' +// second ExprWithCleanups is the final await +// CHECK: | |-ExprWithCleanups {{.*}} 'void' +// AST for the await_suspend call +// CHECK: | | |-CXXMemberCallExpr {{.*}} 'coro::coroutine_handle<>':'std::experimental::coroutine_handle<>' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} +// CHECK: | | | | `-OpaqueValueExpr {{.*}} 'detached_task::promise_type::final_awaiter' lvalue +// AST for the symmetric transferred suspend +// CHECK: | | |-CallExpr {{.*}} 'void' +// CHECK: | | | |-ImplicitCastExpr {{.*}} 'void (*)(void *)' +// CHECK: | | | | `-DeclRefExpr {{.*}} 'void (void *)' lvalue Function {{.*}} '__builtin_coro_resume' 'void (void *)' +// CHECK: | | | `-ExprWithCleanups {{.*}} 'void *' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'void *' +// CHECK: | | | `-MemberExpr {{.*}} '' .address {{.*}} +// CHECK: | | | `-ImplicitCastExpr {{.*}} 'const std::experimental::coroutine_handle<>' xvalue +// CHECK: | | | `-MaterializeTemporaryExpr {{.*}} 'coro::coroutine_handle<>':'std::experimental::coroutine_handle<>' xvalue +// Below we are wrapping the await_suspend call with a OpaqueValueExpr +// CHECK: | | | `-OpaqueValueExpr {{.*}} 'coro::coroutine_handle<>':'std::experimental::coroutine_handle<>' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'coro::coroutine_handle<>':'std::experimental::coroutine_handle<>' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} + +// CHECK-PRESPLIT-LABEL: define dso_local void @_Z3foov( +// CHECK-PRESPLIT: entry: +// CHECK-PRESPLIT: %coerce = alloca %"struct.std::experimental::coroutines_v1::coroutine_handle.0", align 8 +// CHECK-PRESPLIT: final.suspend: +// CHECK-PRESPLIT: %[[HANDLE:.*]] = call i8* @{{.*await_suspend.*}} +// CHECK-PRESPLIT-NEXT: %[[COERCE1:.*]] = getelementptr inbounds %"struct.std::experimental::coroutines_v1::coroutine_handle.0", %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %coerce, i32 0, i32 0 +// CHECK-PRESPLIT-NEXT: store i8* %[[HANDLE]], i8** %[[COERCE1]], align 8 +// CHECK-PRESPLIT-NEXT: %[[FORCESTACK:.*]] = call i8* @llvm.coro.forcestack.begin() +// CHECK-PRESPLIT-NEXT: %[[ADDR:.*]] = call i8* @{{.*address.*}}(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* nonnull dereferenceable(8) %coerce) +// CHECK-PRESPLIT-NEXT: call void @llvm.coro.resume(i8* %[[ADDR]]) +// CHECK-PRESPLIT-NEXT: call void @llvm.coro.forcestack.end(i8* %[[FORCESTACK]]) +// CHECK-PRESPLIT-NEXT: %16 = call i8 @llvm.coro.suspend(token %{{.*}}, i1 true) + +// CHECK-POSTSPLIT: %_Z3foov.Frame = type { void (%_Z3foov.Frame*)*, void (%_Z3foov.Frame*)*, %"struct.detached_task::promise_type", i1, %"struct.std::experimental::coroutines_v1::suspend_always", %"struct.detached_task::promise_type::final_awaiter" } +// CHECK-POSTSPLIT-LABEL: define internal fastcc void @_Z3foov.resume( +// CHECK-POSTSPLIT: entry: +// CHECK-POSTSPLIT: %coerce = alloca %"struct.std::experimental::coroutines_v1::coroutine_handle.0", align 8 + +// CHECK-POSTSPLIT: %[[HANDLE:.*]] = call i8* @{{.*await_suspend.*}} +// CHECK-POSTSPLIT: %[[COERCE1:.*]] = getelementptr inbounds %"struct.std::experimental::coroutines_v1::coroutine_handle.0", %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %coerce, i32 0, i32 0 +// CHECK-POSTSPLIT: store i8* %[[HANDLE]], i8** %[[COERCE1]], align 8 +// CHECK-POSTSPLIT: %[[ADDR:.*]] = call i8* @{{.*address.*}}(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* nonnull dereferenceable(8) %coerce) diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O1 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -ast-dump | FileCheck %s #include "Inputs/coroutine.h" @@ -80,47 +80,37 @@ } } -// CHECK-LABEL: define{{.*}} void @_Z3barv -// CHECK: %[[MODE:.+]] = load i32, i32* %mode -// CHECK-NEXT: switch i32 %[[MODE]], label %{{.+}} [ -// CHECK-NEXT: i32 1, label %[[CASE1:.+]] -// CHECK-NEXT: i32 2, label %[[CASE2:.+]] -// CHECK-NEXT: ] - -// CHECK: [[CASE1]]: -// CHECK: br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]] -// CHECK: [[CASE1_AWAIT_SUSPEND]]: -// CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) -// CHECK-NEXT: %[[HANDLE11:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1:.+]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE11]]) - -// CHECK: %[[HANDLE12:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE12]]) -// CHECK-NEXT: call void @llvm.coro.resume -// CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend -// CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ -// CHECK-NEXT: i8 0, label %[[CASE1_AWAIT_READY]] -// CHECK-NEXT: i8 1, label %[[CASE1_AWAIT_CLEANUP:.+]] -// CHECK-NEXT: ] -// CHECK: [[CASE1_AWAIT_CLEANUP]]: -// make sure that the awaiter eventually gets cleaned up. -// CHECK: call void @{{.+Awaiter.+}} - -// CHECK: [[CASE2]]: -// CHECK: br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]] -// CHECK: [[CASE2_AWAIT_SUSPEND]]: -// CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) -// CHECK-NEXT: %[[HANDLE21:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2:.+]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE21]]) - -// CHECK: %[[HANDLE22:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE22]]) -// CHECK-NEXT: call void @llvm.coro.resume -// CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend -// CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ -// CHECK-NEXT: i8 0, label %[[CASE2_AWAIT_READY]] -// CHECK-NEXT: i8 1, label %[[CASE2_AWAIT_CLEANUP:.+]] -// CHECK-NEXT: ] -// CHECK: [[CASE2_AWAIT_CLEANUP]]: -// make sure that the awaiter eventually gets cleaned up. -// CHECK: call void @{{.+Awaiter.+}} +// CHECK-LABEL: `-FunctionDecl {{.*}} bar 'Task ()' +// CHECK: | `-SwitchStmt {{.*}} +// first case +// CHECK: | |-CaseStmt {{.*}} +// await_suspend call +// CHECK: | | |-CXXMemberCallExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} +// symmetric transfered suspend, which wraps around await_suspend call with a OpaqueValueExpr +// CHECK: | | |-CallExpr {{.*}} 'void' +// CHECK: | | | |-ImplicitCastExpr {{.*}} 'void (*)(void *)' +// CHECK: | | | | `-DeclRefExpr {{.*}} 'void (void *)' lvalue Function {{.*}} '__builtin_coro_resume' 'void (void *)' +// CHECK: | | | `-ExprWithCleanups {{.*}} 'void *' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'void *' +// CHECK: | | | `-MemberExpr {{.*}} '' .address {{.*}} +// CHECK: | | | `-ImplicitCastExpr {{.*}} 'const std::experimental::coroutine_handle<>' xvalue +// CHECK: | | | `-MaterializeTemporaryExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' xvalue +// CHECK: | | | `-OpaqueValueExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} +// second case +// CHECK: | |-CaseStmt {{.*}} +// CHECK: | | |-CXXMemberCallExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} +// CHECK: | | |-CallExpr {{.*}} 'void' +// CHECK: | | | |-ImplicitCastExpr {{.*}} 'void (*)(void *)' +// CHECK: | | | | `-DeclRefExpr {{.*}} 'void (void *)' lvalue Function {{.*}} '__builtin_coro_resume' 'void (void *)' +// CHECK: | | | `-ExprWithCleanups {{.*}} 'void *' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'void *' +// CHECK: | | | `-MemberExpr {{.*}} '' .address {{.*}} +// CHECK: | | | `-ImplicitCastExpr {{.*}} 'const std::experimental::coroutine_handle<>' xvalue +// CHECK: | | | `-MaterializeTemporaryExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' xvalue +// CHECK: | | | `-OpaqueValueExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | `-CXXMemberCallExpr {{.*}} 'Task::handle_t':'std::experimental::coroutine_handle' +// CHECK: | | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -1727,6 +1727,62 @@ The optimizer must replace coro.param(b', b) with `i1 true`, since `b` is used after suspend and therefore, it has to reside in the coroutine frame. +.. _coro.forcestack.begin: + +'llvm.coro.forcestack.begin' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i8* @llvm.coro.forcestack.begin() + +Overview: +""""""""" + +The '``llvm.coro.forcestack.begin``' intrinsic, paird with '``llvm.coro.forcestack.end``', +are emitted by the front end to mark a region where only data from the local stack can be +accessed, i.e. no coroutine frame access is allowed. It's introduced to help the optimizer +make correct decisions on where to put certain data. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +Marks the beginning of a region where any use of alloca must remain on the stack during +CoroSplit and cannot be put on the coroutine frame. This is needed to aid the implementation +of symmetric transfer. After the call to '``await_suspend``' returns a handle, the current +coroutine frame may have already been destroyed, hence we can no longer access the frame. +However in order to perform symmetric transfer on the handle, the compiler needs to use +a few temporaries and also invoke the '``address``' function on coroutine class. The compiler +is not capable of determining that these operations never lead to escape, and hence will +end up putting them on the frame. + +.. _coro.forcestack.end: + +'llvm.coro.forcestack.end' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i8* @llvm.coro.forcestack.end() + +Overview: +""""""""" + +Marker to end the region started by '``llvm.coro.forcestack.begin``'. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +Refer to the semantics of '``llvm.coro.forcestack.begin``'. + Coroutine Transformation Passes =============================== CoroEarly diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1252,6 +1252,9 @@ [IntrNoMem, ReadNone>, ReadNone>]>; +def int_coro_forcestack_begin : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_coro_forcestack_end : Intrinsic<[], [llvm_ptr_ty], [IntrNoMem]>; + // Coroutine Manipulation Intrinsics. def int_coro_resume : Intrinsic<[], [llvm_ptr_ty], [Throws]>; @@ -1301,8 +1304,8 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; // The pseudoprobe intrinsic works as a place holder to the block it probes. -// Like the sideeffect intrinsic defined above, this intrinsic is treated by the -// optimizer as having opaque side effects so that it won't be get rid of or moved +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be get rid of or moved // out of the block it probes. def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -824,6 +824,9 @@ return FrameTy; } +using ForceStackList = + SmallVector, 4>; + // We use a pointer use visitor to track how an alloca is being used. // The goal is to be able to answer the following three questions: // 1. Should this alloca be allocated on the frame instead. @@ -853,10 +856,18 @@ struct AllocaUseVisitor : PtrUseVisitor { using Base = PtrUseVisitor; AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, - const CoroBeginInst &CB, const SuspendCrossingInfo &Checker) - : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {} + const CoroBeginInst &CB, const SuspendCrossingInfo &Checker, + const ForceStackList &ForceStacks) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker), + ForceStacks(ForceStacks) {} void visit(Instruction &I) { + for (const auto &P : ForceStacks) + if (DT.dominates(P.first, &I) && DT.dominates(&I, P.second)) { + ShouldLiveOnFrame = false; + PI.setAborted(&I); + return; + } Users.insert(&I); Base::visit(I); // If the pointer is escaped prior to CoroBegin, we have to assume it would @@ -884,60 +895,7 @@ // pointer operand, we need to assume the alloca is been written. handleMayWrite(SI); - if (SI.getValueOperand() != U->get()) - return; - - // We are storing the pointer into a memory location, potentially escaping. - // As an optimization, we try to detect simple cases where it doesn't - // actually escape, for example: - // %ptr = alloca .. - // %addr = alloca .. - // store %ptr, %addr - // %x = load %addr - // .. - // If %addr is only used by loading from it, we could simply treat %x as - // another alias of %ptr, and not considering %ptr being escaped. - auto IsSimpleStoreThenLoad = [&]() { - auto *AI = dyn_cast(SI.getPointerOperand()); - // If the memory location we are storing to is not an alloca, it - // could be an alias of some other memory locations, which is difficult - // to analyze. - if (!AI) - return false; - // StoreAliases contains aliases of the memory location stored into. - SmallVector StoreAliases = {AI}; - while (!StoreAliases.empty()) { - Instruction *I = StoreAliases.pop_back_val(); - for (User *U : I->users()) { - // If we are loading from the memory location, we are creating an - // alias of the original pointer. - if (auto *LI = dyn_cast(U)) { - enqueueUsers(*LI); - handleAlias(*LI); - continue; - } - // If we are overriding the memory location, the pointer certainly - // won't escape. - if (auto *S = dyn_cast(U)) - if (S->getPointerOperand() == I) - continue; - if (auto *II = dyn_cast(U)) - if (II->isLifetimeStartOrEnd()) - continue; - // BitCastInst creats aliases of the memory location being stored - // into. - if (auto *BI = dyn_cast(U)) { - StoreAliases.push_back(BI); - continue; - } - return false; - } - } - - return true; - }; - - if (!IsSimpleStoreThenLoad()) + if (SI.getValueOperand() == U->get()) PI.setEscaped(&SI); } @@ -995,6 +953,7 @@ const DominatorTree &DT; const CoroBeginInst &CoroBegin; const SuspendCrossingInfo &Checker; + const ForceStackList &ForceStacks; // All alias to the original AllocaInst, created before CoroBegin and used // after CoroBegin. Each entry contains the instruction and the offset in the // original Alloca. They need to be recreated after CoroBegin off the frame. @@ -2119,9 +2078,27 @@ } } +static ForceStackList collectForceStacks(Function &F) { + ForceStackList ForceStacks; + for (auto &I : instructions(F)) + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == llvm::Intrinsic::coro_forcestack_begin) { + assert(II->getNumUses() == 1 && + "Each coro_forcestack_begin intrinsic must be used by one " + "coro_forcestack_end intrinsic"); + auto *End = cast(II->user_back()); + assert(End->getIntrinsicID() == llvm::Intrinsic::coro_forcestack_end && + "Each coro_forcestack_begin intrinsic must be used by one " + "coro_forcestack_end intrinsic"); + ForceStacks.emplace_back(II, End); + } + return ForceStacks; +} + static void collectFrameAllocas(Function &F, coro::Shape &Shape, const SuspendCrossingInfo &Checker, - SmallVectorImpl &Allocas) { + SmallVectorImpl &Allocas, + const ForceStackList &ForceStacks) { for (Instruction &I : instructions(F)) { auto *AI = dyn_cast(&I); if (!AI) @@ -2133,7 +2110,7 @@ } DominatorTree DT(F); AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, - *Shape.CoroBegin, Checker}; + *Shape.CoroBegin, Checker, ForceStacks}; Visitor.visitPtr(*AI); if (!Visitor.getShouldLiveOnFrame()) continue; @@ -2288,8 +2265,14 @@ } sinkLifetimeStartMarkers(F, Shape, Checker); - if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty()) - collectFrameAllocas(F, Shape, Checker, FrameData.Allocas); + auto ForceStacks = collectForceStacks(F); + if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty()) { + collectFrameAllocas(F, Shape, Checker, FrameData.Allocas, ForceStacks); + for (const auto &P : ForceStacks) { + DeadInstructions.push_back(P.second); + DeadInstructions.push_back(P.first); + } + } LLVM_DEBUG(dumpAllocas(FrameData.Allocas)); // Collect the spills for arguments and other not-materializable values. diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll --- a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll @@ -1,5 +1,5 @@ -; Test that in some simple cases allocas will not live on the frame even -; though their pointers are stored. +; Test that even though some stores may seem to escape pointers, +; they can be put on the stack as long as they are within forcestack range. ; RUN: opt < %s -coro-split -S | FileCheck %s ; RUN: opt < %s -passes=coro-split -S | FileCheck %s @@ -19,14 +19,12 @@ %2 = call i8* @await_suspend() %3 = getelementptr inbounds %"handle", %"handle"* %0, i32 0, i32 0 store i8* %2, i8** %3, align 8 - %4 = bitcast %"handle"** %1 to i8* - call void @llvm.lifetime.start.p0i8(i64 8, i8* %4) + %4 = call i8* @llvm.coro.forcestack.begin() store %"handle"* %0, %"handle"** %1, align 8 %5 = load %"handle"*, %"handle"** %1, align 8 %6 = getelementptr inbounds %"handle", %"handle"* %5, i32 0, i32 0 %7 = load i8*, i8** %6, align 8 - %8 = bitcast %"handle"** %1 to i8* - call void @llvm.lifetime.end.p0i8(i64 8, i8* %8) + call void @llvm.coro.forcestack.end(i8* %4) br label %finish finish: @@ -55,10 +53,7 @@ ; CHECK: [[TMP2:%.*]] = call i8* @await_suspend() ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[HANDLE]], %handle* [[TMP0]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[TMP2]], i8** [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast %handle** [[TMP1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP4]]) ; CHECK-NEXT: store %handle* [[TMP0]], %handle** [[TMP1]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP4]]) ; declare i8* @llvm.coro.free(token, i8*) @@ -71,6 +66,8 @@ declare i1 @llvm.coro.alloc(token) declare i8* @llvm.coro.begin(token, i8*) declare i1 @llvm.coro.end(i8*, i1) +declare i8* @llvm.coro.forcestack.begin() +declare void @llvm.coro.forcestack.end(i8*) declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)