diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -174,6 +174,61 @@
 used for low-level lowering and inlining is expected to be applied earlier in
 the pipeline.
 
+Async Lowering
+--------------
+
+In async-continuation lowering, signaled by the use of `llvm.coro.id.async`,
+control flow must be handled explicitly by the frontend.
+
+In this lowering, a coroutine is assumed to take the current `async context` as
+its first argument. It is used to marshal arguments and return values of the
+coroutine. Therefore an async coroutine returns `void`.
+
+.. code-block:: llvm
+
+  define swiftcc void @async_coroutine(i8* %async.ctxt, i8*, i8*) {
+  }
+
+Every suspend point takes an `async context` argument which provides the context
+and the coroutine frame of the callee function. Every
+suspend point has an associated `resume function` denoted by the
+`llvm.coro.async.resume` intrinsic. The coroutine is resumed by
+calling this `resume function` passing the `async context` as the first
+argument. It is assumed that the `resume function` can restore its (the
+caller's) `async context` by loading the first field in the `async context`.
+
+.. code-block:: c
+
+  struct async_context {
+    struct async_context *caller_context;
+    ...
+  }
+
+The frontend should provide an `async function pointer` struct for each async
+coroutine, passed via `llvm.coro.id.async`'s argument. The initial size and
+alignment of the `async context` must be provided as arguments to the
+`llvm.coro.id.async` intrinsic. Lowering will update the size entry with the
+coroutine frame requirements. The frontend is responsible for allocating the
+memory for the `async context` but can use the `async function pointer` struct
+to obtain the required size.
+
+.. code-block:: c
+
+  struct async_function_pointer {
+    uint32_t context_size;
+    uint32_t relative_function_pointer_to_async_impl;
+  }
+
+Lowering will split an async coroutine into a ramp function and one resume
+function per suspend point.
+
+How control flow is passed from the caller to the suspension point and back to
+the resume function is left up to the frontend.
+
+The suspend point takes a function and its arguments. The function is intended
+to model the transfer to the callee function. It will be tail called by
+lowering and therefore must have the same signature and calling convention as
+the async coroutine.
+
 Coroutines by Example
 =====================
 
@@ -1093,6 +1148,45 @@
 
 A frontend should emit exactly one `coro.id` intrinsic per coroutine.
 
+.. _coro.id.async:
+
+'llvm.coro.id.async' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+  declare token @llvm.coro.id.async(i32 <context size>, i32 <alignment>,
+                          i8* <context arg>,
+                          i8* <async function pointer>)
+
+Overview:
+"""""""""
+
+The '``llvm.coro.id.async``' intrinsic returns a token identifying an async coroutine.
+
+Arguments:
+""""""""""
+
+The first argument provides the initial size of the `async context` as required
+by the frontend. Lowering will add to this size the size required by the frame
+storage and store that value in the `async function pointer`.
+
+The second argument is the alignment guarantee for the memory of the
+`async context`. The frontend guarantees that the memory will be aligned to this
+value.
+
+The third argument is the `async context` argument in the current coroutine.
+
+The fourth argument is the address of the `async function pointer` struct.
+Lowering will update the context size requirement in this struct by adding the
+coroutine frame size requirement to the initial size requirement, as specified
+by the first argument of this intrinsic.
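(Editorial note, not part of the patch: as an illustration of this struct, the
coro-async.ll test added at the end of this patch emits the `async function
pointer` as a packed global whose second field encodes the coroutine
implementation as an offset relative to that field. A minimal sketch of such a
global, with a hypothetical coroutine `@my_coroutine` that takes only the
context argument:)

.. code-block:: llvm

  @my_coroutine_fp = constant <{ i32, i32 }>
    <{ i32 64,   ; initial async context size; lowering adds the frame size
       i32 trunc (   ; pointer to @my_coroutine, relative to this field
             i64 sub (
               i64 ptrtoint (void (i8*)* @my_coroutine to i64),
               i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>,
                             <{ i32, i32 }>* @my_coroutine_fp, i32 0, i32 1) to i64))
             to i32)
     }>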
+
+
+Semantics:
+""""""""""
+
+A frontend should emit exactly one `coro.id.async` intrinsic per coroutine.
+
 .. _coro.id.retcon:
 
 'llvm.coro.id.retcon' Intrinsic
@@ -1380,6 +1474,46 @@
     switch i8 %suspend1, label %suspend [i8 0, label %resume1
                                          i8 1, label %cleanup]
 
+.. _coro.suspend.async:
+
+'llvm.coro.suspend.async' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+  declare {i8*, i8*, i8*} @llvm.coro.suspend.async(i8* <resume function>,
+                          i8* <callee context>,
+                          ... <function to call>
+                          ... <arguments>)
+
+Overview:
+"""""""""
+
+The '``llvm.coro.suspend.async``' intrinsic marks the point where
+execution of an async coroutine is suspended and control is passed to a callee.
+
+Arguments:
+""""""""""
+
+The first argument should be the result of the `llvm.coro.async.resume` intrinsic.
+Lowering will replace this intrinsic with the resume function for this suspend
+point.
+
+The second argument is the `async context` allocation for the callee. It should
+provide storage for the `async context` header and the coroutine frame.
+
+The third argument is the function that models transfer to the callee at the
+suspend point. It should take 3 arguments. Lowering will `musttail` call this
+function.
+
+The fourth to sixth arguments are the arguments passed to this function.
+
+Semantics:
+""""""""""
+
+The result values of the intrinsic are mapped to the arguments of the resume function.
+Execution is suspended at this intrinsic and resumed when the resume function is
+called.
+
 .. _coro.suspend.retcon:
 
 'llvm.coro.suspend.retcon' Intrinsic
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1187,6 +1187,21 @@
                              llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], []>;
 def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
+def int_coro_id_async : Intrinsic<[llvm_token_ty],
+    [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty],
+    []>;
+def int_coro_async_context_alloc : Intrinsic<[llvm_ptr_ty],
+    [llvm_ptr_ty, llvm_ptr_ty],
+    []>;
+def int_coro_async_context_dealloc : Intrinsic<[],
+    [llvm_ptr_ty],
+    []>;
+def int_coro_async_resume : Intrinsic<[llvm_ptr_ty],
+    [],
+    []>;
+def int_coro_suspend_async : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+    [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty],
+    []>;
 def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
                                [WriteOnly<ArgIndex<1>>]>;
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -74,6 +74,7 @@
       case Intrinsic::coro_id:
       case Intrinsic::coro_id_retcon:
       case Intrinsic::coro_id_retcon_once:
+      case Intrinsic::coro_id_async:
         II->replaceAllUsesWith(ConstantTokenNone::get(Context));
         break;
       case Intrinsic::coro_subfn_addr:
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -187,6 +187,7 @@
         break;
       case Intrinsic::coro_id_retcon:
       case Intrinsic::coro_id_retcon_once:
+      case Intrinsic::coro_id_async:
         F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
         break;
       case Intrinsic::coro_resume:
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -135,10 +135,10 @@
   BasicBlock *UseBB = I->getParent();
 
-  // As a special case, treat uses by an llvm.coro.suspend.retcon
-  // as if they were uses in the suspend's single predecessor: the
-  // uses conceptually occur before the suspend.
-  if (isa<CoroSuspendRetconInst>(I)) {
+  // As a special case, treat uses by an llvm.coro.suspend.retcon or an
+  // llvm.coro.suspend.async as if they were uses in the suspend's single
+  // predecessor: the uses conceptually occur before the suspend.
+  if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
     UseBB = UseBB->getSinglePredecessor();
     assert(UseBB && "should have split coro.suspend into its own block");
   }
@@ -788,6 +788,18 @@
            B.getStructAlign() <= Id->getStorageAlignment());
     break;
   }
+  case coro::ABI::Async: {
+    Shape.AsyncLowering.FrameOffset =
+        alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
+    Shape.AsyncLowering.ContextSize =
+        Shape.AsyncLowering.FrameOffset + Shape.FrameSize;
+    if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
+      report_fatal_error(
+          "The alignment requirement of frame variables cannot be higher than "
+          "the alignment of the async function context");
+    }
+    break;
+  }
   }
 
   return FrameTy;
@@ -1143,7 +1155,8 @@
   Shape.AllocaSpillBlock = SpillBlock;
 
   // retcon and retcon.once lowering assumes all uses have been sunk.
-  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
+  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+      Shape.ABI == coro::ABI::Async) {
     // If we found any allocas, replace all of their remaining uses with Geps.
     Builder.SetInsertPoint(&SpillBlock->front());
     for (const auto &P : FrameData.Allocas) {
@@ -1866,7 +1879,8 @@
   for (User *U : Def->users()) {
     auto Inst = cast<Instruction>(U);
     if (Inst->getParent() != CoroBegin->getParent() ||
-        Dom.dominates(CoroBegin, Inst))
+        Dom.dominates(CoroBegin, Inst) ||
+        isa<CoroIdAsyncInst>(Inst) /*'fake' use of async context argument*/)
       continue;
     if (ToMove.insert(Inst))
       Worklist.push_back(Inst);
@@ -2162,7 +2176,8 @@
     }
   }
   LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
-  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
+  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+      Shape.ABI == coro::ABI::Async)
     sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
   Shape.FrameTy = buildFrameType(F, Shape, FrameData);
   // Add PromiseAlloca to Allocas list so that it is processed in insertSpills.
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -99,9 +99,9 @@
   // Methods to support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const IntrinsicInst *I) {
     auto ID = I->getIntrinsicID();
-    return ID == Intrinsic::coro_id ||
-           ID == Intrinsic::coro_id_retcon ||
-           ID == Intrinsic::coro_id_retcon_once;
+    return ID == Intrinsic::coro_id || ID == Intrinsic::coro_id_retcon ||
+           ID == Intrinsic::coro_id_retcon_once ||
+           ID == Intrinsic::coro_id_async;
   }
 
   static bool classof(const Value *V) {
@@ -273,6 +273,102 @@
   }
 };
 
+/// This represents the llvm.coro.id.async instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdAsyncInst : public AnyCoroIdInst {
+  enum { SizeArg, AlignArg, StorageArg, AsyncFuncPtrArg };
+
+public:
+  void checkWellFormed() const;
+
+  /// The initial async function context size. Its fields are reserved
+  /// for use by the frontend. The frame will be allocated as a tail of this
+  /// context.
+  uint64_t getStorageSize() const {
+    return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
+  }
+
+  /// The alignment of the initial async function context.
+  Align getStorageAlignment() const {
+    return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
+  }
+
+  /// The async context parameter.
+  Value *getStorage() const { return getArgOperand(StorageArg); }
+
+  /// Return the async function pointer address. This should be the address of
+  /// an async function pointer struct for the current async function.
+  /// struct async_function_pointer {
+  ///   uint32_t context_size;
+  ///   uint32_t relative_async_function_pointer;
+  /// };
+  GlobalVariable *getAsyncFunctionPointer() const {
+    return cast<GlobalVariable>(
+        getArgOperand(AsyncFuncPtrArg)->stripPointerCasts());
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    auto ID = I->getIntrinsicID();
+    return ID == Intrinsic::coro_id_async;
+  }
+
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This represents the llvm.coro.async.context.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncContextAllocInst : public IntrinsicInst {
+  enum { AsyncFuncPtrArg };
+
+public:
+  GlobalVariable *getAsyncFunctionPointer() const {
+    return cast<GlobalVariable>(
+        getArgOperand(AsyncFuncPtrArg)->stripPointerCasts());
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_async_context_alloc;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This represents the llvm.coro.async.context.dealloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncContextDeallocInst
+    : public IntrinsicInst {
+  enum { AsyncContextArg };
+
+public:
+  Value *getAsyncContext() const {
+    return getArgOperand(AsyncContextArg)->stripPointerCasts();
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_async_context_dealloc;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This represents the llvm.coro.async.resume instruction.
+/// During lowering this is replaced by the resume function of a suspend point
+/// (the continuation function).
+class LLVM_LIBRARY_VISIBILITY CoroAsyncResumeInst : public IntrinsicInst {
+public:
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_async_resume;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This represents the llvm.coro.frame instruction.
 class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
 public:
@@ -366,6 +462,7 @@
   // Methods to support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const IntrinsicInst *I) {
     return I->getIntrinsicID() == Intrinsic::coro_suspend ||
+           I->getIntrinsicID() == Intrinsic::coro_suspend_async ||
            I->getIntrinsicID() == Intrinsic::coro_suspend_retcon;
   }
   static bool classof(const Value *V) {
@@ -405,6 +502,34 @@
     return nullptr;
   }
 
+/// This represents the llvm.coro.suspend.async instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSuspendAsyncInst : public AnyCoroSuspendInst {
+  enum { ResumeFunctionArg, AsyncContextArg, MustTailCallFuncArg };
+
+public:
+  Value *getAsyncContext() const {
+    return getArgOperand(AsyncContextArg)->stripPointerCasts();
+  }
+
+  CoroAsyncResumeInst *getResumeFunction() const {
+    return cast<CoroAsyncResumeInst>(
+        getArgOperand(ResumeFunctionArg)->stripPointerCasts());
+  }
+
+  Function *getMustTailCallFunction() const {
+    return cast<Function>(
+        getArgOperand(MustTailCallFuncArg)->stripPointerCasts());
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_suspend_async;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This represents the llvm.coro.suspend.retcon instruction.
 class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst {
 public:
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -81,6 +81,11 @@
     /// suspend at most once during its execution, and the return value of
     /// the continuation is void.
     RetconOnce,
+
+    /// The "async continuation" lowering, where each suspend point creates a
+    /// single continuation function. The continuation function is available as an
+    /// intrinsic.
+    Async,
   };
 
   // Holds structural Coroutine Intrinsics for a particular function and other
@@ -133,9 +138,22 @@
     bool IsFrameInlineInStorage;
   };
 
+  struct AsyncLoweringStorage {
+    FunctionType *AsyncFuncTy;
+    Value *Context;
+    uint64_t ContextHeaderSize;
+    uint64_t ContextAlignment;
+    uint64_t FrameOffset; // Start of the frame.
+    uint64_t ContextSize; // Includes frame size.
+    GlobalVariable *AsyncFuncPointer;
+
+    Align getContextAlignment() const { return Align(ContextAlignment); }
+  };
+
   union {
     SwitchLoweringStorage SwitchLowering;
     RetconLoweringStorage RetconLowering;
+    AsyncLoweringStorage AsyncLowering;
   };
 
   CoroIdInst *getSwitchCoroId() const {
@@ -149,6 +167,11 @@
     return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
   }
 
+  CoroIdAsyncInst *getAsyncCoroId() const {
+    assert(ABI == coro::ABI::Async);
+    return cast<CoroIdAsyncInst>(CoroBegin->getId());
+  }
+
   unsigned getSwitchIndexField() const {
     assert(ABI == coro::ABI::Switch);
     assert(FrameTy && "frame type not assigned");
@@ -178,7 +201,10 @@
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
       return RetconLowering.ResumePrototype->getFunctionType();
+    case coro::ABI::Async:
+      return AsyncLowering.AsyncFuncTy;
     }
+    llvm_unreachable("Unknown coro::ABI enum");
   }
 
@@ -212,6 +238,8 @@
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
       return RetconLowering.ResumePrototype->getCallingConv();
+    case coro::ABI::Async:
+      return CallingConv::Swift;
     }
     llvm_unreachable("Unknown coro::ABI enum");
   }
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -90,7 +90,11 @@
     /// An individual continuation function.
     Continuation,
+
+    /// An async resume function.
+    Async,
   };
+
 private:
   Function &OrigF;
   Function *NewF;
@@ -102,8 +106,9 @@
   Value *NewFramePtr = nullptr;
   Value *SwiftErrorSlot = nullptr;
 
-  /// The active suspend instruction; meaningful only for continuation ABIs.
-  AnyCoroSuspendInst *ActiveSuspend = nullptr;
+  /// The active suspend instruction; meaningful only for continuation and async
+  /// ABIs.
+  AnyCoroSuspendInst *ActiveSuspend = nullptr;
 
 public:
   /// Create a cloner for a switch lowering.
@@ -117,11 +122,11 @@
   /// Create a cloner for a continuation lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
              Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
-      : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
-        FKind(Kind::Continuation), Builder(OrigF.getContext()),
-        ActiveSuspend(ActiveSuspend) {
+      : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
+        FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
+        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
     assert(Shape.ABI == coro::ABI::Retcon ||
-           Shape.ABI == coro::ABI::RetconOnce);
+           Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
     assert(NewF && "need existing function for continuation");
     assert(ActiveSuspend && "need active suspend point for continuation");
   }
@@ -136,6 +141,7 @@
 private:
   bool isSwitchDestroyFunction() {
     switch (FKind) {
+    case Kind::Async:
     case Kind::Continuation:
     case Kind::SwitchResume:
       return false;
@@ -149,7 +155,7 @@
   void createDeclaration();
   void replaceEntryBlock();
   Value *deriveNewFramePointer();
-  void replaceRetconSuspendUses();
+  void replaceRetconOrAsyncSuspendUses();
   void replaceCoroSuspends();
   void replaceCoroEnds();
   void replaceSwiftErrorOps();
@@ -188,6 +194,11 @@
     Builder.CreateRetVoid();
     break;
 
+  // In async lowering this returns.
+  case coro::ABI::Async:
+    Builder.CreateRetVoid();
+    break;
+
   // In unique continuation lowering, the continuations always return void.
   // But we may have implicitly allocated storage.
   case coro::ABI::RetconOnce:
@@ -231,7 +242,9 @@
     if (!InResume)
       return;
     break;
-
+  // In async lowering this does nothing.
+  case coro::ABI::Async:
+    break;
   // In continuation-lowering, this frees the continuation storage.
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
@@ -403,20 +416,24 @@
       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                        OrigF.getName() + Suffix);
   NewF->addParamAttr(0, Attribute::NonNull);
-  NewF->addParamAttr(0, Attribute::NoAlias);
+
+  // For the async lowering ABI we can't guarantee that the context argument is
+  // not accessed via a different pointer not based on the argument.
+  if (Shape.ABI != coro::ABI::Async)
+    NewF->addParamAttr(0, Attribute::NoAlias);
 
   M->getFunctionList().insert(InsertBefore, NewF);
   return NewF;
 }
 
-/// Replace uses of the active llvm.coro.suspend.retcon call with the
+/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
 /// arguments to the continuation function.
 ///
 /// This assumes that the builder has a meaningful insertion point.
-void CoroCloner::replaceRetconSuspendUses() {
-  assert(Shape.ABI == coro::ABI::Retcon ||
-         Shape.ABI == coro::ABI::RetconOnce);
+void CoroCloner::replaceRetconOrAsyncSuspendUses() {
+  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+         Shape.ABI == coro::ABI::Async);
 
   auto NewS = VMap[ActiveSuspend];
   if (NewS->use_empty()) return;
@@ -424,7 +441,11 @@
   // Copy out all the continuation arguments after the buffer pointer into
   // an easily-indexed data structure for convenience.
   SmallVector<Value *, 8> Args;
-  for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
+  // The async ABI includes all arguments -- including the first argument.
+  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
+  for (auto I = IsAsyncABI ?
+                    NewF->arg_begin() : std::next(NewF->arg_begin()),
+                E = NewF->arg_end();
+       I != E; ++I)
     Args.push_back(&*I);
 
   // If the suspend returns a single scalar value, we can just do a simple
@@ -470,6 +491,10 @@
     SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
     break;
 
+  // In async lowering there are no uses of the result.
+  case coro::ABI::Async:
+    return;
+
   // In returned-continuation lowering, the arguments from earlier
   // continuations are theoretically arbitrary, and they should have been
   // spilled.
@@ -601,13 +626,18 @@
     Builder.CreateBr(SwitchBB);
     break;
   }
-
+  case coro::ABI::Async:
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce: {
     // In continuation ABIs, we want to branch to immediately after the
     // active suspend point. Earlier phases will have put the suspend in its
     // own basic block, so just thread our jump directly to its successor.
-    auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
+    assert((Shape.ABI == coro::ABI::Async &&
+            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
+           ((Shape.ABI == coro::ABI::Retcon ||
+             Shape.ABI == coro::ABI::RetconOnce) &&
+            isa<CoroSuspendRetconInst>(ActiveSuspend)));
+    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
     auto Branch = cast<BranchInst>(MappedCS->getNextNode());
     assert(Branch->isUnconditional());
     Builder.CreateBr(Branch->getSuccessor(0));
@@ -624,7 +654,25 @@
   // In switch-lowering, the argument is the frame pointer.
   case coro::ABI::Switch:
     return &*NewF->arg_begin();
-
+  case coro::ABI::Async: {
+    auto *CalleeContext = &*NewF->arg_begin();
+    auto *FramePtrTy = Shape.FrameTy->getPointerTo();
+    // The caller context is assumed to be stored at the beginning of the
+    // callee context.
+    // struct async_context {
+    //   struct async_context *caller;
+    //   ...
+    auto &Context = Builder.getContext();
+    auto *Int8PtrPtrTy = Type::getInt8PtrTy(Context)->getPointerTo();
+    auto *CallerContextAddr =
+        Builder.CreateBitOrPointerCast(CalleeContext, Int8PtrPtrTy);
+    auto *CallerContext = Builder.CreateLoad(CallerContextAddr);
+    // The frame is located after the async_context header.
+    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
+        Type::getInt8Ty(Context), CallerContext,
+        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
+    return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
+  }
   // In continuation-lowering, the argument is the opaque storage.
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce: {
@@ -707,7 +755,8 @@
     addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                          Shape.FrameAlign);
     break;
-
+  case coro::ABI::Async:
+    break;
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
     // If we have a continuation prototype, just use its attributes,
@@ -737,6 +786,12 @@
   // so we want to leave any returns in place.
   case coro::ABI::Retcon:
     break;
+  // Async lowering will insert musttail calls at all suspend points,
+  // each followed by a return.
+  // Don't change returns to unreachable because that will trip up the verifier.
+  // These returns should be unreachable from the clone.
+  case coro::ABI::Async:
+    break;
   }
 
   NewF->setAttributes(NewAttrs);
@@ -767,14 +822,14 @@
     if (Shape.SwitchLowering.HasFinalSuspend)
       handleFinalSuspend();
     break;
-
+  case coro::ABI::Async:
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
     // Replace uses of the active suspend with the corresponding
     // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
-    replaceRetconSuspendUses();
+    replaceRetconOrAsyncSuspendUses();
    break;
  }
 
@@ -811,7 +866,25 @@
   }
 }
 
+static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
+  assert(Shape.ABI == coro::ABI::Async);
+
+  auto *FuncPtrStruct = cast<ConstantStruct>(
+      Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
+  auto *OrigContextSize = FuncPtrStruct->getOperand(0);
+  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(1);
+  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
+                                          Shape.AsyncLowering.ContextSize);
+  auto *NewFuncPtrStruct = ConstantStruct::get(
+      FuncPtrStruct->getType(), NewContextSize, OrigRelativeFunOffset);
+
+  Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
+}
+
 static void replaceFrameSize(coro::Shape &Shape) {
+  if (Shape.ABI == coro::ABI::Async)
+    updateAsyncFuncPointerContextSize(Shape);
+
   if (Shape.CoroSizes.empty())
     return;
 
@@ -1075,7 +1148,7 @@
     }
     break;
   }
-
+  case coro::ABI::Async:
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
     CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
@@ -1271,6 +1344,98 @@
   setCoroInfo(F, Shape, Clones);
 }
 
+static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
+                                       Value *Continuation) {
+  auto *ResumeIntrinsic = Suspend->getResumeFunction();
+  auto &Context = Suspend->getParent()->getParent()->getContext();
+  auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+
+  IRBuilder<> Builder(ResumeIntrinsic);
+  auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
+  ResumeIntrinsic->replaceAllUsesWith(Val);
+  ResumeIntrinsic->eraseFromParent();
+  Suspend->setOperand(0, UndefValue::get(Int8PtrTy));
+}
+
+static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
+                                SmallVectorImpl<Function *> &Clones) {
+  assert(Shape.ABI == coro::ABI::Async);
+  assert(Clones.empty());
+  // Reset various things that the optimizer might have decided it
+  // "knows" about the coroutine function due to not seeing a return.
+  F.removeFnAttr(Attribute::NoReturn);
+  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+
+  auto &Context = F.getContext();
+  auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+
+  auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
+  IRBuilder<> Builder(Id);
+
+  auto *FramePtr = Id->getStorage();
+  FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
+  FramePtr = Builder.CreateConstInBoundsGEP1_32(
+      Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
+      "async.ctx.frameptr");
+
+  // Map all uses of llvm.coro.begin to the allocated frame pointer.
+  {
+    // Make sure we don't invalidate Shape.FramePtr.
+    TrackingVH<Instruction> Handle(Shape.FramePtr);
+    Shape.CoroBegin->replaceAllUsesWith(FramePtr);
+    Shape.FramePtr = Handle.getValPtr();
+  }
+
+  // Create all the functions in order after the main function.
+  auto NextF = std::next(F.getIterator());
+
+  // Create a continuation function for each of the suspend points.
+  Clones.reserve(Shape.CoroSuspends.size());
+  for (size_t idx = 0, end = Shape.CoroSuspends.size(); idx != end; ++idx) {
+    auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[idx]);
+
+    // Create the clone declaration.
+    auto *Continuation =
+        createCloneDeclaration(F, Shape, ".resume." + Twine(idx), NextF);
+    Clones.push_back(Continuation);
+
+    // Insert a branch to a new return block immediately before the suspend
+    // point.
+    auto *SuspendBB = Suspend->getParent();
+    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
+    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
+
+    // Place it before the first suspend.
+    auto *ReturnBB =
+        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
+    Branch->setSuccessor(0, ReturnBB);
+
+    IRBuilder<> Builder(ReturnBB);
+
+    // Insert the call to the tail call function.
+    auto *Fun = Suspend->getMustTailCallFunction();
+    SmallVector<Value *, 8> Args(Suspend->operand_values());
+    auto *TailCall = Builder.CreateCall(
+        cast<FunctionType>(Fun->getType()->getPointerElementType()), Fun,
+        ArrayRef<Value *>(Args).drop_front(3).drop_back(1));
+    TailCall->setTailCallKind(CallInst::TCK_MustTail);
+    TailCall->setCallingConv(Fun->getCallingConv());
+    Builder.CreateRetVoid();
+
+    // Replace the llvm.coro.async.resume intrinsic call.
+    replaceAsyncResumeFunction(Suspend, Continuation);
+  }
+
+  assert(Clones.size() == Shape.CoroSuspends.size());
+  for (size_t idx = 0, end = Shape.CoroSuspends.size(); idx != end; ++idx) {
+    auto *Suspend = Shape.CoroSuspends[idx];
+    auto *Clone = Clones[idx];
+
+    CoroCloner(F, "resume." + Twine(idx), Shape, Clone, Suspend).create();
+  }
+}
+
 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                  SmallVectorImpl<Function *> &Clones) {
   assert(Shape.ABI == coro::ABI::Retcon ||
@@ -1441,6 +1606,9 @@
   case coro::ABI::Switch:
     splitSwitchCoroutine(F, Shape, Clones);
     break;
+  case coro::ABI::Async:
+    splitAsyncCoroutine(F, Shape, Clones);
+    break;
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
     splitRetconCoroutine(F, Shape, Clones);
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -124,6 +124,9 @@
   // NOTE: Must be sorted!
  static const char *const CoroIntrinsics[] = {
      "llvm.coro.alloc",
+      "llvm.coro.async.context.alloc",
+      "llvm.coro.async.context.dealloc",
+      "llvm.coro.async.store_resume",
      "llvm.coro.begin",
      "llvm.coro.destroy",
      "llvm.coro.done",
@@ -131,6 +134,7 @@
      "llvm.coro.frame",
      "llvm.coro.free",
      "llvm.coro.id",
+      "llvm.coro.id.async",
      "llvm.coro.id.retcon",
      "llvm.coro.id.retcon.once",
      "llvm.coro.noop",
@@ -142,6 +146,7 @@
      "llvm.coro.size",
      "llvm.coro.subfn.addr",
      "llvm.coro.suspend",
+      "llvm.coro.suspend.async",
      "llvm.coro.suspend.retcon",
  };
  return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1;
@@ -269,6 +274,11 @@
        if (II->use_empty())
          UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
        break;
+      case Intrinsic::coro_suspend_async: {
+        auto *Suspend = cast<CoroSuspendAsyncInst>(II);
+        CoroSuspends.push_back(Suspend);
+        break;
+      }
      case Intrinsic::coro_suspend_retcon: {
        auto Suspend = cast<CoroSuspendRetconInst>(II);
        CoroSuspends.push_back(Suspend);
@@ -371,7 +381,22 @@
      }
      break;
    }
-
+    case Intrinsic::coro_id_async: {
+      auto *AsyncId = cast<CoroIdAsyncInst>(Id);
+      AsyncId->checkWellFormed();
+      this->ABI = coro::ABI::Async;
+      this->AsyncLowering.Context = AsyncId->getStorage();
+      this->AsyncLowering.ContextHeaderSize = AsyncId->getStorageSize();
+      this->AsyncLowering.ContextAlignment =
+          AsyncId->getStorageAlignment().value();
+      this->AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer();
+      auto &Context = F.getContext();
+      auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+      auto *VoidTy = Type::getVoidTy(Context);
+      this->AsyncLowering.AsyncFuncTy =
+          FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy, Int8PtrTy}, false);
+      break;
+    }
    case Intrinsic::coro_id_retcon:
    case Intrinsic::coro_id_retcon_once: {
      auto ContinuationId = cast<AnyCoroIdRetconInst>(Id);
@@ -512,6 +537,8 @@
    addCallToCallGraph(CG, Call, Alloc);
    return Call;
  }
+  case coro::ABI::Async:
+    llvm_unreachable("can't allocate memory in coro async-lowering");
  }
  llvm_unreachable("Unknown coro::ABI enum");
}
@@ -532,6 +559,8 @@
    addCallToCallGraph(CG, Call, Dealloc);
    return;
  }
+  case coro::ABI::Async:
+    llvm_unreachable("can't deallocate memory in coro async-lowering");
  }
  llvm_unreachable("Unknown coro::ABI enum");
}
@@ -633,6 +662,32 @@
  checkWFDealloc(this, getArgOperand(DeallocArg));
}
 
+static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
+  auto *AsyncFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts());
+  if (!AsyncFuncPtrAddr)
+    fail(I, "llvm.coro.id.async async function pointer not a global", V);
+
+  auto *StructTy = dyn_cast<StructType>(
+      AsyncFuncPtrAddr->getType()->getPointerElementType());
+  if (!StructTy || StructTy->isOpaque() || !StructTy->isPacked() ||
+      StructTy->getNumElements() != 2 ||
+      !StructTy->getElementType(0)->isIntegerTy(32) ||
+      !StructTy->getElementType(1)->isIntegerTy(32))
+    fail(I,
+         "llvm.coro.id.async async function pointer argument's type is not "
+         "<{i32, i32}>",
+         V);
+}
+
+void CoroIdAsyncInst::checkWellFormed() const {
+  // TODO: check that the StorageArg is a parameter of this function.
+ checkConstantInt(this, getArgOperand(SizeArg), + "size argument to coro.id.async must be constant"); + checkConstantInt(this, getArgOperand(AlignArg), + "alignment argument to coro.id.async must be constant"); + checkAsyncFuncPointer(this, getArgOperand(AsyncFuncPtrArg)); +} + void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroEarlyLegacyPass()); } diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -0,0 +1,241 @@ +; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck --check-prefixes=CHECK %s + +target datalayout = "p:64:64:64" + +%async.task = type { i64 } +%async.actor = type { i64 } +%async.fp = type <{ i32, i32 }> + +%async.ctxt = type { i8*, void (i8*, %async.task*, %async.actor*)* } + +; The async callee. +@my_other_async_function_fp = external global <{ i32, i32 }> +declare void @my_other_async_function(i8* %async.ctxt) + +; The current async function (the caller). +; This struct describes an async function. The first field is the size needed +; for the async context of the current async function, the second field is the +; relative offset to the async function implementation. +@my_async_function_fp = constant <{ i32, i32 }> + <{ i32 128, ; Initial async context size without space for frame + i32 trunc ( ; Relative pointer to async function + i64 sub ( + i64 ptrtoint (void (i8*, %async.task*, %async.actor*)* @my_async_function to i64), + i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64) + ) + to i32) + }> + +; Function that implements the dispatch to the callee function. +define swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { + musttail call swiftcc void @asyncSuspend(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) + ret void +} + +declare void @some_user(i64) +declare void @some_may_write(i64*) + +define swiftcc void @my_async_function(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +entry: + %tmp = alloca { i64, i64 }, align 8 + %proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0 + %proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1 + + %id = call token @llvm.coro.id.async(i32 128, i32 16, i8* %async.ctxt, i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*)) + %hdl = call i8* @llvm.coro.begin(token %id, i8* null) + store i64 0, i64* %proj.1, align 8 + store i64 1, i64* %proj.2, align 8 + call void @some_may_write(i64* %proj.1) + + ; Begin lowering: apply %my_other_async_function(%args...) + + ; setup callee context + %arg0 = bitcast %async.task* %task to i8* + %arg1 = bitcast <{ i32, i32}>* @my_other_async_function_fp to i8* + %callee_context = call i8* @llvm.coro.async.context.alloc(i8* %arg0, i8* %arg1) + %callee_context.0 = bitcast i8* %callee_context to %async.ctxt* + ; store arguments ... + ; ... 
(omitted) + + ; store the return continuation + %callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1 + %return_to_caller.addr = bitcast void(i8*, %async.task*, %async.actor*)** %callee_context.return_to_caller.addr to i8** + %resume.func_ptr = call i8* @llvm.coro.async.resume() + store i8* %resume.func_ptr, i8** %return_to_caller.addr + + ; store caller context into callee context + %callee_context.caller_context.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 0 + store i8* %async.ctxt, i8** %callee_context.caller_context.addr + + %res = call {i8*, i8*, i8*} (i8*, i8*, ...) @llvm.coro.suspend.async( + i8* %resume.func_ptr, + i8* %callee_context, + void (i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, + i8* %callee_context, %async.task* %task, %async.actor *%actor) + + call void @llvm.coro.async.context.dealloc(i8* %callee_context) + %continuation_task_arg = extractvalue {i8*, i8*, i8*} %res, 1 + %task.2 = bitcast i8* %continuation_task_arg to %async.task* + %val = load i64, i64* %proj.1 + call void @some_user(i64 %val) + %val.2 = load i64, i64* %proj.2 + call void @some_user(i64 %val.2) + + tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor) + call i1 @llvm.coro.end(i8* %hdl, i1 0) + unreachable +} + +; Make sure we update the async function pointer +; CHECK: @my_async_function_fp = constant <{ i32, i32 }> <{ i32 168, +; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ i32 168, + +; CHECK-LABEL: define swiftcc void @my_async_function(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +; CHECK: entry: +; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128 +; CHECK: [[ACTOR_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 152 +; CHECK: [[CAST1:%.*]] = bitcast i8* [[ACTOR_SPILL_ADDR]] to %async.actor** +; CHECK: store %async.actor* %actor, %async.actor** [[CAST1]] +; CHECK: [[ADDR1:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 144 +; CHECK: [[ASYNC_CTXT_SPILL_ADDR:%.*]] = bitcast i8* [[ADDR1]] to i8** +; CHECK: store i8* %async.ctxt, i8** [[ASYNC_CTXT_SPILL_ADDR]] +; CHECK: [[ALLOCA_PRJ1:%.*]] = bitcast i8* [[FRAMEPTR]] to i64* +; CHECK: [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 136 +; CHECK: [[ADDR2:%.*]] = bitcast i8* [[ALLOCA_PRJ2]] to i64* +; CHECK: store i64 0, i64* [[ALLOCA_PRJ1]] +; CHECK: store i64 1, i64* [[ADDR2]] +; CHECK: tail call void @some_may_write(i64* nonnull %proj.1) +; CHECK: [[TASK:%.*]] = bitcast %async.task* %task to i8* +; CHECK: [[CALLEE_CTXT:%.*]] = tail call i8* @llvm.coro.async.context.alloc(i8* [[TASK]], i8* bitcast (<{ i32, i32 }>* @my_other_async_function_fp to i8*)) +; CHECK: [[CALLEE_CTXT_SPILL:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 160 +; CHECK: [[CAST2:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL]] to i8** +; CHECK: store i8* [[CALLEE_CTXT]], i8** [[CAST2]] +; CHECK: [[TYPED_RETURN_TO_CALLER_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLEE_CTXT]], i64 8 +; CHECK: [[RETURN_TO_CALLER_ADDR:%.*]] = bitcast i8* [[TYPED_RETURN_TO_CALLER_ADDR]] to i8** +; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function.resume.0 to i8*), i8** [[RETURN_TO_CALLER_ADDR]] +; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8** +; CHECK: store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]] +; CHECK: musttail call swiftcc void 
@my_async_function.my_other_async_function_fp.apply(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor) +; CHECK: ret void +; CHECK: } + +; CHECK-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* %0, i8* %1, i8* %2) { +; CHECK: entryresume.0: +; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8** +; CHECK: [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]] +; CHECK: [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128 +; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160 +; CHECK: [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8** +; CHECK: [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]] +; CHECK: [[ACTOR_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 152 +; CHECK: [[CAST2:%.*]] = bitcast i8* [[ACTOR_RELOAD_ADDR]] to %async.actor** +; CHECK: [[ACTOR_RELOAD:%.*]] = load %async.actor*, %async.actor** [[CAST2]] +; CHECK: [[ADDR1:%.*]] = getelementptr inbounds i8, i8* %4, i64 144 +; CHECK: [[ASYNC_CTXT_RELOAD_ADDR:%.*]] = bitcast i8* [[ADDR1]] to i8** +; CHECK: [[ASYNC_CTXT_RELOAD:%.*]] = load i8*, i8** [[ASYNC_CTXT_RELOAD_ADDR]] +; CHECK: [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 136 +; CHECK: [[ADDR2:%.*]] = bitcast i8* [[ALLOCA_PRJ2]] to i64* +; CHECK: [[ALLOCA_PRJ1:%.*]] = bitcast i8* [[FRAME_PTR]] to i64* +; CHECK: tail call void @llvm.coro.async.context.dealloc(i8* [[CALLEE_CTXT_RELOAD]]) +; CHECK: [[TASK_ARG:%.*]] = bitcast i8* %1 to %async.task* +; CHECK: [[VAL1:%.*]] = load i64, i64* [[ALLOCA_PRJ1]] +; CHECK: tail call void @some_user(i64 [[VAL1]]) +; CHECK: [[VAL2:%.*]] = load i64, i64* [[ADDR2]] +; CHECK: tail call void @some_user(i64 [[VAL2]]) +; CHECK: tail call swiftcc void @asyncReturn(i8* [[ASYNC_CTXT_RELOAD]], %async.task* [[TASK_ARG]], %async.actor* [[ACTOR_RELOAD]]) +; CHECK: ret void +; CHECK: } + +@my_async_function2_fp = constant <{ i32, i32 }> + <{ i32 128, ; Initial async context size without space for frame + i32 trunc ( ; Relative pointer to async function + i64 sub ( + i64 ptrtoint (void (i8*, %async.task*, %async.actor*)* @my_async_function2 to i64), + i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function2_fp, i32 0, i32 1) to i64) + ) + to i32) + }> + +define swiftcc void @my_async_function2(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +entry: + + %id = call token @llvm.coro.id.async(i32 128, i32 16, i8* %async.ctxt, i8* bitcast (<{i32, i32}>* @my_async_function2_fp to i8*)) + %hdl = call i8* @llvm.coro.begin(token %id, i8* null) + ; setup callee context + %arg0 = bitcast %async.task* %task to i8* + %arg1 = bitcast <{ i32, i32}>* @my_other_async_function_fp to i8* + %callee_context = call i8* @llvm.coro.async.context.alloc(i8* %arg0, i8* %arg1) + + %callee_context.0 = bitcast i8* %callee_context to %async.ctxt* + %callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1 + %return_to_caller.addr = bitcast void(i8*, %async.task*, %async.actor*)** %callee_context.return_to_caller.addr to i8** + %resume.func_ptr = call i8* @llvm.coro.async.resume() + store i8* %resume.func_ptr, i8** %return_to_caller.addr + %callee_context.caller_context.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 0 + store i8* %async.ctxt, i8** %callee_context.caller_context.addr + %res = call {i8*, i8*, i8*} (i8*, i8*, ...) 
@llvm.coro.suspend.async( + i8* %resume.func_ptr, + i8* %callee_context, + void (i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, + i8* %callee_context, %async.task* %task, %async.actor *%actor) + + %continuation_task_arg = extractvalue {i8*, i8*, i8*} %res, 1 + %task.2 = bitcast i8* %continuation_task_arg to %async.task* + + %callee_context.0.1 = bitcast i8* %callee_context to %async.ctxt* + %callee_context.return_to_caller.addr.1 = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0.1, i32 0, i32 1 + %return_to_caller.addr.1 = bitcast void(i8*, %async.task*, %async.actor*)** %callee_context.return_to_caller.addr.1 to i8** + %resume.func_ptr.1 = call i8* @llvm.coro.async.resume() + store i8* %resume.func_ptr.1, i8** %return_to_caller.addr.1 + %callee_context.caller_context.addr.1 = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0.1, i32 0, i32 0 + store i8* %async.ctxt, i8** %callee_context.caller_context.addr.1 + %res.2 = call {i8*, i8*, i8*} (i8*, i8*, ...) @llvm.coro.suspend.async( + i8* %resume.func_ptr.1, + i8* %callee_context, + void (i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, + i8* %callee_context, %async.task* %task, %async.actor *%actor) + + call void @llvm.coro.async.context.dealloc(i8* %callee_context) + %continuation_actor_arg = extractvalue {i8*, i8*, i8*} %res.2, 2 + %actor.2 = bitcast i8* %continuation_actor_arg to %async.actor* + + tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor.2) + call i1 @llvm.coro.end(i8* %hdl, i1 0) + unreachable +} + +; CHECK-LABEL: define swiftcc void @my_async_function2(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +; CHECK: store %async.actor* %actor, +; CHECK: store %async.task* %task, +; CHECK: store i8* %async.ctxt, +; CHECK: [[CALLEE_CTXT:%.*]] = tail call i8* @llvm.coro.async.context.alloc( +; CHECK: store i8* [[CALLEE_CTXT]], +; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function2.resume.0 to i8*), +; CHECK: store i8* %async.ctxt, +; CHECK: musttail call swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor) +; CHECK: ret void + +; CHECK-LABEL: define internal swiftcc void @my_async_function2.resume.0(i8* %0, i8* %1, i8* %2) { +; CHECK: [[CALLEE_CTXT_ADDR:%.*]] = bitcast i8* %0 to i8** +; CHECK: [[CALLEE_CTXT:%.*]] = load i8*, i8** [[CALLEE_CTXT_ADDR]] +; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLEE_CTXT]], i64 152 +; CHECK: [[CALLEE_CTXT_SPILL_ADDR2:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8** +; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function2.resume.1 to i8*), +; CHECK: [[CALLLE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CALLEE_CTXT_SPILL_ADDR2]] +; CHECK: musttail call swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* [[CALLEE_CTXT_RELOAD]] +; CHECK: ret void + +; CHECK-LABEL: define internal swiftcc void @my_async_function2.resume.1(i8* %0, i8* %1, i8* %2) { +; CHECK: [[ACTOR_ARG:%.*]] = bitcast i8* %2 +; CHECK: tail call swiftcc void @asyncReturn({{.*}}[[ACTOR_ARG]]) +; CHECK: ret void + +declare token @llvm.coro.id.async(i32, i32, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) +declare {i8*, i8*, i8*} @llvm.coro.suspend.async(i8*, i8*, ...) 
+declare i8* @llvm.coro.async.context.alloc(i8*, i8*) +declare void @llvm.coro.async.context.dealloc(i8*) +declare swiftcc void @asyncReturn(i8*, %async.task*, %async.actor*) +declare swiftcc void @asyncSuspend(i8*, %async.task*, %async.actor*) +declare i8* @llvm.coro.async.resume()
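Editorial illustration (not part of the patch): the documentation above states
that a suspended async coroutine is resumed by calling the suspend point's
`resume function` with the `async context` as the first argument, and the test
shows the caller storing that resume function into the callee's context before
transferring control. A minimal sketch of the callee-side half of that protocol,
reusing the %async.ctxt, %async.task and %async.actor types from the test; the
function name and body are hypothetical:

; On completion, the callee loads the return continuation its caller stored in
; field 1 of the callee context and tail-calls it; the continuation recovers the
; caller's context by loading field 0 of that same context.
define swiftcc void @callee_async_function.sketch(i8* %ctxt, %async.task* %task, %async.actor* %actor) {
  %ctxt.typed = bitcast i8* %ctxt to %async.ctxt*
  %ret.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %ctxt.typed, i32 0, i32 1
  %ret = load void (i8*, %async.task*, %async.actor*)*, void (i8*, %async.task*, %async.actor*)** %ret.addr
  musttail call swiftcc void %ret(i8* %ctxt, %async.task* %task, %async.actor* %actor)
  ret void
}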