Index: llvm/trunk/docs/Coroutines.rst =================================================================== --- llvm/trunk/docs/Coroutines.rst +++ llvm/trunk/docs/Coroutines.rst @@ -95,7 +95,8 @@ entry: %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @malloc(i32 %size) - %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null) + %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null) + %hdl = call noalias i8* @llvm.coro.frame(token %beg) br label %loop loop: %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ] @@ -115,9 +116,10 @@ The `entry` block establishes the coroutine frame. The `coro.size`_ intrinsic is lowered to a constant representing the size required for the coroutine frame. -The `coro.begin`_ intrinsic initializes the coroutine frame and returns the -coroutine handle. The first parameter of `coro.begin` is given a block of memory -to be used if the coroutine frame needs to be allocated dynamically. +The `coro.begin`_ intrinsic initializes the coroutine frame and returns a +token that is used to obtain the coroutine handle via the `coro.frame` intrinsic. +The first parameter of `coro.begin` is given a block of memory to be used if the +coroutine frame needs to be allocated dynamically. The `cleanup` block destroys the coroutine frame. The `coro.free`_ intrinsic, given the coroutine handle, returns a pointer of the memory block to be freed or @@ -160,12 +162,13 @@ code responsible for creation and initialization of the coroutine frame and execution of the coroutine until a suspend point is reached: -.. code-block:: llvm +..
code-block:: none define i8* @f(i32 %n) { entry: %alloc = call noalias i8* @malloc(i32 24) - %0 = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null) + %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null) + %0 = call i8* @llvm.coro.frame(token %beg) %frame = bitcast i8* %0 to %f.frame* %1 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 0 store void (%f.frame*)* @f.resume, void (%f.frame*)** %1 @@ -219,7 +222,7 @@ when dynamic allocation is required, and an address of an alloca on the caller's frame where coroutine frame can be stored if dynamic allocation is elided. -.. code-block:: llvm +.. code-block:: none entry: %elide = call i8* @llvm.coro.alloc() @@ -231,7 +234,7 @@ br label %coro.begin coro.begin: %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] - %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null) + %beg = call token @llvm.coro.begin(i8* %phi, i8* null, i32 0, i8* null, i8* null) In the cleanup block, we will make freeing the coroutine frame conditional on `coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null` @@ -421,7 +424,8 @@ br label %coro.begin coro.begin: %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] - %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* %pv, i8* null) + %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* %pv, i8* null) + %hdl = call i8* @llvm.coro.frame(token %beg) br label %loop loop: %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ] @@ -687,15 +691,16 @@ Example: """""""" -.. code-block:: llvm +.. code-block:: text define i8* @f(i32 %n) { entry: %promise = alloca i32 %pv = bitcast i32* %promise to i8* ... - ; the third argument to coro.begin points to the coroutine promise. - %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* %pv, i8* null) + ; the fourth argument to coro.begin points to the coroutine promise. 
+ %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* %pv, i8* null) + %hdl = call noalias i8* @llvm.coro.frame(token %beg) ... store i32 42, i32* %promise ; store something into the promise ... @@ -752,12 +757,14 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - declare i8* @llvm.coro.begin(i8* , i32 , i8* , i8* ) + declare token @llvm.coro.begin(i8* , i8* , i32 , i8* , i8* ) Overview: """"""""" -The '``llvm.coro.begin``' intrinsic returns an address of the coroutine frame. +The '``llvm.coro.begin``' intrinsic captures coroutine initialization +information and returns a token that can be used by the `coro.frame` intrinsic to +return an address of the coroutine frame. Arguments: """""""""" @@ -765,15 +772,17 @@ The first argument is a pointer to a block of memory where coroutine frame will be stored. -The second argument provides information on the alignment of the memory returned +The second argument is either null or the SSA value of a `coro.alloc` intrinsic. + +The third argument provides information on the alignment of the memory returned by the allocation function and given to `coro.begin` by the first argument. If this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*). This argument only accepts constants. -The third argument, if not `null`, designates a particular alloca instruction to +The fourth argument, if not `null`, designates a particular alloca instruction to be a `coroutine promise`_. -The fourth argument is `null` before coroutine is split, and later is replaced +The fifth argument is `null` before coroutine is split, and later is replaced to point to a private global constant array containing function pointers to outlined resume and destroy parts of the coroutine. @@ -781,10 +790,10 @@ """""""""" Depending on the alignment requirements of the objects in the coroutine frame -and/or on the codegen compactness reasons the pointer returned from `coro.begin` -may be at offset to the `%mem` argument.
(This could be beneficial if -instructions that express relative access to data can be more compactly encoded -with small positive and negative offsets). +and/or on the codegen compactness reasons the pointer returned from `coro.frame` +associated with a particular `coro.begin` may be at offset to the `%mem` +argument. (This could be beneficial if instructions that express relative access +to data can be more compactly encoded with small positive and negative offsets). A frontend should emit exactly one `coro.begin` intrinsic per coroutine. @@ -807,7 +816,7 @@ """""""""" A pointer to the coroutine frame. This should be the same pointer that was -returned by prior `coro.begin` call. +returned by prior `coro.frame` call. Example (custom deallocation function): """"""""""""""""""""""""""""""""""""""" @@ -862,10 +871,13 @@ A frontend should emit at most one `coro.alloc` intrinsic per coroutine. +If `coro.alloc` is present, the second parameter to `coro.begin` should refer +to it. + Example: """""""" -.. code-block:: llvm +.. code-block:: text entry: %elide = call i8* @llvm.coro.alloc() @@ -879,7 +891,8 @@ coro.begin: %phi = phi i8* [ %elide, %entry ], [ %alloc, %coro.alloc ] - %frame = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null) + %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* null) + %frame = call i8* @llvm.coro.frame(token %beg) .. _coro.frame: @@ -898,14 +911,12 @@ Arguments: """""""""" -None +A token that refers to `coro.begin` instruction. Semantics: """""""""" -This intrinsic is lowered to refer to the `coro.begin`_ instruction. This is -a frontend convenience intrinsic that makes it easier to refer to the -coroutine frame. +This intrinsic is lowered to refer to address of the coroutine frame. .. _coro.end: @@ -1164,7 +1175,7 @@ --------- The pass CoroElide examines if the inlined coroutine is eligible for heap allocation elision optimization. 
If so, it replaces `coro.alloc` and -`coro.begin` intrinsic with an address of a coroutine frame placed on its caller +`coro.frame` intrinsic with an address of a coroutine frame placed on its caller and replaces `coro.free` intrinsics with `null` to remove the deallocation code. This pass also replaces `coro.resume` and `coro.destroy` intrinsics with direct calls to resume and destroy functions for a particular coroutine where possible. @@ -1178,11 +1189,11 @@ ================================= #. Add documentation. #. Add coroutine intrinsics. -#. Add empty coroutine passes. <== we are here +#. Add empty coroutine passes. #. Add coroutine devirtualization + tests. #. Add CGSCC restart trigger + tests. #. Add coroutine heap elision + tests. -#. Add custom allocation heap elision + tests. +#. Add custom allocation heap elision + tests. <== we are here #. Add coroutine splitting logic + tests. #. Add simple coroutine frame builder + tests. #. Add the rest of the logic + tests. (Maybe split further as needed). Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -603,16 +603,16 @@ // Coroutine Structure Intrinsics. 
def int_coro_alloc : Intrinsic<[llvm_ptr_ty], [], []>; -def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, - llvm_ptr_ty, llvm_ptr_ty], - [WriteOnly<0>, ReadNone<2>, ReadOnly<3>, - NoCapture<3>]>; +def int_coro_begin : Intrinsic<[llvm_token_ty], [llvm_ptr_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty], + [WriteOnly<0>, WriteOnly<0>, + ReadNone<3>, ReadOnly<4>, NoCapture<4>]>; def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>; def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>; -def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_coro_frame : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; Index: llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp +++ llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/InstIterator.h" #include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -39,11 +40,29 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.setPreservesCFG(); } }; } +char CoroElide::ID = 0; +INITIALIZE_PASS_BEGIN( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) + +Pass *llvm::createCoroElidePass() { return new CoroElide(); } + +//===----------------------------------------------------------------------===// +// Implementation 
+//===----------------------------------------------------------------------===// + // Go through the list of coro.subfn.addr intrinsics and replace them with the // provided constant. static void replaceWithConstant(Constant *Value, @@ -68,24 +87,103 @@ replaceAndRecursivelySimplify(I, Value); } +// See if any operand of the call instruction references the coroutine frame. +static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) { + for (Value *Op : CI->operand_values()) + if (AA.alias(Op, Frame) != NoAlias) + return true; + return false; +} + +// Look for any tail calls referencing the coroutine frame and remove the tail +// attribute from them, since the coroutine frame now resides on the stack and a +// tail call implies that the function does not reference anything on the stack. +static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { + Function &F = *Frame->getFunction(); + MemoryLocation Mem(Frame); + for (Instruction &I : instructions(F)) + if (auto *Call = dyn_cast(&I)) + if (Call->isTailCall() && operandReferences(Call, Frame, AA)) { + // FIXME: If we ever hit this check, evaluate whether it is more + // appropriate to retain musttail and allow the code to compile. + if (Call->isMustTailCall()) + report_fatal_error("Call referring to the coroutine frame cannot be " + "marked as musttail"); + Call->setTailCall(false); + } +} + +// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type. +static Type *getFrameType(Function *Resume) { + auto *ArgType = Resume->getArgumentList().front().getType(); + return cast(ArgType)->getElementType(); +} + +// Finds first non alloca instruction in the entry block of a function.
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { + for (Instruction &I : F->getEntryBlock()) + if (!isa(&I)) + return &I; + llvm_unreachable("no terminator in the entry block"); +} + +// To elide heap allocations we need to suppress code blocks guarded by +// llvm.coro.alloc and llvm.coro.free instructions. +static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy, + CoroAllocInst *AllocInst, AAResults &AA) { + LLVMContext &C = CoroBegin->getContext(); + auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction()); + + // FIXME: Design how to transmit alignment information for every alloca that + // is spilled into the coroutine frame and recreate the alignment information + // here. Possibly we will need to do a mini SROA here and break the coroutine + // frame into individual AllocaInst recreating the original alignment. + auto *Frame = new AllocaInst(FrameTy, "", InsertPt); + auto *FrameVoidPtr = + new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); + + // Replacing llvm.coro.alloc with non-null value will suppress dynamic + // allocation as it is expected for the frontend to generate the code that + // looks like: + // mem = coro.alloc(); + // if (!mem) mem = malloc(coro.size()); + // coro.begin(mem, ...) + AllocInst->replaceAllUsesWith(FrameVoidPtr); + AllocInst->eraseFromParent(); + + // To suppress deallocation code, we replace all llvm.coro.free intrinsics + // associated with this coro.begin with null constant. + auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); + coro::replaceAllCoroFrees(CoroBegin, NullPtr); + CoroBegin->lowerTo(FrameVoidPtr); + + // Since now coroutine frame lives on the stack we need to make sure that + // any tail call referencing it, must be made non-tail call. + removeTailCallAttribute(Frame, AA); +} + // See if there are any coro.subfn.addr intrinsics directly referencing // the coro.begin. 
If found, replace them with an appropriate coroutine // subfunction associated with that coro.begin. -static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) { +static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) { SmallVector ResumeAddr; SmallVector DestroyAddr; - for (User *U : CoroBegin->users()) { - if (auto *II = dyn_cast(U)) { - switch (II->getIndex()) { - case CoroSubFnInst::ResumeIndex: - ResumeAddr.push_back(II); - break; - case CoroSubFnInst::DestroyIndex: - DestroyAddr.push_back(II); - break; - default: - llvm_unreachable("unexpected coro.subfn.addr constant"); + for (User *CF : CoroBegin->users()) { + assert(isa(CF) && + "CoroBegin can be only used by coro.frame instructions"); + for (User *U : CF->users()) { + if (auto *II = dyn_cast(U)) { + switch (II->getIndex()) { + case CoroSubFnInst::ResumeIndex: + ResumeAddr.push_back(II); + break; + case CoroSubFnInst::DestroyIndex: + DestroyAddr.push_back(II); + break; + default: + llvm_unreachable("unexpected coro.subfn.addr constant"); + } } } } @@ -99,11 +197,28 @@ "of coroutine subfunctions"); auto *ResumeAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex); + replaceWithConstant(ResumeAddrConstant, ResumeAddr); + + if (DestroyAddr.empty()) + return true; + auto *DestroyAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex); - - replaceWithConstant(ResumeAddrConstant, ResumeAddr); replaceWithConstant(DestroyAddrConstant, DestroyAddr); + + // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation. + if (auto *AllocInst = CoroBegin->getAlloc()) { + // FIXME: The check above is overly lax. It only checks for whether we have + // an ability to elide heap allocations, not whether it is safe to do so. 
+ // We need to do something like: + // If for every exit from the function where coro.begin is + // live, there is a coro.free or coro.destroy dominating that exit block, + // then it is safe to elide heap allocation, since the lifetime of coroutine + // is fully enclosed in its caller. + auto *FrameTy = getFrameType(cast(ResumeAddrConstant)); + elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA); + } + return true; } @@ -143,20 +258,9 @@ if (CoroBegins.empty()) return Changed; + AAResults &AA = getAnalysis().getAAResults(); for (auto *CB : CoroBegins) - Changed |= replaceIndirectCalls(CB); + Changed |= replaceIndirectCalls(CB, AA); return Changed; } - -char CoroElide::ID = 0; -INITIALIZE_PASS_BEGIN( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) -INITIALIZE_PASS_END( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) - -Pass *llvm::createCoroElidePass() { return new CoroElide(); } Index: llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h +++ llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h @@ -62,11 +62,57 @@ } }; +/// This represents the llvm.coro.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloc; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.frame instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_frame; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_free; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This class represents the llvm.coro.begin instruction. class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { - enum { MemArg, AlignArg, PromiseArg, InfoArg }; + enum { MemArg, ElideArg, AlignArg, PromiseArg, InfoArg }; public: + CoroAllocInst *getAlloc() const { + if (auto *CAI = dyn_cast( + getArgOperand(ElideArg)->stripPointerCasts())) + return CAI; + + return nullptr; + } + + Value *getMem() const { return getArgOperand(MemArg); } + Constant *getRawInfo() const { return cast(getArgOperand(InfoArg)->stripPointerCasts()); } @@ -108,6 +154,22 @@ return Result; } + // Replaces all coro.frame intrinsics that are associated with this coro.begin + // to a replacement value and removes coro.begin and all of the coro.frame + // intrinsics. 
+ void lowerTo(Value* Replacement) { + SmallVector FrameInsts; + for (auto *CF : this->users()) + FrameInsts.push_back(cast(CF)); + + for (auto *CF : FrameInsts) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } + + this->eraseFromParent(); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_begin; Index: llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h +++ llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h @@ -42,6 +42,7 @@ namespace coro { bool declaresIntrinsics(Module &M, std::initializer_list); +void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement); // Keeps data and helper functions for lowering coroutine intrinsics. struct LowererBase { Index: llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp +++ llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp @@ -122,3 +122,21 @@ return false; } + +// Find all llvm.coro.free instructions associated with the provided coro.begin +// and replace them with the provided replacement value. 
+void coro::replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement) { + SmallVector CoroFrees; + for (User *FramePtr: CB->users()) + for (User *U : FramePtr->users()) + if (auto *CF = dyn_cast(U)) + CoroFrees.push_back(CF); + + if (CoroFrees.empty()) + return; + + for (CoroFreeInst *CF : CoroFrees) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } +} Index: llvm/trunk/test/Transforms/Coroutines/coro-elide.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/coro-elide.ll +++ llvm/trunk/test/Transforms/Coroutines/coro-elide.ll @@ -1,6 +1,6 @@ ; Tests that the coro.destroy and coro.resume are devirtualized where possible, ; SCC pipeline restarts and inlines the direct calls. -; RUN: opt < %s -S -inline -coro-elide | FileCheck %s +; RUN: opt < %s -S -inline -coro-elide -dce | FileCheck %s declare void @print(i32) nounwind @@ -22,15 +22,16 @@ ; a coroutine start function define i8* @f() { entry: - %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, + %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*)) + %hdl = call i8* @llvm.coro.frame(token %tok) ret i8* %hdl } ; CHECK-LABEL: @callResume( define void @callResume() { entry: -; CHECK: call i8* @llvm.coro.begin +; CHECK: call token @llvm.coro.begin %hdl = call i8* @f() ; CHECK-NEXT: call void @print(i32 0) @@ -50,7 +51,7 @@ ; CHECK-LABEL: @eh( define void @eh() personality i8* null { entry: -; CHECK: call i8* @llvm.coro.begin +; CHECK: call token @llvm.coro.begin %hdl = call i8* @f() ; CHECK-NEXT: call void @print(i32 0) @@ -70,7 +71,8 @@ ; no devirtualization here, since coro.begin info parameter is null define void @no_devirt_info_null() { entry: - %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, i8* null) + %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null) + %hdl = call i8* @llvm.coro.frame(token %tok) ; CHECK: 
call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) @@ -106,5 +108,6 @@ } -declare i8* @llvm.coro.begin(i8*, i32, i8*, i8*) +declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) +declare i8* @llvm.coro.frame(token) declare i8* @llvm.coro.subfn.addr(i8*, i8) Index: llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll +++ llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll @@ -0,0 +1,125 @@ +; Tests that the dynamic allocation and deallocation of the coroutine frame are +; elided and any tail calls referencing the coroutine frame have the tail +; call attribute removed. +; RUN: opt < %s -S -inline -coro-elide -instsimplify -simplifycfg | FileCheck %s + +declare void @print(i32) nounwind + +%f.frame = type {i32} + +declare void @bar(i8*) + +declare fastcc void @f.resume(%f.frame*) +declare fastcc void @f.destroy(%f.frame*) + +declare void @may_throw() +declare i8* @CustomAlloc(i32) +declare void @CustomFree(i8*) + +@f.resumers = internal constant + [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy] + +; a coroutine start function +define i8* @f() personality i8* null { +entry: + %elide = call i8* @llvm.coro.alloc() + %need.dyn.alloc = icmp ne i8* %elide, null + br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc +dyn.alloc: + %alloc = call i8* @CustomAlloc(i32 4) + br label %coro.begin +coro.begin: + %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] + %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, + i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) + %hdl = call i8* @llvm.coro.frame(token %beg) + invoke void @may_throw() + to label %ret unwind label %ehcleanup +ret: + ret i8* %hdl + +ehcleanup: + %tok = cleanuppad within none [] + %mem = call i8* @llvm.coro.free(i8* %hdl) + %need.dyn.free = icmp ne i8*
%mem, null + br i1 %need.dyn.free, label %dyn.free, label %if.end +dyn.free: + call void @CustomFree(i8* %mem) + br label %if.end +if.end: + cleanupret from %tok unwind to caller +} + +; CHECK-LABEL: @callResume( +define void @callResume() { +entry: +; CHECK: alloca %f.frame +; CHECK-NOT: coro.begin +; CHECK-NOT: CustomAlloc +; CHECK: call void @may_throw() + %hdl = call i8* @f() + +; Need to remove 'tail' from the first call to @bar +; CHECK-NOT: tail call void @bar( +; CHECK: call void @bar( + tail call void @bar(i8* %hdl) +; CHECK: tail call void @bar( + tail call void @bar(i8* null) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame) + %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) + %1 = bitcast i8* %0 to void (i8*)* + call fastcc void %1(i8* %hdl) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame) + %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1) + %3 = bitcast i8* %2 to void (i8*)* + call fastcc void %3(i8* %hdl) + +; CHECK-NEXT: ret void + ret void +} + +; a coroutine start function (cannot elide heap alloc, due to second argument to +; coro.begin not pointing to coro.alloc) +define i8* @f_no_elision() personality i8* null { +entry: + %alloc = call i8* @CustomAlloc(i32 4) + %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, + i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) + %hdl = call i8* @llvm.coro.frame(token %beg) + ret i8* %hdl +} + +; CHECK-LABEL: @callResume_no_elision( +define void @callResume_no_elision() { +entry: +; CHECK: call i8* @CustomAlloc( + %hdl = call i8* @f_no_elision() + +; Tail calls should remain tail calls +; CHECK: tail call void @bar( + tail call void @bar(i8* %hdl) +; CHECK: tail call void @bar( + tail call void @bar(i8* null) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* + %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) + %1 = bitcast i8* %0
to void (i8*)* + call fastcc void %1(i8* %hdl) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* + %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1) + %3 = bitcast i8* %2 to void (i8*)* + call fastcc void %3(i8* %hdl) + +; CHECK-NEXT: ret void + ret void +} + + +declare i8* @llvm.coro.alloc() +declare i8* @llvm.coro.free(i8*) +declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) +declare i8* @llvm.coro.frame(token) +declare i8* @llvm.coro.subfn.addr(i8*, i8) Index: llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll +++ llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll @@ -0,0 +1,16 @@ +; Verifies that restart trigger forces IPO pipelines restart and the same +; coroutine is looked at by CoroSplit pass twice. +; REQUIRES: asserts +; RUN: opt < %s -S -O0 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s +; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s + +; CHECK: CoroSplit: Processing coroutine 'f' state: 0 +; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1 + +declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) + +; a coroutine start function +define void @f() { + call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null) + ret void +}