Index: llvm/trunk/docs/Coroutines.rst =================================================================== --- llvm/trunk/docs/Coroutines.rst +++ llvm/trunk/docs/Coroutines.rst @@ -93,10 +93,10 @@ define i8* @f(i32 %n) { entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @malloc(i32 %size) - %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null) - %hdl = call noalias i8* @llvm.coro.frame(token %beg) + %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc) br label %loop loop: %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ] @@ -116,10 +116,12 @@ The `entry` block establishes the coroutine frame. The `coro.size`_ intrinsic is lowered to a constant representing the size required for the coroutine frame. -The `coro.begin`_ intrinsic initializes the coroutine frame and returns the a -token that is used to obtain the coroutine handle via `coro.frame` intrinsic. -The first parameter of `coro.begin` is given a block of memory to be used if the -coroutine frame needs to be allocated dynamically. +The `coro.begin`_ intrinsic initializes the coroutine frame and returns the +coroutine handle. The second parameter of `coro.begin` is given a block of memory +to be used if the coroutine frame needs to be allocated dynamically. +The `coro.id`_ intrinsic serves as coroutine identity useful in cases when the +`coro.begin`_ intrinsic gets duplicated by optimization passes such as +jump-threading. The `cleanup` block destroys the coroutine frame. 
The `coro.free`_ intrinsic, given the coroutine handle, returns a pointer of the memory block to be freed or @@ -166,9 +168,9 @@ define i8* @f(i32 %n) { entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) %alloc = call noalias i8* @malloc(i32 24) - %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null) - %0 = call i8* @llvm.coro.frame(token %beg) + %0 = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc) %frame = bitcast i8* %0 to %f.frame* %1 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 0 store void (%f.frame*)* @f.resume, void (%f.frame*)** %1 @@ -218,23 +220,23 @@ dynamic allocation by storing the coroutine frame as a static `alloca` in its caller. -In the entry block, we will call `coro.alloc`_ intrinsic that will return `null` -when dynamic allocation is required, and an address of an alloca on the caller's -frame where coroutine frame can be stored if dynamic allocation is elided. +In the entry block, we will call `coro.alloc`_ intrinsic that will return `true` +when dynamic allocation is required, and `false` if dynamic allocation is +elided. .. code-block:: none entry: - %elide = call i8* @llvm.coro.alloc() - %need.dyn.alloc = icmp ne i8* %elide, null - br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) + %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin dyn.alloc: %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @CustomAlloc(i32 %size) br label %coro.begin coro.begin: - %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] - %beg = call token @llvm.coro.begin(i8* %phi, i8* null, i32 0, i8* null, i8* null) + %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ] + %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi) In the cleanup block, we will make freeing the coroutine frame conditional on `coro.free`_ intrinsic. 
If allocation is elided, `coro.free`_ returns `null` @@ -403,8 +405,8 @@ A coroutine author or a frontend may designate a distinguished `alloca` that can be used to communicate with the coroutine. This distinguished alloca is called -**coroutine promise** and is provided as a third parameter to the `coro.begin`_ -intrinsic. +**coroutine promise** and is provided as the second parameter to the +`coro.id`_ intrinsic. The following coroutine designates a 32 bit integer `promise` and uses it to store the current value produced by a coroutine. @@ -415,17 +417,16 @@ entry: %promise = alloca i32 %pv = bitcast i32* %promise to i8* - %elide = call i8* @llvm.coro.alloc() - %need.dyn.alloc = icmp ne i8* %elide, null - br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc + %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null) + %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin dyn.alloc: %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @malloc(i32 %size) br label %coro.begin coro.begin: - %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] - %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* %pv, i8* null) - %hdl = call i8* @llvm.coro.frame(token %beg) + %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ] + %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi) br label %loop loop: %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ] @@ -697,10 +698,10 @@ entry: %promise = alloca i32 %pv = bitcast i32* %promise to i8* + ; the second argument to coro.id points to the coroutine promise. + %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null) ... - ; the fourth argument to coro.begin points to the coroutine promise. - %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* %pv, i8* null) - %hdl = call noalias i8* @llvm.coro.frame(token %beg) + %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc) ... 
store i32 42, i32* %promise ; store something into the promise ... @@ -757,43 +758,30 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - declare i8* @llvm.coro.begin(i8* , i8* , i32 , i8* , i8* ) + declare i8* @llvm.coro.begin(token , i8* ) Overview: """"""""" -The '``llvm.coro.begin``' intrinsic captures coroutine initialization -information and returns a token that can be used by `coro.frame` intrinsic to -return an address of the coroutine frame. +The '``llvm.coro.begin``' intrinsic returns an address of the coroutine frame. Arguments: """""""""" -The first argument is a pointer to a block of memory where coroutine frame -will be stored. +The first argument is a token returned by a call to '``llvm.coro.id``' +identifying the coroutine. -The second argument is either null or an SSA value of `coro.alloc` intrinsic. - -The third argument provides information on the alignment of the memory returned -by the allocation function and given to `coro.begin` by the first argument. If -this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*). -This argument only accepts constants. - -The fourth argument, if not `null`, designates a particular alloca instruction to -be a `coroutine promise`_. - -The fifth argument is `null` before coroutine is split, and later is replaced -to point to a private global constant array containing function pointers to -outlined resume and destroy parts of the coroutine. +The second argument is a pointer to a block of memory where coroutine frame +will be stored if it is allocated dynamically. Semantics: """""""""" Depending on the alignment requirements of the objects in the coroutine frame -and/or on the codegen compactness reasons the pointer returned from `coro.frame` -associated with a particular `coro.begin` may be at offset to the `%mem` -argument. (This could be beneficial if instructions that express relative access -to data can be more compactly encoded with small positive and negative offsets). 
+and/or on the codegen compactness reasons the pointer returned from `coro.begin` +may be at offset to the `%mem` argument. (This could be beneficial if +instructions that express relative access to data can be more compactly encoded +with small positive and negative offsets). A frontend should emit exactly one `coro.begin` intrinsic per coroutine. @@ -816,7 +804,7 @@ """""""""" A pointer to the coroutine frame. This should be the same pointer that was -returned by prior `coro.frame` call. +returned by prior `coro.begin` call. Example (custom deallocation function): """"""""""""""""""""""""""""""""""""""" @@ -849,30 +837,26 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - declare i8* @llvm.coro.alloc() + declare i1 @llvm.coro.alloc(token ) Overview: """"""""" -The '``llvm.coro.alloc``' intrinsic returns an address of the memory on the -callers frame where coroutine frame of this coroutine can be placed or `null` -otherwise. +The '``llvm.coro.alloc``' intrinsic returns `true` if dynamic allocation is +required to obtain memory for the coroutine frame and `false` otherwise. Arguments: """""""""" -None +The first argument is a token returned by a call to '``llvm.coro.id``' +identifying the coroutine. Semantics: """""""""" -If the coroutine is eligible for heap elision, this intrinsic is lowered to an -alloca storing the coroutine frame. Otherwise, it is lowered to constant `null`. - A frontend should emit at most one `coro.alloc` intrinsic per coroutine. - -If `coro.alloc` is present, the second parameter to `coro.begin` should refer -to it. +The intrinsic is used to suppress dynamic allocation of the coroutine frame +when possible. Example: """""""" @@ -880,9 +864,9 @@ .. 
code-block:: text entry: - %elide = call i8* @llvm.coro.alloc() - %0 = icmp ne i8* %elide, null - br i1 %0, label %coro.begin, label %coro.alloc + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) + %dyn.alloc.required = call i1 @llvm.coro.alloc(token %id) + br i1 %dyn.alloc.required, label %coro.alloc, label %coro.begin coro.alloc: %frame.size = call i32 @llvm.coro.size() @@ -890,9 +874,8 @@ br label %coro.begin coro.begin: - %phi = phi i8* [ %elide, %entry ], [ %alloc, %coro.alloc ] - %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* null) - %frame = call i8* @llvm.coro.frame(token %beg) + %phi = phi i8* [ null, %entry ], [ %alloc, %coro.alloc ] + %frame = call i8* @llvm.coro.begin(token %id, i8* %phi) .. _coro.frame: @@ -911,12 +894,53 @@ Arguments: """""""""" -A token that refers to `coro.begin` instruction. +None + +Semantics: +"""""""""" + +This intrinsic is lowered to refer to the `coro.begin`_ instruction. This is +a frontend convenience intrinsic that makes it easier to refer to the +coroutine frame. + +.. _coro.id: + +'llvm.coro.id' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare token @llvm.coro.id(i32 , i8* , i8* ) + +Overview: +""""""""" + +The '``llvm.coro.id``' intrinsic returns a token identifying a coroutine. + +Arguments: +"""""""""" + +The first argument provides information on the alignment of the memory returned +by the allocation function and given to `coro.begin` by the second argument. If +this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*). +This argument only accepts constants. + +The second argument, if not `null`, designates a particular alloca instruction +to be a `coroutine promise`_. + +The third argument is `null` before coroutine is split, and later is replaced +to point to a private global constant array containing function pointers to +outlined resume and destroy parts of the coroutine. 
+ Semantics: """""""""" -This intrinsic is lowered to refer to address of the coroutine frame. +The purpose of this intrinsic is to tie together `coro.id`, `coro.alloc` and +`coro.begin` belonging to the same coroutine to prevent optimization passes from +duplicating any of these instructions unless entire body of the coroutine is +duplicated. + +A frontend should emit exactly one `coro.id` intrinsic per coroutine. .. _coro.end: @@ -1174,9 +1198,10 @@ CoroElide --------- The pass CoroElide examines if the inlined coroutine is eligible for heap -allocation elision optimization. If so, it replaces `coro.alloc` and -`coro.frame` intrinsic with an address of a coroutine frame placed on its caller -and replaces `coro.free` intrinsics with `null` to remove the deallocation code. +allocation elision optimization. If so, it replaces +`coro.begin` intrinsic with an address of a coroutine frame placed on its caller +and replaces `coro.alloc` and `coro.free` intrinsics with `false` and `null` +respectively to remove the deallocation code. This pass also replaces `coro.resume` and `coro.destroy` intrinsics with direct calls to resume and destroy functions for a particular coroutine where possible. Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -602,17 +602,19 @@ // Coroutine Structure Intrinsics. 
-def int_coro_alloc : Intrinsic<[llvm_ptr_ty], [], []>; -def int_coro_begin : Intrinsic<[llvm_token_ty], [llvm_ptr_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty], - [WriteOnly<0>, WriteOnly<0>, - ReadNone<3>, ReadOnly<4>, NoCapture<4>]>; +def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty, + llvm_ptr_ty], + [IntrArgMemOnly, IntrReadMem, + ReadNone<1>, ReadOnly<2>, NoCapture<2>]>; +def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; +def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], + [WriteOnly<1>]>; def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>; def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>; -def int_coro_frame : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrNoMem]>; +def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; Index: llvm/trunk/lib/IR/Verifier.cpp =================================================================== --- llvm/trunk/lib/IR/Verifier.cpp +++ llvm/trunk/lib/IR/Verifier.cpp @@ -3835,8 +3835,8 @@ switch (ID) { default: break; - case Intrinsic::coro_begin: { - auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts(); + case Intrinsic::coro_id: { + auto *InfoArg = CS.getArgOperand(2)->stripPointerCasts(); if (isa(InfoArg)) break; auto *GV = dyn_cast(InfoArg); Index: llvm/trunk/lib/Transforms/Coroutines/CoroEarly.cpp =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroEarly.cpp +++ llvm/trunk/lib/Transforms/Coroutines/CoroEarly.cpp @@ -52,11 +52,11 @@ switch (CS.getIntrinsicID()) { default: continue; - case Intrinsic::coro_begin: + case Intrinsic::coro_id: // Mark a function that comes out of the frontend that has a coro.begin // with a coroutine attribute. 
- if (auto *CB = cast(&I)) { - if (CB->getInfo().isPreSplit()) + if (auto *CII = cast(&I)) { + if (CII->getInfo().isPreSplit()) F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT); } break; Index: llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp +++ llvm/trunk/lib/Transforms/Coroutines/CoroElide.cpp @@ -22,46 +22,20 @@ #define DEBUG_TYPE "coro-elide" -//===----------------------------------------------------------------------===// -// Top Level Driver -//===----------------------------------------------------------------------===// +// Created on demand if CoroElide pass has work to do. +struct Lowerer : coro::LowererBase { + SmallVector CoroIds; + SmallVector CoroBegins; + SmallVector CoroAllocs; + SmallVector ResumeAddr; + SmallVector DestroyAddr; + SmallVector CoroFrees; -namespace { -struct CoroElide : FunctionPass { - static char ID; - CoroElide() : FunctionPass(ID) {} - - bool NeedsToRun = false; - - bool doInitialization(Module &M) override { - NeedsToRun = coro::declaresIntrinsics(M, {"llvm.coro.begin"}); - return false; - } + Lowerer(Module &M) : LowererBase(M) {} - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.setPreservesCFG(); - } + void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA); + bool processCoroId(CoroIdInst *, AAResults &AA); }; -} - -char CoroElide::ID = 0; -INITIALIZE_PASS_BEGIN( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) - -Pass *llvm::createCoroElidePass() { return new CoroElide(); } - -//===----------------------------------------------------------------------===// 
-// Implementation -//===----------------------------------------------------------------------===// // Go through the list of coro.subfn.addr intrinsics and replace them with the // provided constant. @@ -129,10 +103,30 @@ // To elide heap allocations we need to suppress code blocks guarded by // llvm.coro.alloc and llvm.coro.free instructions. -static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy, - CoroAllocInst *AllocInst, AAResults &AA) { - LLVMContext &C = CoroBegin->getContext(); - auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction()); +void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { + LLVMContext &C = FrameTy->getContext(); + auto *InsertPt = + getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction()); + + // Replacing llvm.coro.alloc with false will suppress dynamic + // allocation as it is expected for the frontend to generate the code that + // looks like: + // id = coro.id(...) + // mem = coro.alloc(id) ? malloc(coro.size()) : 0; + // coro.begin(id, mem) + auto *False = ConstantInt::getFalse(C); + for (auto *CA : CoroAllocs) { + CA->replaceAllUsesWith(False); + CA->eraseFromParent(); + } + + // To suppress deallocation code, we replace all llvm.coro.free intrinsics + // associated with this coro.begin with null constant. 
+ auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); + for (auto *CF : CoroFrees) { + CF->replaceAllUsesWith(NullPtr); + CF->eraseFromParent(); + } // FIXME: Design how to transmit alignment information for every alloca that // is spilled into the coroutine frame and recreate the alignment information @@ -142,38 +136,37 @@ auto *FrameVoidPtr = new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); - // Replacing llvm.coro.alloc with non-null value will suppress dynamic - // allocation as it is expected for the frontend to generate the code that - // looks like: - // mem = coro.alloc(); - // if (!mem) mem = malloc(coro.size()); - // coro.begin(mem, ...) - AllocInst->replaceAllUsesWith(FrameVoidPtr); - AllocInst->eraseFromParent(); - - // To suppress deallocation code, we replace all llvm.coro.free intrinsics - // associated with this coro.begin with null constant. - auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); - coro::replaceAllCoroFrees(CoroBegin, NullPtr); - CoroBegin->lowerTo(FrameVoidPtr); + for (auto *CB : CoroBegins) { + CB->replaceAllUsesWith(FrameVoidPtr); + CB->eraseFromParent(); + } // Since now coroutine frame lives on the stack we need to make sure that // any tail call referencing it, must be made non-tail call. removeTailCallAttribute(Frame, AA); } -// See if there are any coro.subfn.addr intrinsics directly referencing -// the coro.begin. If found, replace them with an appropriate coroutine -// subfunction associated with that coro.begin. 
-static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) { - SmallVector ResumeAddr; - SmallVector DestroyAddr; - - for (User *CF : CoroBegin->users()) { - assert(isa(CF) && - "CoroBegin can be only used by coro.frame instructions"); - for (User *U : CF->users()) { - if (auto *II = dyn_cast(U)) { +bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) { + CoroBegins.clear(); + CoroAllocs.clear(); + ResumeAddr.clear(); + DestroyAddr.clear(); + + // Collect all coro.begin and coro.allocs associated with this coro.id. + for (User *U : CoroId->users()) { + if (auto *CB = dyn_cast(U)) + CoroBegins.push_back(CB); + else if (auto *CA = dyn_cast(U)) + CoroAllocs.push_back(CA); + } + + // Collect all coro.subfn.addrs associated with coro.begin. + // Note, we only devirtualize the calls if their coro.subfn.addr refers to + // coro.begin directly. If we run into cases where this check is too + // conservative, we can consider relaxing the check. + for (CoroBeginInst *CB : CoroBegins) { + for (User *U : CB->users()) + if (auto *II = dyn_cast(U)) switch (II->getIndex()) { case CoroSubFnInst::ResumeIndex: ResumeAddr.push_back(II); @@ -184,19 +177,16 @@ default: llvm_unreachable("unexpected coro.subfn.addr constant"); } - } - } } - if (ResumeAddr.empty() && DestroyAddr.empty()) - return false; - // PostSplit coro.begin refers to an array of subfunctions in its Info + // PostSplit coro.id refers to an array of subfunctions in its Info // argument. 
- ConstantArray *Resumers = CoroBegin->getInfo().Resumers; - assert(Resumers && "PostSplit coro.begin Info argument must refer to an array" + ConstantArray *Resumers = CoroId->getInfo().Resumers; + assert(Resumers && "PostSplit coro.id Info argument must refer to an array" "of coroutine subfunctions"); auto *ResumeAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex); + replaceWithConstant(ResumeAddrConstant, ResumeAddr); if (DestroyAddr.empty()) @@ -204,10 +194,12 @@ auto *DestroyAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex); + replaceWithConstant(DestroyAddrConstant, DestroyAddr); - // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation. - if (auto *AllocInst = CoroBegin->getAlloc()) { + // If there is a coro.alloc that llvm.coro.id refers to, we have the ability + // to suppress dynamic allocation. + if (!CoroAllocs.empty()) { // FIXME: The check above is overly lax. It only checks for whether we have // an ability to elide heap allocations, not whether it is safe to do so. // We need to do something like: @@ -216,9 +208,8 @@ // then it is safe to elide heap allocation, since the lifetime of coroutine // is fully enclosed in its caller. auto *FrameTy = getFrameType(cast(ResumeAddrConstant)); - elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA); + elideHeapAllocations(CoroId->getFunction(), FrameTy, AA); } - return true; } @@ -242,25 +233,69 @@ return true; } -bool CoroElide::runOnFunction(Function &F) { - bool Changed = false; +//===----------------------------------------------------------------------===// +// Top Level Driver +//===----------------------------------------------------------------------===// - if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) - Changed = replaceDevirtTrigger(F); +namespace { +struct CoroElide : FunctionPass { + static char ID; + CoroElide() : FunctionPass(ID) {} - // Collect all PostSplit coro.begins. 
- SmallVector CoroBegins; - for (auto &I : instructions(F)) - if (auto *CB = dyn_cast(&I)) - if (CB->getInfo().isPostSplit()) - CoroBegins.push_back(CB); + std::unique_ptr L; - if (CoroBegins.empty()) - return Changed; + bool doInitialization(Module &M) override { + if (coro::declaresIntrinsics(M, {"llvm.coro.id"})) + L = llvm::make_unique(M); + return false; + } - AAResults &AA = getAnalysis().getAAResults(); - for (auto *CB : CoroBegins) - Changed |= replaceIndirectCalls(CB, AA); + bool runOnFunction(Function &F) override { + if (!L) + return false; + + bool Changed = false; + + if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) + Changed = replaceDevirtTrigger(F); + + L->CoroIds.clear(); + L->CoroFrees.clear(); + + // Collect all PostSplit coro.ids and all coro.free. + for (auto &I : instructions(F)) + if (auto *CF = dyn_cast(&I)) + L->CoroFrees.push_back(CF); + else if (auto *CII = dyn_cast(&I)) + if (CII->getInfo().isPostSplit()) + L->CoroIds.push_back(CII); + + // If we did not find any coro.id, there is nothing to do. 
+ if (L->CoroIds.empty()) + return Changed; + + AAResults &AA = getAnalysis().getAAResults(); + for (auto *CII : L->CoroIds) + Changed |= L->processCoroId(CII, AA); - return Changed; + return Changed; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + } +}; } + +char CoroElide::ID = 0; +INITIALIZE_PASS_BEGIN( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) + +Pass *llvm::createCoroElidePass() { return new CoroElide(); } Index: llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h +++ llvm/trunk/lib/Transforms/Coroutines/CoroInstr.h @@ -11,7 +11,7 @@ // allows you to do things like: // // if (auto *SF = dyn_cast(Inst)) -// ... SF->getFrame() ... SF->getAlloc() ... +// ... SF->getFrame() ... // // All intrinsic function calls are instances of the call instruction, so these // are all subclasses of the CallInst class. Note that none of these classes @@ -74,52 +74,11 @@ } }; -/// This represents the llvm.coro.frame instruction. -class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { -public: - // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::coro_frame; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } -}; - -/// This represents the llvm.coro.free instruction. 
-class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { -public: - // Methods to support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::coro_free; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } -}; - -/// This class represents the llvm.coro.begin instruction. -class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { - enum { MemArg, ElideArg, AlignArg, PromiseArg, InfoArg }; - +/// This represents the llvm.coro.id instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { + enum { AlignArg, PromiseArg, InfoArg }; public: - CoroAllocInst *getAlloc() const { - if (auto *CAI = dyn_cast( - getArgOperand(ElideArg)->stripPointerCasts())) - return CAI; - - return nullptr; - } - - Value *getMem() const { return getArgOperand(MemArg); } - - Constant *getRawInfo() const { - return cast(getArgOperand(InfoArg)->stripPointerCasts()); - } - - void setInfo(Constant *C) { setArgOperand(InfoArg, C); } - - // Info argument of coro.begin is + // Info argument of coro.id is // fresh out of the frontend: null ; // outlined : {Init, Return, Susp1, Susp2, ...} ; // postsplit : [resume, destroy, cleanup] ; @@ -153,22 +112,56 @@ Result.Resumers = cast(Initializer); return Result; } + Constant *getRawInfo() const { + return cast(getArgOperand(InfoArg)->stripPointerCasts()); + } + + void setInfo(Constant *C) { setArgOperand(InfoArg, C); } - // Replaces all coro.frame intrinsics that are associated with this coro.begin - // to a replacement value and removes coro.begin and all of the coro.frame - // intrinsics. 
- void lowerTo(Value* Replacement) { - SmallVector FrameInsts; - for (auto *CF : this->users()) - FrameInsts.push_back(cast(CF)); - - for (auto *CF : FrameInsts) { - CF->replaceAllUsesWith(Replacement); - CF->eraseFromParent(); - } - this->eraseFromParent(); + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id; } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.frame instruction. +class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_frame; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_free; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This class represents the llvm.coro.begin instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { + enum { IdArg, MemArg }; + +public: + CoroIdInst *getId() const { + return cast(getArgOperand(IdArg)); + } + + Value *getMem() const { return getArgOperand(MemArg); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { Index: llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h +++ llvm/trunk/lib/Transforms/Coroutines/CoroInternal.h @@ -42,6 +42,7 @@ namespace coro { bool declaresIntrinsics(Module &M, std::initializer_list); +void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement); void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement); // Keeps data and helper functions for lowering coroutine intrinsics. Index: llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp =================================================================== --- llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp +++ llvm/trunk/lib/Transforms/Coroutines/Coroutines.cpp @@ -99,11 +99,11 @@ static bool isCoroutineIntrinsicName(StringRef Name) { // NOTE: Must be sorted! 
static const char *const CoroIntrinsics[] = { - "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy", - "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame", - "llvm.coro.free", "llvm.coro.param", "llvm.coro.promise", - "llvm.coro.resume", "llvm.coro.save", "llvm.coro.size", - "llvm.coro.suspend", + "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy", + "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame", + "llvm.coro.free", "llvm.coro.id", "llvm.coro.param", + "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.save", + "llvm.coro.size", "llvm.coro.suspend", }; return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; } @@ -122,21 +122,3 @@ return false; } - -// Find all llvm.coro.free instructions associated with the provided coro.begin -// and replace them with the provided replacement value. -void coro::replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement) { - SmallVector CoroFrees; - for (User *FramePtr: CB->users()) - for (User *U : FramePtr->users()) - if (auto *CF = dyn_cast(U)) - CoroFrees.push_back(CF); - - if (CoroFrees.empty()) - return; - - for (CoroFreeInst *CF : CoroFrees) { - CF->replaceAllUsesWith(Replacement); - CF->eraseFromParent(); - } -} Index: llvm/trunk/test/Transforms/Coroutines/coro-elide.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/coro-elide.ll +++ llvm/trunk/test/Transforms/Coroutines/coro-elide.ll @@ -22,16 +22,16 @@ ; a coroutine start function define i8* @f() { entry: - %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, + %id = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*)) - %hdl = call i8* @llvm.coro.frame(token %tok) + %hdl = call i8* @llvm.coro.begin(token %id, i8* null) ret i8* %hdl } ; CHECK-LABEL: @callResume( define void @callResume() { entry: -; CHECK: call token @llvm.coro.begin +; CHECK: call i8* @llvm.coro.begin %hdl = call i8* @f() ; CHECK-NEXT: call void 
@print(i32 0) @@ -51,7 +51,7 @@ ; CHECK-LABEL: @eh( define void @eh() personality i8* null { entry: -; CHECK: call token @llvm.coro.begin +; CHECK: call i8* @llvm.coro.begin %hdl = call i8* @f() ; CHECK-NEXT: call void @print(i32 0) @@ -71,8 +71,8 @@ ; no devirtualization here, since coro.begin info parameter is null define void @no_devirt_info_null() { entry: - %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null) - %hdl = call i8* @llvm.coro.frame(token %tok) + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) + %hdl = call i8* @llvm.coro.begin(token %id, i8* null) ; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) @@ -107,7 +107,7 @@ ret void } - -declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) -declare i8* @llvm.coro.frame(token) +declare token @llvm.coro.id(i32, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) +declare i8* @llvm.coro.frame() declare i8* @llvm.coro.subfn.addr(i8*, i8) Index: llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll +++ llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll @@ -22,17 +22,16 @@ ; a coroutine start function define i8* @f() personality i8* null { entry: - %elide = call i8* @llvm.coro.alloc() - %need.dyn.alloc = icmp ne i8* %elide, null - br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc + %id = call token @llvm.coro.id(i32 0, i8* null, + i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) + %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin dyn.alloc: %alloc = call i8* @CustomAlloc(i32 4) br label %coro.begin coro.begin: - %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] - %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, - i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers 
to i8*)) - %hdl = call i8* @llvm.coro.frame(token %beg) + %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ] + %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi) invoke void @may_throw() to label %ret unwind label %ehcleanup ret: @@ -84,10 +83,10 @@ ; coro.begin not pointint to coro.alloc) define i8* @f_no_elision() personality i8* null { entry: + %id = call token @llvm.coro.id(i32 0, i8* null, + i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) %alloc = call i8* @CustomAlloc(i32 4) - %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, - i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) - %hdl = call i8* @llvm.coro.frame(token %beg) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) ret i8* %hdl } @@ -117,9 +116,9 @@ ret void } - -declare i8* @llvm.coro.alloc() +declare token @llvm.coro.id(i32, i8*, i8*) +declare i1 @llvm.coro.alloc(token) declare i8* @llvm.coro.free(i8*) -declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) declare i8* @llvm.coro.frame(token) declare i8* @llvm.coro.subfn.addr(i8*, i8) Index: llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll =================================================================== --- llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll +++ llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll @@ -7,10 +7,12 @@ ; CHECK: CoroSplit: Processing coroutine 'f' state: 0 ; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1 -declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*) +declare token @llvm.coro.id(i32, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) ; a coroutine start function define void @f() { - call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null) + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null) + call i8* @llvm.coro.begin(token %id, i8* null) ret void }