Index: docs/Coroutines.rst
===================================================================
--- docs/Coroutines.rst
+++ docs/Coroutines.rst
@@ -93,7 +93,7 @@
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
@@ -106,7 +106,7 @@
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -168,7 +168,7 @@
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call noalias i8* @malloc(i32 24)
%0 = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
%frame = bitcast i8* %0 to %f.frame*
@@ -227,7 +227,7 @@
.. code-block:: none
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@@ -245,7 +245,7 @@
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
@@ -417,7 +417,7 @@
entry:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
- %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@@ -436,7 +436,7 @@
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -699,7 +699,7 @@
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
; the second argument to coro.id points to the coroutine promise.
- %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
...
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
...
@@ -791,7 +791,7 @@
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare i8* @llvm.coro.free(i8* )
+ declare i8* @llvm.coro.free(token %id, i8* )
Overview:
"""""""""
@@ -803,8 +803,11 @@
Arguments:
""""""""""
-A pointer to the coroutine frame. This should be the same pointer that was
-returned by prior `coro.begin` call.
+The first argument is a token returned by a call to '``llvm.coro.id``'
+identifying the coroutine.
+
+The second argument is a pointer to the coroutine frame. This should be the same
+pointer that was returned by prior `coro.begin` call.
Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
@@ -812,7 +815,7 @@
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %frame)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
%mem_not_null = icmp ne i8* %mem, null
br i1 %mem_not_null, label %if.then, label %if.end
if.then:
@@ -827,7 +830,7 @@
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %frame)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
call void @free(i8* %mem)
ret void
@@ -864,7 +867,7 @@
.. code-block:: text
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%dyn.alloc.required = call i1 @llvm.coro.alloc(token %id)
br i1 %dyn.alloc.required, label %coro.alloc, label %coro.begin
@@ -909,7 +912,8 @@
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare token @llvm.coro.id(i32 , i8* , i8* )
+ declare token @llvm.coro.id(i32 , i8* , i8* ,
+ i8* )
Overview:
"""""""""
@@ -927,7 +931,10 @@
The second argument, if not `null`, designates a particular alloca instruction
to be a `coroutine promise`_.
-The third argument is `null` before coroutine is split, and later is replaced
+The third argument is `null` coming out of the frontend. The CoroEarly pass sets
+this argument to point to the function this coro.id belongs to.
+
+The fourth argument is `null` before coroutine is split, and later is replaced
to point to a private global constant array containing function pointers to
outlined resume and destroy parts of the coroutine.
@@ -1210,19 +1217,6 @@
This pass runs late to lower all coroutine related intrinsics not replaced by
earlier passes.
-Upstreaming sequence (rough plan)
-=================================
-#. Add documentation.
-#. Add coroutine intrinsics.
-#. Add empty coroutine passes.
-#. Add coroutine devirtualization + tests.
-#. Add CGSCC restart trigger + tests.
-#. Add coroutine heap elision + tests.
-#. Add custom allocation heap elision + tests. <== we are here
-#. Add coroutine splitting logic + tests.
-#. Add simple coroutine frame builder + tests.
-#. Add the rest of the logic + tests. (Maybe split further as needed).
-
Areas Requiring Attention
=========================
#. A coroutine frame is bigger than it could be. Adding stack packing and stack
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -603,15 +603,16 @@
// Coroutine Structure Intrinsics.
def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty,
- llvm_ptr_ty],
+ llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrReadMem,
ReadNone<1>, ReadOnly<2>, NoCapture<2>]>;
def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[WriteOnly<1>]>;
-def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
- [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
+def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<1>,
+ NoCapture<1>]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -3873,7 +3873,7 @@
default:
break;
case Intrinsic::coro_id: {
- auto *InfoArg = CS.getArgOperand(2)->stripPointerCasts();
+ auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
if (isa(InfoArg))
break;
auto *GV = dyn_cast(InfoArg);
Index: lib/Transforms/Coroutines/CoroEarly.cpp
===================================================================
--- lib/Transforms/Coroutines/CoroEarly.cpp
+++ lib/Transforms/Coroutines/CoroEarly.cpp
@@ -81,6 +81,7 @@
if (CII->getInfo().isPreSplit()) {
F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT);
setCannotDuplicate(CII);
+ CII->setCoroutineSelf();
}
}
break;
Index: lib/Transforms/Coroutines/CoroElide.cpp
===================================================================
--- lib/Transforms/Coroutines/CoroElide.cpp
+++ lib/Transforms/Coroutines/CoroElide.cpp
@@ -35,6 +35,7 @@
Lowerer(Module &M) : LowererBase(M) {}
void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
+ bool shouldElide() const;
bool processCoroId(CoroIdInst *, AAResults &AA);
};
} // end anonymous namespace
@@ -122,14 +123,6 @@
CA->eraseFromParent();
}
- // To suppress deallocation code, we replace all llvm.coro.free intrinsics
- // associated with this coro.begin with null constant.
- auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C));
- for (auto *CF : CoroFrees) {
- CF->replaceAllUsesWith(NullPtr);
- CF->eraseFromParent();
- }
-
// FIXME: Design how to transmit alignment information for every alloca that
// is spilled into the coroutine frame and recreate the alignment information
// here. Possibly we will need to do a mini SROA here and break the coroutine
@@ -148,9 +141,50 @@
removeTailCallAttribute(Frame, AA);
}
+// This function is used to check whether a terminator causes control flow to
+// leaves the function in some way.
+static bool returnsOrUnwindsToCaller(TerminatorInst *T) {
+ if (isa(T) || isa(T))
+ return true;
+
+ if (auto *CR = dyn_cast(T))
+ return CR->unwindsToCaller();
+ if (auto *CS = dyn_cast(T))
+ return CS->unwindsToCaller();
+
+ return false;
+}
+
+bool Lowerer::shouldElide() const {
+ // If no CoroAllocs, we cannot suppress allocation, so elision is not
+ // possible.
+ if (CoroAllocs.empty())
+ return false;
+
+ // Check that for every coro.begin there is a coro.destroy directly
+ // referencing the SSA value of that coro.begin. If the value escaped, then
+ // coro.destroy would have been referencing a memory location storing that
+ // value and not the virtual register.
+
+ SmallPtrSet ReferencedCoroBegins;
+
+ for (CoroSubFnInst* DA : DestroyAddr) {
+ if (auto *CB = dyn_cast(DA->getFrame()))
+ ReferencedCoroBegins.insert(CB);
+ else
+ return false;
+ }
+
+ // If size of the set is the same as total number of CoroBegins, means we
+ // found a coro.free or coro.destroy mentioning a coro.begin and we can
+ // perform heap elision.
+ return ReferencedCoroBegins.size() == CoroBegins.size();
+}
+
bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
CoroBegins.clear();
CoroAllocs.clear();
+ CoroFrees.clear();
ResumeAddr.clear();
DestroyAddr.clear();
@@ -160,6 +194,8 @@
CoroBegins.push_back(CB);
else if (auto *CA = dyn_cast(U))
CoroAllocs.push_back(CA);
+ else if (auto *CF = dyn_cast(U))
+ CoroFrees.push_back(CF);
}
// Collect all coro.subfn.addrs associated with coro.begin.
@@ -191,27 +227,20 @@
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
- if (DestroyAddr.empty())
- return true;
+ bool ShouldElide = shouldElide();
- auto *DestroyAddrConstant =
- ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex);
+ auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
+ Resumers,
+ ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
- // If there is a coro.alloc that llvm.coro.id refers to, we have the ability
- // to suppress dynamic allocation.
- if (!CoroAllocs.empty()) {
- // FIXME: The check above is overly lax. It only checks for whether we have
- // an ability to elide heap allocations, not whether it is safe to do so.
- // We need to do something like:
- // If for every exit from the function where coro.begin is
- // live, there is a coro.free or coro.destroy dominating that exit block,
- // then it is safe to elide heap allocation, since the lifetime of coroutine
- // is fully enclosed in its caller.
+ if (ShouldElide) {
auto *FrameTy = getFrameType(cast(ResumeAddrConstant));
elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
+ coro::replaceCoroFree(CoroId, /*Elide=*/true);
}
+
return true;
}
@@ -262,21 +291,21 @@
Changed = replaceDevirtTrigger(F);
L->CoroIds.clear();
- L->CoroFrees.clear();
- // Collect all PostSplit coro.ids and all coro.free.
+ // Collect all PostSplit coro.ids.
for (auto &I : instructions(F))
- if (auto *CF = dyn_cast(&I))
- L->CoroFrees.push_back(CF);
- else if (auto *CII = dyn_cast(&I))
+ if (auto *CII = dyn_cast(&I))
if (CII->getInfo().isPostSplit())
- L->CoroIds.push_back(CII);
+ // If it is the coroutine itself, don't touch it.
+ if (CII->getCoroutine() != CII->getFunction())
+ L->CoroIds.push_back(CII);
// If we did not find any coro.id, there is nothing to do.
if (L->CoroIds.empty())
return Changed;
AAResults &AA = getAnalysis().getAAResults();
+
for (auto *CII : L->CoroIds)
Changed |= L->processCoroId(CII, AA);
@@ -284,7 +313,6 @@
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired();
- AU.setPreservesCFG();
}
};
}
Index: lib/Transforms/Coroutines/CoroFrame.cpp
===================================================================
--- lib/Transforms/Coroutines/CoroFrame.cpp
+++ lib/Transforms/Coroutines/CoroFrame.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -182,8 +183,9 @@
}
// Iterate propagating consumes and kills until they stop changing
- int Iteration = 0; (void)Iteration;
-
+ int Iteration = 0;
+ (void)Iteration;
+
bool Changed;
do {
DEBUG(dbgs() << "iteration " << ++Iteration);
@@ -307,10 +309,10 @@
/*IsVarArgs=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
- if (Shape.CoroSuspends.size() > UINT32_MAX)
- report_fatal_error("Cannot handle coroutine with this many suspend points");
+ // Figure out how wide should be an integer type storing the suspend index.
+ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
- SmallVector Types{FnPtrTy, FnPtrTy, Type::getInt32Ty(C)};
+ SmallVector Types{FnPtrTy, FnPtrTy, Type::getIntNTy(C, IndexBits)};
Value *CurrentDef = nullptr;
// Create an entry for every spilled value.
@@ -333,13 +335,6 @@
return FrameTy;
}
-// Returns the index of the last non-spill field in the coroutine frame.
-// 2 - if there is no coroutine promise specified or 3, if there is.
-static unsigned getLastNonSpillIndex(coro::Shape &Shape) {
- // TODO: Add support for coroutine promise.
- return 2;
-}
-
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@@ -373,7 +368,7 @@
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
- unsigned Index = getLastNonSpillIndex(Shape);
+ unsigned Index = coro::Shape::LastKnownField;
// We need to keep track of any allocas that need "spilling"
// since they will live in the coroutine frame now, all access to them
@@ -622,6 +617,10 @@
// frame.
if (isa(&I))
continue;
+ // A token returned CoroIdInst is used to tie together structural intrinsics
+ // in a coroutine. It should not be saved to the coroutine frame.
+ if (isa(&I))
+ continue;
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U)) {
Index: lib/Transforms/Coroutines/CoroInstr.h
===================================================================
--- lib/Transforms/Coroutines/CoroInstr.h
+++ lib/Transforms/Coroutines/CoroInstr.h
@@ -11,7 +11,7 @@
// allows you to do things like:
//
// if (auto *SF = dyn_cast(Inst))
-// ... SF->getFrame() ...
+// ... SF->getFrame() ...
//
// All intrinsic function calls are instances of the call instruction, so these
// are all subclasses of the CallInst class. Note that none of these classes
@@ -37,6 +37,7 @@
RestartTrigger = -1,
ResumeIndex,
DestroyIndex,
+ CleanupIndex,
IndexLast,
IndexFirst = RestartTrigger
};
@@ -76,7 +77,8 @@
/// This represents the llvm.coro.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
- enum { AlignArg, PromiseArg, InfoArg };
+ enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
+
public:
// Info argument of coro.id is
// fresh out of the frontend: null ;
@@ -118,6 +120,16 @@
void setInfo(Constant *C) { setArgOperand(InfoArg, C); }
+ Function *getCoroutine() const {
+ return cast(getArgOperand(CoroutineArg)->stripPointerCasts());
+ }
+ void setCoroutineSelf() {
+ assert(isa(getArgOperand(CoroutineArg)) &&
+ "Coroutine argument is already assigned");
+ auto *const Int8PtrTy = Type::getInt8PtrTy(getContext());
+ setArgOperand(CoroutineArg,
+ ConstantExpr::getBitCast(getFunction(), Int8PtrTy));
+ }
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
@@ -142,7 +154,11 @@
/// This represents the llvm.coro.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
+ enum { IdArg, FrameArg };
+
public:
+ Value *getFrame() const { return getArgOperand(FrameArg); }
+
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_free;
@@ -157,9 +173,7 @@
enum { IdArg, MemArg };
public:
- CoroIdInst *getId() const {
- return cast(getArgOperand(IdArg));
- }
+ CoroIdInst *getId() const { return cast(getArgOperand(IdArg)); }
Value *getMem() const { return getArgOperand(MemArg); }
Index: lib/Transforms/Coroutines/CoroInternal.h
===================================================================
--- lib/Transforms/Coroutines/CoroInternal.h
+++ lib/Transforms/Coroutines/CoroInternal.h
@@ -46,6 +46,7 @@
bool declaresIntrinsics(Module &M, std::initializer_list);
void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
+void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
void updateCallGraph(Function &Caller, ArrayRef Funcs,
CallGraph &CG, CallGraphSCC &SCC);
@@ -71,9 +72,10 @@
// Field Indexes for known coroutine frame fields.
enum {
- ResumeField = 0,
- DestroyField = 1,
- IndexField = 2,
+ ResumeField,
+ DestroyField,
+ IndexField,
+ LastKnownField = IndexField
};
StructType *FrameTy;
@@ -82,6 +84,14 @@
SwitchInst* ResumeSwitch;
bool HasFinalSuspend;
+ IntegerType* getIndexType() const {
+ assert(FrameTy && "frame type not assigned");
+ return cast(FrameTy->getElementType(IndexField));
+ }
+ ConstantInt *getIndex(uint64_t Value) const {
+ return ConstantInt::get(getIndexType(), Value);
+ }
+
Shape() = default;
explicit Shape(Function &F) { buildFrom(F); }
void buildFrom(Function &F);
Index: lib/Transforms/Coroutines/CoroSplit.cpp
===================================================================
--- lib/Transforms/Coroutines/CoroSplit.cpp
+++ lib/Transforms/Coroutines/CoroSplit.cpp
@@ -64,7 +64,7 @@
uint32_t SuspendIndex = 0;
for (auto S : Shape.CoroSuspends) {
- ConstantInt *IndexVal = Builder.getInt32(SuspendIndex);
+ ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
// Replace CoroSave with a store to Index:
// %index.addr = getelementptr %f.frame... (index field number)
@@ -140,7 +140,6 @@
// resume or cleanup pass for every suspend point.
static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
BasicBlock *ResumeEntry, int8_t FnIndex) {
-
Module *M = F.getParent();
auto *FrameTy = Shape.FrameTy;
auto *FnPtrTy = cast(FrameTy->getElementType(0));
@@ -207,7 +206,11 @@
OldVFrame->replaceAllUsesWith(NewVFrame);
// Replace coro suspend with the appropriate resume index.
- auto *NewValue = Builder.getInt8(FnIndex);
+ // Replacing coro.suspend with (0) will result in control flow proceeding to
+ // a resume label associated with a suspend point, replacing it with (1) will
+ // result in control flow proceeding to a cleanup label associated with this
+ // suspend point.
+ auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0);
for (CoroSuspendInst *CS : Shape.CoroSuspends) {
auto *MappedCS = cast(VMap[CS]);
MappedCS->replaceAllUsesWith(NewValue);
@@ -219,11 +222,23 @@
// FIXME: coming in upcoming patches:
// replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
- // Store the address of this clone in the coroutine frame.
- Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
- auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr, 0,
- FnIndex, "fn.addr");
- Builder.CreateStore(NewF, G);
+ // We only store resume(0) and destroy(1) addresses in the coroutine frame.
+ // The cleanup(2) clone is only used during devirtualization when coroutine is
+ // eligible for heap elision and thus does not participate in indirect calls
+ // and does not need its address to be stored in the coroutine frame.
+ if (FnIndex < 2) {
+ // Store the address of this clone in the coroutine frame.
+ Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
+ auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr,
+ 0, FnIndex, "fn.addr");
+ Builder.CreateStore(NewF, G);
+ }
+
+ // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
+ // to suppress deallocation code.
+ coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]),
+ /*Elide=*/FnIndex == 2);
+
NewF->setCallingConv(CallingConv::Fast);
return NewF;
@@ -303,10 +318,12 @@
return;
buildCoroutineFrame(F, Shape);
+ replaceFrameSize(Shape);
auto *ResumeEntry = createResumeEntryBlock(F, Shape);
- auto *ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
- auto *DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
+ auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
+ auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
+ auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2);
// We no longer need coro.end in F.
removeCoroEnds(Shape);
@@ -314,11 +331,10 @@
postSplitCleanup(F);
postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
+ postSplitCleanup(*CleanupClone);
- replaceFrameSize(Shape);
-
- setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone});
- coro::updateCallGraph(F, {ResumeClone, DestroyClone}, CG, SCC);
+ setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
+ coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
}
// When we see the coroutine the first time, we insert an indirect call to a
Index: lib/Transforms/Coroutines/Coroutines.cpp
===================================================================
--- lib/Transforms/Coroutines/Coroutines.cpp
+++ lib/Transforms/Coroutines/Coroutines.cpp
@@ -127,6 +127,27 @@
return false;
}
+// Replace all coro.frees associated with the provided CoroId either with 'null'
+// if Elide is true and with its frame parameter otherwise.
+void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
+ SmallVector CoroFrees;
+ for (User *U : CoroId->users())
+ if (auto CF = dyn_cast(U))
+ CoroFrees.push_back(CF);
+
+ if (CoroFrees.empty())
+ return;
+
+ Value *Replacement =
+ Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()))
+ : CoroFrees.front()->getFrame();
+
+ for (CoroFreeInst *CF : CoroFrees) {
+ CF->replaceAllUsesWith(Replacement);
+ CF->eraseFromParent();
+ }
+}
+
// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which
// happens to be private. It is better for this functionality exposed by the
// CallGraph.
@@ -286,5 +307,5 @@
// Canonicalize coro.suspend by inserting a coro.save if needed.
for (CoroSuspendInst *CS : CoroSuspends)
if (!CS->getCoroSave())
- createCoroSave(CoroBegin, CoroSuspends.back());
+ createCoroSave(CoroBegin, CS);
}
Index: test/Transforms/Coroutines/coro-elide.ll
===================================================================
--- test/Transforms/Coroutines/coro-elide.ll
+++ test/Transforms/Coroutines/coro-elide.ll
@@ -23,6 +23,7 @@
define i8* @f() {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
+ i8* bitcast (i8*()* @f to i8*),
i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
ret i8* %hdl
@@ -31,10 +32,9 @@
; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
-; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
-; CHECK-NEXT: call void @print(i32 0)
+; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
@@ -51,10 +51,9 @@
; CHECK-LABEL: @eh(
define void @eh() personality i8* null {
entry:
-; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
-; CHECK-NEXT: call void @print(i32 0)
+; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
invoke void %1(i8* %hdl)
@@ -71,7 +70,7 @@
; no devirtualization here, since coro.begin info parameter is null
define void @no_devirt_info_null() {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
@@ -107,7 +106,7 @@
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame()
declare i8* @llvm.coro.subfn.addr(i8*, i8)
Index: test/Transforms/Coroutines/coro-heap-elide.ll
===================================================================
--- test/Transforms/Coroutines/coro-heap-elide.ll
+++ test/Transforms/Coroutines/coro-heap-elide.ll
@@ -11,19 +11,21 @@
declare fastcc void @f.resume(%f.frame*)
declare fastcc void @f.destroy(%f.frame*)
+declare fastcc void @f.cleanup(%f.frame*)
declare void @may_throw()
declare i8* @CustomAlloc(i32)
declare void @CustomFree(i8*)
-@f.resumers = internal constant
- [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy]
+@f.resumers = internal constant [3 x void (%f.frame*)*]
+ [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy, void (%f.frame*)* @f.cleanup]
; a coroutine start function
define i8* @f() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
- i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ i8* bitcast (i8*()* @f to i8*),
+ i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@@ -39,7 +41,7 @@
ehcleanup:
%tok = cleanuppad within none []
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
@@ -62,7 +64,7 @@
; CHECK-NOT: tail call void @bar(
; CHECK: call void @bar(
tail call void @bar(i8* %hdl)
-; CHECK: tail call void @bar(
+; CHECK: tail call void @bar(
tail call void @bar(i8* null)
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
@@ -70,7 +72,7 @@
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
-; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
%2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
%3 = bitcast i8* %2 to void (i8*)*
call fastcc void %3(i8* %hdl)
@@ -84,7 +86,8 @@
define i8* @f_no_elision() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
- i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ i8* bitcast (i8*()* @f_no_elision to i8*),
+ i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%alloc = call i8* @CustomAlloc(i32 4)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
ret i8* %hdl
@@ -116,9 +119,9 @@
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)
Index: test/Transforms/Coroutines/coro-split-00.ll
===================================================================
--- test/Transforms/Coroutines/coro-split-00.ll
+++ test/Transforms/Coroutines/coro-split-00.ll
@@ -3,7 +3,7 @@
define i8* @f() "coroutine.presplit"="1" {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@@ -16,7 +16,7 @@
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -45,13 +45,13 @@
; CHECK: call void @free(
; CHECK: ret void
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)
Index: test/Transforms/Coroutines/coro-split-01.ll
===================================================================
--- test/Transforms/Coroutines/coro-split-01.ll
+++ test/Transforms/Coroutines/coro-split-01.ll
@@ -3,7 +3,7 @@
define i8* @f() {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@@ -22,7 +22,7 @@
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -35,22 +35,18 @@
call void @llvm.coro.resume(i8* %hdl)
ret i32 0
; CHECK-LABEL: @main(
-; CHECK: call i8* @malloc
-; CHECK-NOT: call void @free
; CHECK: call void @print(i32 0)
-; CHECK-NOT: call void @free
; CHECK: call void @print(i32 1)
-; CHECK: call void @free
; CHECK: ret i32 0
}
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)
Index: test/Transforms/Coroutines/ex0.ll
===================================================================
--- test/Transforms/Coroutines/ex0.ll
+++ test/Transforms/Coroutines/ex0.ll
@@ -3,7 +3,7 @@
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@@ -20,7 +20,7 @@
br label %loop
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -43,9 +43,9 @@
; CHECK: ret i32 0
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
Index: test/Transforms/Coroutines/ex1.ll
===================================================================
--- test/Transforms/Coroutines/ex1.ll
+++ test/Transforms/Coroutines/ex1.ll
@@ -3,7 +3,7 @@
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
@@ -16,7 +16,7 @@
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -43,11 +43,11 @@
declare void @free(i8*)
declare void @print(i32)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i32 @llvm.coro.size.i32()
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare void @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)
Index: test/Transforms/Coroutines/ex2.ll
===================================================================
--- /dev/null
+++ test/Transforms/Coroutines/ex2.ll
@@ -0,0 +1,63 @@
+; Second example from Doc/Coroutines.rst (custom alloc and free functions)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+ %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+ br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call i8* @CustomAlloc(i32 %size)
+ br label %coro.begin
+coro.begin:
+ %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+ %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+ br label %loop
+loop:
+ %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
+ %inc = add nsw i32 %n.val, 1
+ call void @print(i32 %n.val)
+ %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %0, label %suspend [i8 0, label %loop
+ i8 1, label %cleanup]
+cleanup:
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+ %need.dyn.free = icmp ne i8* %mem, null
+ br i1 %need.dyn.free, label %dyn.free, label %suspend
+dyn.free:
+ call void @CustomFree(i8* %mem)
+ br label %suspend
+suspend:
+ call void @llvm.coro.end(i8* %hdl, i1 false)
+ ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+ %hdl = call i8* @f(i32 4)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.destroy(i8* %hdl)
+ ret i32 0
+; CHECK: call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK-NEXT: call void @print(i32 6)
+; CHECK-NEXT: ret i32 0
+}
+
+declare i8* @CustomAlloc(i32)
+declare void @CustomFree(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare void @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
Index: test/Transforms/Coroutines/ex3.ll
===================================================================
--- /dev/null
+++ test/Transforms/Coroutines/ex3.ll
@@ -0,0 +1,60 @@
+; Third example from Doc/Coroutines.rst (two suspend points)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call i8* @malloc(i32 %size)
+ %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+ br label %loop
+loop:
+ %n.val = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
+ call void @print(i32 %n.val) #4
+ %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %0, label %suspend [i8 0, label %loop.resume
+ i8 1, label %cleanup]
+loop.resume:
+ %inc = add nsw i32 %n.val, 1
+ %sub = xor i32 %n.val, -1
+ call void @print(i32 %sub)
+ %1 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %1, label %suspend [i8 0, label %loop
+ i8 1, label %cleanup]
+cleanup:
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+ call void @free(i8* %mem)
+ br label %suspend
+suspend:
+ call void @llvm.coro.end(i8* %hdl, i1 false)
+ ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+ %hdl = call i8* @f(i32 4)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.destroy(i8* %hdl)
+ ret i32 0
+; CHECK: call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 -5)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK: ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare void @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
Index: test/Transforms/Coroutines/restart-trigger.ll
===================================================================
--- test/Transforms/Coroutines/restart-trigger.ll
+++ test/Transforms/Coroutines/restart-trigger.ll
@@ -8,7 +8,7 @@
; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
define void @f() {
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@@ -21,7 +21,7 @@
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@@ -29,9 +29,9 @@
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)