Index: lib/Transforms/Coroutines/CoroElide.cpp =================================================================== --- lib/Transforms/Coroutines/CoroElide.cpp +++ lib/Transforms/Coroutines/CoroElide.cpp @@ -39,11 +39,29 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.setPreservesCFG(); } }; } +char CoroElide::ID = 0; +INITIALIZE_PASS_BEGIN( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) + +Pass *llvm::createCoroElidePass() { return new CoroElide(); } + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + // Go through the list of coro.subfn.addr intrinsics and replace them with the // provided constant. static void replaceWithConstant(Constant *Value, @@ -68,10 +86,75 @@ replaceAndRecursivelySimplify(I, Value); } +// See if any operand of the call instruction references the coroutine frame. +static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) { + for (Value *Op : CI->operand_values()) + if (AA.alias(Op, Frame) != NoAlias) + return true; + return false; +} + +// Look for any tail calls referencing the coroutine frame and remove tail +// attribute from them, since now coroutine frame resides on the stack and tail +// call implies that the function does not references anything on the stack. +static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { + Function &F = *Frame->getFunction(); + for (Instruction &I : instructions(F)) + if (auto *Call = dyn_cast(&I)) + if (Call->isTailCall() && operandReferences(Call, Frame, AA)) + Call->setTailCall(false); +} + +// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type. +static Type *getFrameType(Function *Resume) { + auto ArgType = Resume->getArgumentList().front().getType(); + return cast(ArgType)->getElementType(); +} + +// Finds first non alloca instruction in the entry block of a function. +static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { + for (Instruction &I : F->getEntryBlock()) + if (!isa(&I)) + return &I; + llvm_unreachable("no terminator in the entry block"); +} + +// To elide heap allocations we need to suppress code blocks guarded by +// llvm.coro.alloc and llvm.coro.free instructions. +static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy, + CoroAllocInst *AllocInst, AAResults &AA) { + LLVMContext &C = CoroBegin->getContext(); + auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction()); + + auto *Frame = new AllocaInst(FrameTy, "", InsertPt); + auto *FrameVoidPtr = + new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); + + // Replacing llvm.coro.alloc with non-null value will suppress dynamic + // allocation as it is expected for the frontend to generate the code that + // looks like: + // mem = coro.alloc(); + // if (!mem) mem = malloc(coro.size()); + // coro.begin(mem, ...) + AllocInst->replaceAllUsesWith(FrameVoidPtr); + AllocInst->eraseFromParent(); + + // To suppress deallocation code, we replace all llvm.coro.free intrinsics + // associated with this coro.begin with null constant. + auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); + coro::replaceCoroFree(CoroBegin, NullPtr); + CoroBegin->replaceAllUsesWith(FrameVoidPtr); + CoroBegin->eraseFromParent(); + + // Since now coroutine frame lives on the stack we need to make sure that + // any tail call referencing it, must be made non-tail call. + removeTailCallAttribute(Frame, AA); +} + // See if there are any coro.subfn.addr intrinsics directly referencing // the coro.begin. If found, replace them with an appropriate coroutine // subfunction associated with that coro.begin. -static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) { +static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) { SmallVector ResumeAddr; SmallVector DestroyAddr; @@ -99,11 +182,29 @@ "of coroutine subfunctions"); auto *ResumeAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex); + replaceWithConstant(ResumeAddrConstant, ResumeAddr); + + if (DestroyAddr.empty()) + return true; + auto *DestroyAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex); - - replaceWithConstant(ResumeAddrConstant, ResumeAddr); replaceWithConstant(DestroyAddrConstant, DestroyAddr); + + // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation. + auto *AllocInst = CoroBegin->getAlloc(); + + // FIXME: Do more sophisticated check for when we can do heap elision. + // Something like: for every exit from the function where coro.begin is live, + // there is a coro.free or coro.destroy that dominates that exit block. + // At the moment we simply assume that if we found at least one coro.destroy + // referencing the coro.begin, we can elide the heap allocation. + + if (AllocInst) { + auto FrameTy = getFrameType(cast(ResumeAddrConstant)); + elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA); + } + return true; } @@ -143,20 +244,9 @@ if (CoroBegins.empty()) return Changed; + AAResults &AA = getAnalysis().getAAResults(); for (auto *CB : CoroBegins) - Changed |= replaceIndirectCalls(CB); + Changed |= replaceIndirectCalls(CB, AA); return Changed; } - -char CoroElide::ID = 0; -INITIALIZE_PASS_BEGIN( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) -INITIALIZE_PASS_END( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) - -Pass *llvm::createCoroElidePass() { return new CoroElide(); } Index: lib/Transforms/Coroutines/CoroInstr.h =================================================================== --- lib/Transforms/Coroutines/CoroInstr.h +++ lib/Transforms/Coroutines/CoroInstr.h @@ -62,11 +62,46 @@ } }; +/// This represents the llvm.coro.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloc; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_free; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This class represents the llvm.coro.begin instruction. class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { enum { MemArg, AlignArg, PromiseArg, InfoArg }; public: + // See if there is a coro.alloc alternative to dynamic memory allocation. + CoroAllocInst *getAlloc() const { + if (auto PN = dyn_cast(getMem())) + for (Value *V : PN->incoming_values()) + if (auto *CA = dyn_cast(V)) + return CA; + return nullptr; + } + + Value *getMem() const { return getArgOperand(MemArg); } + Constant *getRawInfo() const { return cast(getArgOperand(InfoArg)->stripPointerCasts()); } Index: lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- lib/Transforms/Coroutines/CoroInternal.h +++ lib/Transforms/Coroutines/CoroInternal.h @@ -42,6 +42,7 @@ namespace coro { bool declaresIntrinsics(Module &M, std::initializer_list); +void replaceCoroFree(Value *FramePtr, Value *Replacement); // Keeps data and helper functions for lowering coroutine intrinsics. struct LowererBase { Index: lib/Transforms/Coroutines/Coroutines.cpp =================================================================== --- lib/Transforms/Coroutines/Coroutines.cpp +++ lib/Transforms/Coroutines/Coroutines.cpp @@ -122,3 +122,21 @@ return false; } + +// Find all llvm.coro.free instructions associated with the provided frame +// pointer and replace them with the provided replacement value. If the +// replacement value is not provided, use appropriately typed null constant. +void coro::replaceCoroFree(Value *FramePtr, Value *Replacement) { + SmallVector CoroFrees; + for (User *U : FramePtr->users()) + if (auto *CF = dyn_cast(U)) + CoroFrees.push_back(CF); + + if (CoroFrees.empty()) + return; + + for (CoroFreeInst *CF : CoroFrees) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } +} Index: test/Transforms/Coroutines/coro-heap-elide.ll =================================================================== --- /dev/null +++ test/Transforms/Coroutines/coro-heap-elide.ll @@ -0,0 +1,85 @@ +; Tests that the dynamic allocation and deallocation of the coroutine frame is +; elided and any tail calls referencing the coroutine frame has the tail +; call attribute removed. +; RUN: opt < %s -S -inline -coro-elide -instsimplify -simplifycfg | FileCheck %s + +declare void @print(i32) nounwind + +%f.frame = type {i32} + +declare void @bar(i8*) + +declare fastcc void @f.resume(%f.frame*) +declare fastcc void @f.destroy(%f.frame*) + +declare void @may_throw() +declare i8* @CustomAlloc(i32) +declare void @CustomFree(i8*) + +@f.resumers = internal constant + [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy] + +; a coroutine start function +define i8* @f() personality i8* null { +entry: + %elide = call i8* @llvm.coro.alloc() + %need.dyn.alloc = icmp ne i8* %elide, null + br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc +dyn.alloc: + %alloc = call i8* @CustomAlloc(i32 4) + br label %coro.begin +coro.begin: + %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ] + %hdl = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, + i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*)) + invoke void @may_throw() + to label %ret unwind label %ehcleanup +ret: + ret i8* %hdl + +ehcleanup: + %tok = cleanuppad within none [] + %mem = call i8* @llvm.coro.free(i8* %hdl) + %need.dyn.free = icmp ne i8* %mem, null + br i1 %need.dyn.free, label %dyn.free, label %if.end +dyn.free: + call void @CustomFree(i8* %mem) + br label %if.end +if.end: + cleanupret from %tok unwind to caller +} + +; CHECK-LABEL: @callResume( +define void @callResume() { +entry: +; CHECK: alloca %f.frame +; CHECK-NOT: coro.begin +; CHECK-NOT: CustomAlloc +; CHECK: call void @may_throw() + %hdl = call i8* @f() + +; Need to remove 'tail' from the first call to @bar +; CHECK-NOT: tail call +; CHECK: call void @bar( + tail call void @bar(i8* %hdl) +; CHECK: tail call void @bar( + tail call void @bar(i8* null) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame) + %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0) + %1 = bitcast i8* %0 to void (i8*)* + call fastcc void %1(i8* %hdl) + +; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame) + %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1) + %3 = bitcast i8* %2 to void (i8*)* + call fastcc void %3(i8* %hdl) + +; CHECK-NEXT: ret void + ret void +} + +declare i8* @llvm.coro.alloc() +declare i8* @llvm.coro.free(i8*) +declare i8* @llvm.coro.begin(i8*, i32, i8*, i8*) +declare i8* @llvm.coro.subfn.addr(i8*, i8) Index: test/Transforms/Coroutines/restart-trigger.ll =================================================================== --- /dev/null +++ test/Transforms/Coroutines/restart-trigger.ll @@ -0,0 +1,17 @@ +; Verifies that restart trigger forces IPO pipelines restart and the same +; coroutine is looked at by CoroSplit pass twice. +; REQUIRES: asserts +; RUN: opt < %s -S -O0 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s +; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s + +; CHECK: CoroSplit: Processing coroutine 'f' state: 0 +; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1 + +declare i8* @llvm.coro.begin(i8*, i32, i8*, i8*) + +; a coroutine start function +define i8* @f() { +entry: + %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, i8* null) + ret i8* %hdl +}