Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2333,14 +2333,54 @@ SuspendCrossingInfo &Checker) { DominatorTree DT(F); - // Collect all possible basic blocks which may dominate all uses of allocas. + // Collect all possible basic blocks which may dominate all uses of allocas + // except the entry block. SmallPtrSet DomSet; - DomSet.insert(&F.getEntryBlock()); for (auto *CSI : Shape.CoroSuspends) { BasicBlock *SuspendBlock = CSI->getParent(); assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && "should have split coro.suspend into its own block"); - DomSet.insert(SuspendBlock->getSingleSuccessor()); + if (isa(CSI) || isa(CSI)) + DomSet.insert(SuspendBlock->getSingleSuccessor()); + else { + // We can't insert lifetime markers between coro.suspend and its user + // (generally a switch instruction) since it may prevent symmetric + // transfer (converting the call before coro.suspend into a musttail + // call). So we choose to insert lifetime markers to the successors of the + // dispatch blocks. + BasicBlock *DispatchBB = SuspendBlock->getSingleSuccessor(); +#ifndef NDEBUG + // Guard code to check that the pattern is one we expect. 
+ // Generally we want the code to be one of the following: + // ``` + // %suspend = call i8 @llvm.coro.suspend() + // switch i8 %suspend, label %await.suspend [ + // i8 0, label %await.resume + // i8 1, label %await.destroy + // ] + // ``` + // Or + // ``` + // %suspend = call i8 @llvm.coro.suspend() + // %cond = icmp eq i8 %suspend, @constant + // br i1 %cond, label %One, label %Other + // ``` + auto *Term = DispatchBB->getTerminator(); + bool CorrectPattern = false; + if (isa(Term) && Term->getOperand(0) == CSI) + CorrectPattern = true; + else if (isa(Term)) { + if (auto *ICI = dyn_cast(Term->getOperand(0))) + if (ICI->getOperand(0) == CSI) + CorrectPattern = true; + } + if (!CorrectPattern) + report_fatal_error( + "Unthinkable pattern when sinking lifetime markers for coroutine."); +#endif + for (auto *BB : llvm::successors(DispatchBB)) + DomSet.insert(BB); + } } for (Instruction &I : instructions(F)) { @@ -2391,18 +2431,16 @@ // Sink lifetime.start markers to dominate block when they are // only used outside the region. if (Valid && Lifetimes.size() != 0) { - // May be AI itself, when the type of AI is i8* - auto *NewBitCast = [&](AllocaInst *AI) -> Value* { - if (isa(Lifetimes[0]->getOperand(1))) - return AI; - auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext()); - return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "", - DomBB->getTerminator()); - }(AI); - auto *NewLifetime = Lifetimes[0]->clone(); - NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast); - NewLifetime->insertBefore(DomBB->getTerminator()); + if (isa(NewLifetime->getOperand(1))) + NewLifetime->insertBefore(DomBB->getFirstNonPHI()); + else { + auto *NewBitCast = CastInst::Create( + Instruction::BitCast, AI, Type::getInt8PtrTy(F.getContext()), "", + DomBB->getFirstNonPHI()); + NewLifetime->setOperand(1, NewBitCast); + NewLifetime->insertAfter(NewBitCast); + } // All the outsided lifetime.start markers are no longer necessary. 
for (Instruction *S : Lifetimes) Index: llvm/test/Transforms/Coroutines/ArgAddr.ll =================================================================== --- llvm/test/Transforms/Coroutines/ArgAddr.ll +++ llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -37,10 +37,9 @@ store i32 %dec, i32* %n.addr call void @print(i32 %3) %4 = call i8 @llvm.coro.suspend(token none, i1 false) - %conv = sext i8 %4 to i32 - switch i32 %conv, label %coro_Suspend [ - i32 0, label %for.cond - i32 1, label %coro_Cleanup + switch i8 %4, label %coro_Suspend [ + i8 0, label %for.cond + i8 1, label %coro_Cleanup ] coro_Cleanup: Index: llvm/test/Transforms/Coroutines/coro-debug.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-debug.ll +++ llvm/test/Transforms/Coroutines/coro-debug.ll @@ -19,19 +19,19 @@ %2 = call i8* @llvm.coro.begin(token %0, i8* %call) #7, !dbg !16 store i8* %2, i8** %coro_hdl, align 8, !dbg !16 %3 = call i8 @llvm.coro.suspend(token none, i1 false), !dbg !17 - %conv = sext i8 %3 to i32, !dbg !17 %late_local = alloca i32, align 4 call void @coro.devirt.trigger(i8* null) - switch i32 %conv, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 + switch i8 %3, label %sw.default [ + i8 0, label %sw.bb + i8 1, label %sw.bb1 ], !dbg !17 sw.bb: ; preds = %entry %direct = load i32, i32* %x.addr, align 4, !dbg !14 %gep = getelementptr inbounds [16 x i8], [16 x i8]* %undef, i32 %direct, !dbg !14 + %direct_const = sext i8 %3 to i32 call void @llvm.dbg.declare(metadata [16 x i8] *%gep, metadata !27, metadata !13), !dbg !14 - call void @llvm.dbg.declare(metadata i32 %conv, metadata !26, metadata !13), !dbg !14 + call void @llvm.dbg.declare(metadata i32 %direct_const, metadata !26, metadata !13), !dbg !14 call void @llvm.dbg.declare(metadata i32 %direct, metadata !25, metadata !13), !dbg !14 call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14 call void @llvm.dbg.declare(metadata i8** 
%coro_hdl, metadata !15, metadata !13), !dbg !16 @@ -155,10 +155,10 @@ ; CHECK: call void @llvm.dbg.declare(metadata %f.Frame** %[[DBG_PTR]], metadata ![[RESUME_DIRECT:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[EXPR_TAIL]]) ; CHECK: store %f.Frame* {{.*}}, %f.Frame** %[[DBG_PTR]] ; CHECK-NOT: alloca %struct.test* +; CHECK: call void @coro.devirt.trigger(i8* null) ; CHECK: call void @llvm.dbg.declare(metadata i32 0, metadata ![[RESUME_CONST:[0-9]+]], metadata !DIExpression()) ; Note that keeping the undef value here could be acceptable, too. ; CHECK-NOT: call void @llvm.dbg.declare(metadata i32* undef, metadata !{{[0-9]+}}, metadata !DIExpression()) -; CHECK: call void @coro.devirt.trigger(i8* null) ; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]] ; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]] Index: llvm/test/Transforms/Coroutines/coro-materialize.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-materialize.ll +++ llvm/test/Transforms/Coroutines/coro-materialize.ll @@ -16,7 +16,7 @@ resume1: %inc2 = add i32 %inc1, 1 %sp2 = call i8 @llvm.coro.suspend(token none, i1 false) - switch i8 %sp1, label %suspend [i8 0, label %resume2 + switch i8 %sp2, label %suspend [i8 0, label %resume2 i8 1, label %cleanup] resume2: Index: llvm/test/Transforms/Coroutines/coro-split-dbg.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-split-dbg.ll +++ llvm/test/Transforms/Coroutines/coro-split-dbg.ll @@ -27,10 +27,9 @@ tail call void (...) 
@bar() #7, !dbg !33 %3 = tail call token @llvm.coro.save(i8* null), !dbg !34 %4 = tail call i8 @llvm.coro.suspend(token %3, i1 false), !dbg !34 - %conv = sext i8 %4 to i32, !dbg !34 - switch i32 %conv, label %coro_Suspend [ - i32 0, label %for.cond - i32 1, label %coro_Cleanup + switch i8 %4, label %coro_Suspend [ + i8 0, label %for.cond + i8 1, label %coro_Cleanup ], !dbg !34 coro_Cleanup: ; preds = %for.cond Index: llvm/test/Transforms/Coroutines/coro-split-musttail.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-split-musttail.ll +++ llvm/test/Transforms/Coroutines/coro-split-musttail.ll @@ -27,9 +27,11 @@ %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ - i8 0, label %exit + i8 0, label %pre.exit i8 1, label %exit ] +pre.exit: + br label %exit exit: call i1 @llvm.coro.end(i8* null, i1 false) ret void Index: llvm/test/Transforms/Coroutines/coro-split-musttail2.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-split-musttail2.ll +++ llvm/test/Transforms/Coroutines/coro-split-musttail2.ll @@ -33,9 +33,11 @@ %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ - i8 0, label %exit + i8 0, label %pre.exit i8 1, label %exit ] +pre.exit: + br label %exit exit: call i1 @llvm.coro.end(i8* null, i1 false) ret void Index: llvm/test/Transforms/Coroutines/coro-split-musttail4.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-split-musttail4.ll @@ -0,0 +1,68 @@ +; Tests that sink lifetime doesn't affect musttail. 
+; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +declare void @fakeresume1(i8*) + +declare void @fakeresume2(i64* align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i8 + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloc.var) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + call fastcc void @fakeresume1(i8* null) + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume2(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(i8* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloc.var) + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; Verify that in the initial function resume is not marked with musttail. +; CHECK-LABEL: @g( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i8* null) + +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @g.resume( +; CHECK: musttail call fastcc void @fakeresume2(i64* align 8 null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @consume(i8*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone }