diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1548,6 +1548,75 @@ return; } +/// For each local variable whose users are all confined to a single +/// suspended region, we sink its lifetime.start markers into the block that +/// follows the suspend block. Doing so minimizes the lifetime of each variable, +/// hence minimizing the amount of data we end up putting on the frame. +static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, + SuspendCrossingInfo &Checker) { + DominatorTree DT(F); + + // Collect all possible basic blocks which may dominate all uses of allocas. + SmallPtrSet DomSet; + DomSet.insert(&F.getEntryBlock()); + for (auto *CSI : Shape.CoroSuspends) { + BasicBlock *SuspendBlock = CSI->getParent(); + assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && + "should have split coro.suspend into its own block"); + DomSet.insert(SuspendBlock->getSingleSuccessor()); + } + + for (Instruction &I : instructions(F)) { + if (!isa(&I)) + continue; + + for (BasicBlock *DomBB : DomSet) { + bool Valid = true; + SmallVector BCInsts; + + auto isUsedByLifetimeStart = [&](Instruction *I) { + if (isa(I) && I->hasOneUse()) + if (auto *IT = dyn_cast(I->user_back())) + return IT->getIntrinsicID() == Intrinsic::lifetime_start; + return false; + }; + + for (User *U : I.users()) { + Instruction *UI = cast(U); + // For all users except lifetime.start markers, if they are all + // dominated by one of the basic blocks and do not cross + // suspend points as well, then there is no need to spill the + // instruction. + if (!DT.dominates(DomBB, UI->getParent()) || + Checker.isDefinitionAcrossSuspend(DomBB, U)) { + // Skip bitcast used by lifetime.start markers. 
+ if (isUsedByLifetimeStart(UI)) { + BCInsts.push_back(UI); + continue; + } + Valid = false; + break; + } + } + // Sink lifetime.start markers to the dominating block when they are + // only used outside the region. + if (Valid && BCInsts.size() != 0) { + auto *NewBitcast = BCInsts[0]->clone(); + auto *NewLifetime = cast(BCInsts[0]->user_back())->clone(); + NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast); + NewBitcast->insertBefore(DomBB->getTerminator()); + NewLifetime->insertBefore(DomBB->getTerminator()); + + // All the outside lifetime.start markers are no longer necessary. + for (Instruction *S : BCInsts) { + S->user_back()->eraseFromParent(); + } + break; + } + } + } +} + void coro::buildCoroutineFrame(Function &F, Shape &Shape) { eliminateSwiftError(F, Shape); @@ -1598,6 +1667,7 @@ Spills.clear(); } + sinkLifetimeStartMarkers(F, Shape, Checker); // Collect lifetime.start info for each alloca. using LifetimeStart = SmallPtrSet; llvm::DenseMap> LifetimeMap; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1239,103 +1239,6 @@ S.resize(N); } -/// For every local variable that has lifetime intrinsics markers, we sink -/// their lifetime.start marker to the places where the variable is being -/// used for the first time. Doing so minimizes the lifetime of each variable, -/// hence minimizing the amount of data we end up putting on the frame. -static void sinkLifetimeStartMarkers(Function &F) { - DominatorTree Dom(F); - for (Instruction &I : instructions(F)) { - // We look for this particular pattern: - // %tmpX = alloca %.., align ... - // %0 = bitcast %...* %tmpX to i8* - // call void @llvm.lifetime.start.p0i8(i64 ..., i8* nonnull %0) #2 - if (!isa(&I)) - continue; - // There can be multiple lifetime start markers for the same variable. 
- SmallPtrSet LifetimeStartInsts; - // SinkBarriers stores all instructions that use this local variable. - // When sinking the lifetime start intrinsics, we can never sink past - // these barriers. - SmallPtrSet SinkBarriers; - bool Valid = true; - auto AddSinkBarrier = [&](Instruction *I) { - // When adding a new barrier to SinkBarriers, we maintain the case - // that no instruction in SinkBarriers dominates another instruction. - SmallPtrSet ToRemove; - bool ShouldAdd = true; - for (Instruction *S : SinkBarriers) { - if (I == S || Dom.dominates(S, I)) { - ShouldAdd = false; - break; - } else if (Dom.dominates(I, S)) { - ToRemove.insert(S); - } - } - if (ShouldAdd) { - SinkBarriers.insert(I); - for (Instruction *R : ToRemove) { - SinkBarriers.erase(R); - } - } - }; - for (User *U : I.users()) { - if (!isa(U)) - continue; - for (User *CU : U->users()) { - // If we see any user of CastInst that's not lifetime start/end - // intrinsics, give up because it's too complex. - if (auto *CUI = dyn_cast(CU)) { - if (CUI->getIntrinsicID() == Intrinsic::lifetime_start) - LifetimeStartInsts.insert(CUI); - else if (CUI->getIntrinsicID() == Intrinsic::lifetime_end) - AddSinkBarrier(CUI); - else - Valid = false; - } else { - Valid = false; - } - } - } - if (!Valid || LifetimeStartInsts.empty()) - continue; - - for (User *U : I.users()) { - if (isa(U)) - continue; - // Every user of the variable is also a sink barrier. - AddSinkBarrier(cast(U)); - } - - // For each sink barrier, we insert a lifetime start marker right - // before it. - for (Instruction *S : SinkBarriers) { - if (auto *IS = dyn_cast(S)) { - if (IS->getIntrinsicID() == Intrinsic::lifetime_end) { - // If we have a lifetime end marker in SinkBarriers, meaning it's - // not dominated by any other users, we can safely delete it. - IS->eraseFromParent(); - continue; - } - } - // We find an existing lifetime.start marker that domintes the barrier, - // clone it and insert it right before the barrier. 
We cannot clone an - // arbitrary lifetime.start marker because we want to make sure the - // BitCast instruction referred in the marker also dominates the barrier. - for (const IntrinsicInst *LifetimeStart : LifetimeStartInsts) { - if (Dom.dominates(LifetimeStart, S)) { - LifetimeStart->clone()->insertBefore(S); - break; - } - } - } - // All the old lifetime.start markers are no longer necessary. - for (IntrinsicInst *S : LifetimeStartInsts) { - S->eraseFromParent(); - } - } -} - static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones) { assert(Shape.ABI == coro::ABI::Switch); @@ -1525,7 +1428,6 @@ return Shape; simplifySuspendPoints(Shape); - sinkLifetimeStartMarkers(F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll rename from llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll rename to llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll @@ -1,5 +1,5 @@ ; Tests that coro-split will optimize the lifetime.start maker of each local variable, -; sink them to the places closest to the actual use. +; sink them to the places after the suspend block. 
; RUN: opt < %s -coro-split -S | FileCheck %s ; RUN: opt < %s -passes=coro-split -S | FileCheck %s @@ -43,14 +43,14 @@ ; CHECK-LABEL: @a.resume( ; CHECK: %testval = alloca i32, align 4 +; CHECK-NEXT: %0 = bitcast i32* %testval to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) ; CHECK-NEXT: getelementptr inbounds %a.Frame ; CHECK-NEXT: getelementptr inbounds %"struct.lean_future::Awaiter" -; CHECK-NEXT: %cast1 = bitcast i32* %testval to i8* ; CHECK-NEXT: %val = load i32, i32* %Result -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %cast1) ; CHECK-NEXT: %test = load i32, i32* %testval ; CHECK-NEXT: call void @print(i32 %test) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %0) ; CHECK-NEXT: call void @print(i32 %val) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll rename from llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll rename to llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll @@ -1,5 +1,5 @@ ; Tests that coro-split will optimize the lifetime.start maker of each local variable, -; sink them to the places closest to the actual use. +; sink them to the places after the suspend block. 
; RUN: opt < %s -coro-split -S | FileCheck %s ; RUN: opt < %s -passes=coro-split -S | FileCheck %s @@ -7,6 +7,7 @@ %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } %"struct.lean_future::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" } +declare i1 @getcond() declare i8* @malloc(i64) declare void @print(i32) @@ -20,7 +21,10 @@ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %alloc = call i8* @malloc(i64 16) #3 %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + %testcond = call i1 @getcond() + br i1 %testcond, label %if.suspend, label %else.direct +if.suspend: %save = call token @llvm.coro.save(i8* null) %Result.i19 = getelementptr inbounds %"struct.lean_future::Awaiter", %"struct.lean_future::Awaiter"* %ref.tmp7, i64 0, i32 0 %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) @@ -28,31 +32,36 @@ i8 0, label %await.ready i8 1, label %exit ] + +else.direct: + br label %after.await + await.ready: %StrayCoroSave = call token @llvm.coro.save(i8* null) %val = load i32, i32* %Result.i19 %test = load i32, i32* %testval call void @print(i32 %test) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast) call void @print(i32 %val) + br label %after.await + +after.await: + %test1 = load i32, i32* %testval + call void @print(i32 %test1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast) br label %exit + exit: call i1 @llvm.coro.end(i8* null, i1 false) ret void } ; CHECK-LABEL: @a.resume( -; CHECK: %testval = alloca i32, align 4 -; CHECK-NEXT: getelementptr inbounds %a.Frame -; CHECK-NEXT: getelementptr inbounds %"struct.lean_future::Awaiter" -; CHECK-NEXT: %cast1 = bitcast i32* %testval to i8* -; CHECK-NEXT: %val = load i32, i32* %Result -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %cast1) -; CHECK-NEXT: %test = load i32, i32* %testval -; CHECK-NEXT: call void @print(i32 %test) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast1) -; CHECK-NEXT: call void 
@print(i32 %val) -; CHECK-NEXT: ret void +; CHECK: %[[VAL:testval.+]] = getelementptr inbounds %a.Frame +; CHECK-NOT: %testval = alloca i32, align 4 +; CHECK-NOT: %[[CAST:.+]] = bitcast i32* %testval to i8* +; CHECK-NOT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %[[CAST]]) +; CHECK: %test = load i32, i32* %[[VAL]] +; CHECK-NOT: %test = load i32, i32* %testval declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) declare i1 @llvm.coro.alloc(token) #3