Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1662,50 +1662,70 @@ } for (Instruction &I : instructions(F)) { - if (!isa(&I)) + AllocaInst* AI = dyn_cast(&I); + if (!AI) continue; for (BasicBlock *DomBB : DomSet) { bool Valid = true; - SmallVector BCInsts; + SmallVector Lifetimes; - auto isUsedByLifetimeStart = [&](Instruction *I) { - if (isa(I) && I->hasOneUse()) - if (auto *IT = dyn_cast(I->user_back())) - return IT->getIntrinsicID() == Intrinsic::lifetime_start; + auto isLifetimeStart = [](Instruction* I) { + if (auto* II = dyn_cast(I)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; }; - for (User *U : I.users()) { + auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) { + if (isLifetimeStart(U)) { + Lifetimes.push_back(U); + return true; + } + if (!U->hasOneUse() || U->stripPointerCasts() != AI) + return false; + if (isLifetimeStart(U->user_back())) { + Lifetimes.push_back(U->user_back()); + return true; + } + return false; + }; + + for (User *U : AI->users()) { Instruction *UI = cast(U); // For all users except lifetime.start markers, if they are all // dominated by one of the basic blocks and do not cross // suspend points as well, then there is no need to spill the // instruction. if (!DT.dominates(DomBB, UI->getParent()) || - Checker.isDefinitionAcrossSuspend(DomBB, U)) { - // Skip bitcast used by lifetime.start markers. - if (isUsedByLifetimeStart(UI)) { - BCInsts.push_back(UI); + Checker.isDefinitionAcrossSuspend(DomBB, UI)) { + // Skip lifetime.start, GEP and bitcast used by lifetime.start + // markers. + if (collectLifetimeStart(UI, AI)) continue; - } Valid = false; break; } } // Sink lifetime.start markers to dominate block when they are // only used outside the region. 
- if (Valid && BCInsts.size() != 0) { - auto *NewBitcast = BCInsts[0]->clone(); - auto *NewLifetime = cast(BCInsts[0]->user_back())->clone(); - NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast); - NewBitcast->insertBefore(DomBB->getTerminator()); + if (Valid && Lifetimes.size() != 0) { + // May be AI itself, when the type of AI is i8* + auto *NewBitCast = [&](AllocaInst *AI) -> Value* { + if (isa(Lifetimes[0]->getOperand(1))) + return AI; + auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext()); + return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "", + DomBB->getTerminator()); + }(AI); + + auto *NewLifetime = Lifetimes[0]->clone(); + NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast); NewLifetime->insertBefore(DomBB->getTerminator()); // All the outsided lifetime.start markers are no longer necessary. - for (Instruction *S : BCInsts) { - S->user_back()->eraseFromParent(); - } + for (Instruction *S : Lifetimes) + S->eraseFromParent(); + break; } } @@ -1771,14 +1791,14 @@ if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) continue; - if (auto *OpInst = dyn_cast(I.getOperand(1))) - if (auto *AI = dyn_cast(OpInst->getOperand(0))) { + if (auto *OpInst = dyn_cast(II->getOperand(1))) { + if (auto *AI = dyn_cast(OpInst->stripPointerCasts())) { if (LifetimeMap.find(AI) == LifetimeMap.end()) LifetimeMap[AI] = std::make_unique(); - - LifetimeMap[AI]->insert(OpInst); + LifetimeMap[AI]->insert(isa(OpInst) ? II : OpInst); } + } } // Collect the spills for arguments and other not-materializable values. Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll @@ -0,0 +1,71 @@ +; Corresponding to coro-split-sink-lifetime-01.ll. This file tests whether the CoroFrame +; pass knows that the operand of the lifetime.start intrinsic may be a GEP as well. 
+; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +%"struct.std::coroutine_handle" = type { i8* } +%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } +%"struct.lean_future::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" } + +declare i8* @malloc(i64) +declare void @print(i32) + +%i8.array = type { [100 x i8] } +declare void @consume.i8.array(%i8.array*) + +define void @a.gep() "coroutine.presplit"="1" { +entry: + %ref.tmp7 = alloca %"struct.lean_future::Awaiter", align 8 + %testval = alloca %i8.array + %cast = getelementptr inbounds %i8.array, %i8.array* %testval, i64 0, i32 0, i64 0 + ; lifetime of %testval starts here, but not used until await.ready. + call void @llvm.lifetime.start.p0i8(i64 100, i8* %cast) + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %Result.i19 = getelementptr inbounds %"struct.lean_future::Awaiter", %"struct.lean_future::Awaiter"* %ref.tmp7, i64 0, i32 0 + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %StrayCoroSave = call token @llvm.coro.save(i8* null) + %val = load i32, i32* %Result.i19 + call void @consume.i8.array(%i8.array* %testval) + call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast) + call void @print(i32 %val) + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} +; CHECK-LABEL: @a.gep.resume( +; CHECK: %testval = alloca %i8.array +; CHECK-NEXT: %0 = bitcast %i8.array* %testval to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 100, i8* %0) +; CHECK-NEXT: getelementptr inbounds %a.gep.Frame +; CHECK-NEXT: getelementptr inbounds %"struct.lean_future::Awaiter" +; CHECK-NEXT: getelementptr inbounds 
%i8.array, %i8.array* %testval +; CHECK-NEXT: %val = load i32, i32* %Result +; CHECK-NEXT: call void @consume.i8.array(%i8.array* %testval) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast1) +; CHECK-NEXT: call void @print(i32 %val) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) #3 +declare noalias nonnull i8* @"\01??2@YAPEAX_K@Z"(i64) local_unnamed_addr +declare i64 @llvm.coro.size.i64() #5 +declare i8* @llvm.coro.begin(token, i8* writeonly) #3 +declare void @"\01?puts@@YAXZZ"(...) +declare token @llvm.coro.save(i8*) #3 +declare i8* @llvm.coro.frame() #5 +declare i8 @llvm.coro.suspend(token, i1) #3 +declare void @"\01??3@YAXPEAX@Z"(i8*) local_unnamed_addr #10 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 +declare i1 @llvm.coro.end(i8*, i1) #3 +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll @@ -0,0 +1,71 @@ +; Tests that coro-split will optimize the lifetime.start marker of each local variable, +; sinking them to the places after the suspend block. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +%"struct.std::coroutine_handle" = type { i8* } +%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } +%"struct.lean_future::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" } + +declare i8* @malloc(i64) +declare void @print(i32) +declare void @consume.i8(i8) + +define void @a() "coroutine.presplit"="1" { +entry: + %ref.tmp7 = alloca %"struct.lean_future::Awaiter", align 8 + %testval = alloca i8 + ; lifetime of %testval starts here, but not used until await.ready. 
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %testval) + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %Result.i19 = getelementptr inbounds %"struct.lean_future::Awaiter", %"struct.lean_future::Awaiter"* %ref.tmp7, i64 0, i32 0 + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %StrayCoroSave = call token @llvm.coro.save(i8* null) + %val = load i32, i32* %Result.i19 + %test = load i8, i8* %testval + call void @consume.i8(i8 %test) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %testval) + call void @print(i32 %val) + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; CHECK-LABEL: @a.resume( +; CHECK: %testval = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* %testval) +; CHECK-NEXT: getelementptr inbounds %a.Frame +; CHECK-NEXT: getelementptr inbounds %"struct.lean_future::Awaiter" +; CHECK-NEXT: %val = load i32, i32* %Result +; CHECK-NEXT: %test = load i8, i8* %testval +; CHECK-NEXT: call void @consume.i8(i8 %test) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* %testval) +; CHECK-NEXT: call void @print(i32 %val) +; CHECK-NEXT: ret void + + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) #3 +declare noalias nonnull i8* @"\01??2@YAPEAX_K@Z"(i64) local_unnamed_addr +declare i64 @llvm.coro.size.i64() #5 +declare i8* @llvm.coro.begin(token, i8* writeonly) #3 +declare void @"\01?puts@@YAXZZ"(...) 
+declare token @llvm.coro.save(i8*) #3 +declare i8* @llvm.coro.frame() #5 +declare i8 @llvm.coro.suspend(token, i1) #3 +declare void @"\01??3@YAXPEAX@Z"(i8*) local_unnamed_addr #10 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 +declare i1 @llvm.coro.end(i8*, i1) #3 +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 +