diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -853,6 +853,9 @@ // offset values). We cannot handle unknown offsets and will assert. This is the // potential issue left out. An ideal solution would likely require a // significant redesign. +// TODO: Optimization: If all escapes of a pointer happen in the last suspend +// region, i.e. there won't be a CoroSuspend after the escapes, we should still +// be able to put the alloca on the stack. namespace { struct AllocaUseVisitor : PtrUseVisitor { using Base = PtrUseVisitor; @@ -2087,8 +2090,17 @@ NewLifetime->insertBefore(DomBB->getTerminator()); // All the outsided lifetime.start markers are no longer necessary. - for (Instruction *S : Lifetimes) + // The bitcasts used for those markers are also no longer needed. + for (Instruction *S : Lifetimes) { + Instruction *Ptr = cast(S->getOperand(1)); S->eraseFromParent(); + if (!isa(Ptr)) { + assert(Ptr->user_empty() && + "Bitcasts used for lifetime start intrinsics should not " + "have other uses"); + Ptr->eraseFromParent(); + } + } break; } @@ -2099,24 +2111,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape, const SuspendCrossingInfo &Checker, SmallVectorImpl &Allocas) { - // Collect lifetime.start info for each alloca. - using LifetimeStart = SmallPtrSet; - llvm::DenseMap> LifetimeMap; - for (Instruction &I : instructions(F)) { - auto *II = dyn_cast(&I); - if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) - continue; - - if (auto *OpInst = dyn_cast(II->getOperand(1))) { - if (auto *AI = dyn_cast(OpInst->stripPointerCasts())) { - - if (LifetimeMap.find(AI) == LifetimeMap.end()) - LifetimeMap[AI] = std::make_unique(); - LifetimeMap[AI]->insert(isa(OpInst) ? 
II : OpInst); - } - } - } - for (Instruction &I : instructions(F)) { auto *AI = dyn_cast(&I); if (!AI) @@ -2126,23 +2120,6 @@ if (AI == Shape.SwitchLowering.PromiseAlloca) { continue; } - bool ShouldLiveOnFrame = false; - auto Iter = LifetimeMap.find(AI); - if (Iter != LifetimeMap.end()) { - // Check against lifetime.start if the instruction has the info. - for (User *U : I.users()) { - for (auto *S : *Iter->second) - if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) - break; - if (ShouldLiveOnFrame) - break; - } - if (!ShouldLiveOnFrame) - continue; - } - // At this point, either ShouldLiveOnFrame is true or we didn't have - // lifetime information. We will need to rely on more precise pointer - // tracking. DominatorTree DT(F); AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, *Shape.CoroBegin, Checker}; diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll @@ -0,0 +1,104 @@ +; Tests that CoroSplit can successfully determine that allocas should live on the frame +; if their aliases are used across suspension points through PHINode. 
+; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f(i1 %n) "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + br i1 %n, label %flag_true, label %flag_false + +flag_true: + %x.alias = bitcast i64* %x to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %x.alias) + br label %merge + +flag_false: + %y.alias = bitcast i64* %y to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %y.alias) + br label %merge + +merge: + %alias_phi = phi i8* [ %x.alias, %flag_true ], [ %y.alias, %flag_false ] + store i8 1, i8* %alias_phi + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i8* %alias_phi) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @print(i8*) +declare noalias i8* @malloc(i32) +declare void @free(i8*) + +; Verify that both x and y are put in the frame. 
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i8*, i1 } + +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*)) +; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @malloc(i32 48) +; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[ALLOC]]) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[HDL]] to %f.Frame* +; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[X_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: [[Y_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3 +; CHECK-NEXT: br i1 [[N:%.*]], label [[FLAG_TRUE:%.*]], label [[FLAG_FALSE:%.*]] +; CHECK: flag_true: +; CHECK-NEXT: [[X_ALIAS:%.*]] = bitcast i64* [[X_RELOAD_ADDR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[X_ALIAS]]) +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: flag_false: +; CHECK-NEXT: [[Y_ALIAS:%.*]] = bitcast i64* [[Y_RELOAD_ADDR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[Y_ALIAS]]) +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[ALIAS_PHI:%.*]] = phi i8* [ [[X_ALIAS]], [[FLAG_TRUE]] ], [ [[Y_ALIAS]], [[FLAG_FALSE]] ] +; CHECK-NEXT: [[ALIAS_PHI_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: store i8* [[ALIAS_PHI]], i8** [[ALIAS_PHI_SPILL_ADDR]], align 8 +; CHECK-NEXT: store i8 1, i8* [[ALIAS_PHI]], align 1 +; 
CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 5 +; CHECK-NEXT: store i1 false, i1* [[INDEX_ADDR1]], align 1 +; CHECK-NEXT: ret i8* [[HDL]] +; +; +; CHECK-LABEL: @f.resume( +; CHECK-NEXT: entry.resume: +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8* +; CHECK-NEXT: [[ALIAS_PHI_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: [[ALIAS_PHI_RELOAD:%.*]] = load i8*, i8** [[ALIAS_PHI_RELOAD_ADDR]], align 8 +; CHECK-NEXT: call void @print(i8* [[ALIAS_PHI_RELOAD]]) +; CHECK-NEXT: call void @free(i8* [[VFRAME]]) +; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll @@ -11,7 +11,7 @@ declare void @print(i32) %i8.array = type { [100 x i8] } -declare void @consume.i8.array(%i8.array*) +declare void @consume.i8.array(%i8.array* nocapture) define void @a.gep() "coroutine.presplit"="1" { entry: