diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -857,7 +857,7 @@ : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {} void visit(Instruction &I) { - UserBBs.insert(I.getParent()); + Users.insert(&I); Base::visit(I); // If the pointer is escaped prior to CoroBegin, we have to assume it would // be written into before CoroBegin as well. @@ -960,6 +960,12 @@ handleAlias(GEPI); } + void visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() != Intrinsic::lifetime_start) + return Base::visitIntrinsicInst(II); + LifetimeStarts.insert(&II); + } + void visitCallBase(CallBase &CB) { for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op) if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) @@ -993,18 +999,40 @@ // after CoroBegin. Each entry contains the instruction and the offset in the // original Alloca. They need to be recreated after CoroBegin off the frame. DenseMap> AliasOffetMap{}; - SmallPtrSet UserBBs{}; + SmallPtrSet Users{}; + SmallPtrSet LifetimeStarts{}; bool MayWriteBeforeCoroBegin{false}; mutable llvm::Optional ShouldLiveOnFrame{}; bool computeShouldLiveOnFrame() const { + // If lifetime information is available, we check it first since it's + // more precise. We look at every pair of lifetime.start intrinsic and + // every basic block that uses the pointer to see if they cross suspension + // points. The uses cover both direct uses as well as indirect uses. + if (!LifetimeStarts.empty()) { + for (auto *I : Users) + for (auto *S : LifetimeStarts) + if (Checker.isDefinitionAcrossSuspend(*S, I)) + return true; + return false; + } + // FIXME: Ideally the isEscaped check should come at the beginning. + // However there are a few loose ends that need to be fixed first before + // we can do that. We need to make sure we are not over-conservative, so + // that the data accessed in-between await_suspend and symmetric transfer + // is always put on the stack, and also data accessed after coro.end is + // always put on the stack (esp the return object). To fix that, we need + // to: + // 1) Potentially treat sret as nocapture in calls + // 2) Special handle the return object and put it on the stack + // 3) Utilize lifetime.end intrinsic if (PI.isEscaped()) return true; - for (auto *BB1 : UserBBs) - for (auto *BB2 : UserBBs) - if (Checker.hasPathCrossingSuspendPoint(BB1, BB2)) + for (auto *U1 : Users) + for (auto *U2 : Users) + if (Checker.isDefinitionAcrossSuspend(*U1, U2)) return true; return false; @@ -2094,24 +2122,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape, const SuspendCrossingInfo &Checker, SmallVectorImpl &Allocas) { - // Collect lifetime.start info for each alloca. - using LifetimeStart = SmallPtrSet; - llvm::DenseMap> LifetimeMap; - for (Instruction &I : instructions(F)) { - auto *II = dyn_cast(&I); - if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) - continue; - - if (auto *OpInst = dyn_cast(II->getOperand(1))) { - if (auto *AI = dyn_cast(OpInst->stripPointerCasts())) { - - if (LifetimeMap.find(AI) == LifetimeMap.end()) - LifetimeMap[AI] = std::make_unique(); - LifetimeMap[AI]->insert(isa(OpInst) ? II : OpInst); - } - } - } - for (Instruction &I : instructions(F)) { auto *AI = dyn_cast(&I); if (!AI) @@ -2121,23 +2131,6 @@ if (AI == Shape.SwitchLowering.PromiseAlloca) { continue; } - bool ShouldLiveOnFrame = false; - auto Iter = LifetimeMap.find(AI); - if (Iter != LifetimeMap.end()) { - // Check against lifetime.start if the instruction has the info. - for (User *U : I.users()) { - for (auto *S : *Iter->second) - if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) - break; - if (ShouldLiveOnFrame) - break; - } - if (!ShouldLiveOnFrame) - continue; - } - // At this point, either ShouldLiveOnFrame is true or we didn't have - // lifetime information. We will need to rely on more precise pointer - // tracking. DominatorTree DT(F); AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, *Shape.CoroBegin, Checker}; diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll @@ -0,0 +1,104 @@ +; Tests that CoroSplit can succesfully determine allocas should live on the frame +; if their aliases are used across suspension points through PHINode. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f(i1 %n) "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + br i1 %n, label %flag_true, label %flag_false + +flag_true: + %x.alias = bitcast i64* %x to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %x.alias) + br label %merge + +flag_false: + %y.alias = bitcast i64* %y to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %y.alias) + br label %merge + +merge: + %alias_phi = phi i8* [ %x.alias, %flag_true ], [ %y.alias, %flag_false ] + store i8 1, i8* %alias_phi + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i8* %alias_phi) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @print(i8*) +declare noalias i8* @malloc(i32) +declare void @free(i8*) + +; Verify that both x and y are put in the frame. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i8*, i1 } + +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*)) +; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @malloc(i32 48) +; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[ALLOC]]) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[HDL]] to %f.Frame* +; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[X_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: [[Y_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3 +; CHECK-NEXT: br i1 [[N:%.*]], label [[FLAG_TRUE:%.*]], label [[FLAG_FALSE:%.*]] +; CHECK: flag_true: +; CHECK-NEXT: [[X_ALIAS:%.*]] = bitcast i64* [[X_RELOAD_ADDR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[X_ALIAS]]) +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: flag_false: +; CHECK-NEXT: [[Y_ALIAS:%.*]] = bitcast i64* [[Y_RELOAD_ADDR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[Y_ALIAS]]) +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[ALIAS_PHI:%.*]] = phi i8* [ [[X_ALIAS]], [[FLAG_TRUE]] ], [ [[Y_ALIAS]], [[FLAG_FALSE]] ] +; CHECK-NEXT: [[ALIAS_PHI_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: store i8* [[ALIAS_PHI]], i8** [[ALIAS_PHI_SPILL_ADDR]], align 8 +; CHECK-NEXT: store i8 1, i8* [[ALIAS_PHI]], align 1 +; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 5 +; CHECK-NEXT: store i1 false, i1* [[INDEX_ADDR1]], align 1 +; CHECK-NEXT: ret i8* [[HDL]] +; +; +; CHECK-LABEL: @f.resume( +; CHECK-NEXT: entry.resume: +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8* +; CHECK-NEXT: [[ALIAS_PHI_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: [[ALIAS_PHI_RELOAD:%.*]] = load i8*, i8** [[ALIAS_PHI_RELOAD_ADDR]], align 8 +; CHECK-NEXT: call void @print(i8* [[ALIAS_PHI_RELOAD]]) +; CHECK-NEXT: call void @free(i8* [[VFRAME]]) +; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-08.ll b/llvm/test/Transforms/Coroutines/coro-alloca-08.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-08.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +%"struct.std::coroutine_handle" = type { i8* } +%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } +%"struct.lean_future::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" } + +declare i8* @malloc(i64) + +%i8.array = type { [100 x i8] } +declare void @consume.i8.array(%i8.array*) + +; The lifetime of testval starts and ends before coro.suspend. Even though consume.i8.array +; might capture it, we can safely say it won't live across suspension. +define void @foo() "coroutine.presplit"="1" { +entry: + %testval = alloca %i8.array + %cast = getelementptr inbounds %i8.array, %i8.array* %testval, i64 0, i32 0, i64 0 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + call void @llvm.lifetime.start.p0i8(i64 100, i8* %cast) + call void @consume.i8.array(%i8.array* %testval) + call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %StrayCoroSave = call token @llvm.coro.save(i8* null) + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; The lifetime of testval starts after coro.suspend. So it will never live across suspension +; points. +define void @bar() "coroutine.presplit"="1" { +entry: + %testval = alloca %i8.array + %cast = getelementptr inbounds %i8.array, %i8.array* %testval, i64 0, i32 0, i64 0 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %StrayCoroSave = call token @llvm.coro.save(i8* null) + + call void @llvm.lifetime.start.p0i8(i64 100, i8* %cast) + call void @consume.i8.array(%i8.array* %testval) + call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast) + + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; Verify that for both foo and bar, testval isn't put on the frame. +; CHECK: %foo.Frame = type { void (%foo.Frame*)*, void (%foo.Frame*)*, i1 } +; CHECK: %bar.Frame = type { void (%bar.Frame*)*, void (%bar.Frame*)*, i1 } + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) #3 +declare i64 @llvm.coro.size.i64() #5 +declare i8* @llvm.coro.begin(token, i8* writeonly) #3 +declare token @llvm.coro.save(i8*) #3 +declare i8* @llvm.coro.frame() #5 +declare i8 @llvm.coro.suspend(token, i1) #3 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 +declare i1 @llvm.coro.end(i8*, i1) #3 +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4