diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -1760,6 +1760,14 @@ Areas Requiring Attention ========================= +#. When coro.suspend returns -1, the coroutine is suspended, and it's possible + that the coroutine has already been destroyed (hence the frame has been freed). + We cannot access anything on the frame on the suspend path. + However there is nothing that prevents the compiler from moving instructions + along that path (e.g. LICM), which can lead to use-after-free. At the moment + we disabled LICM for loops that have coro.suspend, but the general problem still + exists and requires a general solution. + #. Take advantage of the lifetime intrinsics for the data that goes into the coroutine frame. Leave lifetime intrinsics as is for the data that stays in allocas. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -362,6 +362,22 @@ std::unique_ptr MSSAU; std::unique_ptr Flags; + // Don't sink stores from loops with coroutine suspend instructions. + // LICM would sink instructions into the default destination of + // the coroutine switch. The default destination of the switch is to + // handle the case where the coroutine is suspended, by which point the + // coroutine frame may have been destroyed. No instruction can be sunk there. + // FIXME: This would unfortunately hurt the performance of coroutines, however + // there is currently no general solution for this. Similar issues could also + // potentially happen in other passes where instructions are being moved + // across that edge. + bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) { + return llvm::any_of(*BB, [](Instruction &I) { + IntrinsicInst *II = dyn_cast(&I); + return II && II->getIntrinsicID() == Intrinsic::coro_suspend; + }); + }); + if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); @@ -408,7 +424,7 @@ // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !Flags->tooManyMemoryAccesses()) { + !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -1,9 +1,25 @@ ; Need to move users of allocas that were moved into the coroutine frame after ; coro.begin. -; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -O2 -enable-coroutines -S | FileCheck %s -; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -aa-pipeline=basic-aa -passes='default' -enable-coroutines -S | FileCheck %s +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s -define nonnull i8* @f(i32 %n) { +define nonnull i8* @f(i32 %n) "coroutine.presplit"="1" { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*)) +; CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_ADDR]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i32 24) +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[CALL]]) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame* +; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +; entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null); %n.addr = alloca i32 @@ -23,8 +39,8 @@ %4 = call i8 @llvm.coro.suspend(token none, i1 false) %conv = sext i8 %4 to i32 switch i32 %conv, label %coro_Suspend [ - i32 0, label %for.cond - i32 1, label %coro_Cleanup + i32 0, label %for.cond + i32 1, label %coro_Cleanup ] coro_Cleanup: @@ -45,24 +61,6 @@ call void @llvm.coro.resume(i8* %hdl) call void @llvm.coro.destroy(i8* %hdl) ret i32 0 -; CHECK: call void @ctor -; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 20 -; CHECK-NEXT: bitcast i8* %dec1.spill.addr.i to i32* -; CHECK-NEXT: store i32 4 -; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: %index.addr12.i = getelementptr inbounds i8, i8* %call.i, i64 24 -; CHECK-NEXT: bitcast i8* %index.addr12.i to i1* -; CHECK-NEXT: store i1 false -; CHECK-NEXT: store i32 3 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl -; CHECK-NEXT: store i32 3 -; CHECK-NEXT: call void @print(i32 3) -; CHECK-NEXT: store i1 false -; CHECK-NEXT: store i32 2 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl -; CHECK-NEXT: store i32 2 -; CHECK-NEXT: call void @print(i32 2) -; CHECK: ret i32 0 } declare i8* @malloc(i32) diff --git a/llvm/test/Transforms/LICM/sink-with-coroutine.ll b/llvm/test/Transforms/LICM/sink-with-coroutine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LICM/sink-with-coroutine.ll @@ -0,0 +1,52 @@ +; Verifies that LICM is disabled for loops that contains coro.suspend. +; RUN: opt -S < %s -passes=licm | FileCheck %s + +define i64 @licm(i64 %n) #0 { +; CHECK-LABEL: @licm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB0]] ], [ [[T5:%.*]], [[AWAIT_READY:%.*]] ] +; CHECK-NEXT: [[T5]] = add i64 [[I]], 1 +; CHECK-NEXT: [[SUSPEND:%.*]] = call i8 @llvm.coro.suspend(token none, i1 false) +; CHECK-NEXT: switch i8 [[SUSPEND]], label [[BB2:%.*]] [ +; CHECK-NEXT: i8 0, label [[AWAIT_READY]] +; CHECK-NEXT: ] +; CHECK: await.ready: +; CHECK-NEXT: store i64 1, i64* [[P]], align 4 +; CHECK-NEXT: [[T6:%.*]] = icmp ult i64 [[T5]], [[N:%.*]] +; CHECK-NEXT: br i1 [[T6]], label [[LOOP]], label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[RES:%.*]] = call i1 @llvm.coro.end(i8* null, i1 false) +; CHECK-NEXT: ret i64 0 +; +entry: + %p = alloca i64 + br label %bb0 + +bb0: + br label %loop + +loop: + %i = phi i64 [ 0, %bb0 ], [ %t5, %await.ready ] + %t5 = add i64 %i, 1 + %suspend = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %suspend, label %bb2 [ + i8 0, label %await.ready + ] + +await.ready: + store i64 1, i64* %p + %t6 = icmp ult i64 %t5, %n + br i1 %t6, label %loop, label %bb2 + +bb2: + %res = call i1 @llvm.coro.end(i8* null, i1 false) + ret i64 0 +} + +declare i8 @llvm.coro.suspend(token, i1) +declare i1 @llvm.coro.end(i8*, i1)