diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -359,6 +359,23 @@ std::unique_ptr MSSAU; std::unique_ptr Flags; + // Don't sink stores from loops with coroutine suspend instructions. + // There are two reasons for this: + // 1. LICM would sink instructions into the default destination of + // the coroutine switch. The default destination of the switch exists to + // handle the case where the coroutine is destroyed. No instruction can be + // sunk there. + // 2. In most cases LICM sinks memory operations. In the case of coroutines, + // sinking memory operations out of the loop does not improve performance + // since the coroutine needs to get data from the frame anyway. In fact LICM + // would hurt coroutine performance since it adds more entries to the frame. + bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) { + return llvm::any_of(*BB, [](Instruction &I) { + IntrinsicInst *II = dyn_cast(&I); + return II && II->getIntrinsicID() == Intrinsic::coro_suspend; + }); + }); + if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); @@ -405,7 +422,7 @@ // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !Flags->tooManyMemoryAccesses()) { + !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); diff --git a/llvm/test/Transforms/LICM/sink-with-coroutine.ll b/llvm/test/Transforms/LICM/sink-with-coroutine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LICM/sink-with-coroutine.ll @@ -0,0 +1,53 @@ +; RUN: opt -S < %s -passes=licm | FileCheck %s + +; Verify that LICM does not sink the store out of a loop containing @coro.suspend. 
+
+define i64 @licm(i64 %n) { ; loop containing a suspend point and a loop-invariant store
+; CHECK-LABEL: @licm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[BB0]] ], [ [[T5:%.*]], [[AWAIT_READY:%.*]] ]
+; CHECK-NEXT:    [[T5]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[SUSPEND:%.*]] = call i8 @llvm.coro.suspend(token none, i1 false)
+; CHECK-NEXT:    switch i8 [[SUSPEND]], label [[BB2:%.*]] [
+; CHECK-NEXT:    i8 0, label [[AWAIT_READY]]
+; CHECK-NEXT:    ]
+; CHECK:       await.ready:
+; CHECK-NEXT:    store i64 1, i64* [[P]], align 4
+; CHECK-NEXT:    [[T6:%.*]] = icmp ult i64 [[T5]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[T6]], label [[LOOP]], label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[RES:%.*]] = call i1 @llvm.coro.end(i8* null, i1 false)
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %p = alloca i64 ; slot written by the store inside the loop
+  br label %bb0
+
+bb0: ; acts as the loop preheader
+  br label %loop
+
+loop: ; loop header
+  %i = phi i64 [ 0, %bb0 ], [ %t5, %await.ready ]
+  %t5 = add i64 %i, 1
+  %suspend = call i8 @llvm.coro.suspend(token none, i1 false) ; suspend point inside the loop
+  switch i8 %suspend, label %bb2 [ ; default edge leaves the loop to %bb2
+    i8 0, label %await.ready ; value 0 continues with the rest of the loop body
+  ]
+
+await.ready:
+  store i64 1, i64* %p ; invariant store; the expected output keeps it in this block, not in %bb2
+  %t6 = icmp ult i64 %t5, %n
+  br i1 %t6, label %loop, label %bb2 ; back edge to %loop, or exit to %bb2
+
+bb2: ; reached from the switch default and from the normal loop exit
+  %res = call i1 @llvm.coro.end(i8* null, i1 false)
+  ret i64 0
+}
+
+declare i8 @llvm.coro.suspend(token, i1)
+declare i1 @llvm.coro.end(i8*, i1)