diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -621,9 +621,6 @@ FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - if (PTO.Coroutines) - FPM.addPass(CoroElidePass()); - for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -963,9 +960,6 @@ if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); - if (PTO.Coroutines) - MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); - // Now deduce any function attributes based in the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -987,6 +981,9 @@ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase))); + if (PTO.Coroutines) + MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); + return MIWP; } @@ -1345,8 +1342,10 @@ // inserting redundancies into the program. This even includes SimplifyCFG. OptimizePM.addPass(SpeculateAroundPHIsPass()); - if (PTO.Coroutines) + if (PTO.Coroutines) { + OptimizePM.addPass(CoroElidePass()); OptimizePM.addPass(CoroCleanupPass()); + } // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1023,27 +1023,6 @@ Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); } -static void postSplitCleanup(Function &F) { - removeUnreachableBlocks(F); - - // For now, we do a mandatory verification step because we don't - // entirely trust this pass. Note that we don't want to add a verifier - // pass to FPM below because it will also verify all the global data. - if (verifyFunction(F, &errs())) - report_fatal_error("Broken function"); - - legacy::FunctionPassManager FPM(F.getParent()); - - FPM.add(createSCCPPass()); - FPM.add(createCFGSimplificationPass()); - FPM.add(createEarlyCSEPass()); - FPM.add(createCFGSimplificationPass()); - - FPM.doInitialization(); - FPM.run(F); - FPM.doFinalization(); -} - // Assuming we arrived at the block NewBlock from Prev instruction, store // PHI's incoming values in the ResolvedValues map. static void @@ -1386,9 +1365,9 @@ auto CleanupClone = createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup); - postSplitCleanup(*ResumeClone); - postSplitCleanup(*DestroyClone); - postSplitCleanup(*CleanupClone); + removeUnreachableBlocks(*ResumeClone); + removeUnreachableBlocks(*DestroyClone); + removeUnreachableBlocks(*CleanupClone); addMustTailToCoroResumes(*ResumeClone); @@ -1728,7 +1707,7 @@ return; removeCoroEnds(Shape, &CG); - postSplitCleanup(F); + removeUnreachableBlocks(F); // Update call graph and add the functions we created to the SCC. coro::updateCallGraph(F, Clones, CG, SCC); @@ -1748,6 +1727,8 @@ End->eraseFromParent(); } + removeUnreachableBlocks(N.getFunction()); + if (!Clones.empty()) { switch (Shape.ABI) { case coro::ABI::Switch: @@ -1768,11 +1749,6 @@ // Let the CGSCC infra handle the changes to the original function. updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM); } - - // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges - // to the split functions. - postSplitCleanup(N.getFunction()); - updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM); } // When we see the coroutine the first time, we insert an indirect call to a @@ -2016,24 +1992,17 @@ StringRef Value = Attr.getValueAsString(); LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() << "' state: " << Value << "\n"); - if (Value == UNPREPARED_FOR_SPLIT) { - // Enqueue a second iteration of the CGSCC pipeline on this SCC. - UR.CWorklist.insert(&C); - F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); - continue; - } F.removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); - if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon || - Shape.ABI == coro::ABI::RetconOnce) && - !Shape.CoroSuspends.empty()) { - // Run the CGSCC pipeline on the newly split functions. - // All clones will be in the same RefSCC, so choose a random clone. - UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0]))); + if (!Shape.CoroSuspends.empty()) { + // Run the CGSCC pipeline on the original and newly split functions. + UR.CWorklist.insert(&C); + for (Function *Clone : Clones) + UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone))); } } diff --git a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll --- a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll @@ -34,15 +34,18 @@ ; See that %this is spilled into the frame ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg) +; CHECK: %this.addr = alloca i64, align 8 +; CHECK: store i64 %this_arg, i64* %this.addr, align 4 +; CHECK: %this = load i64, i64* %this.addr, align 4 ; CHECK: %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2 -; CHECK: store i64 %this_arg, i64* %this.spill.addr -; CHECK: ret i8* %hdl +; CHECK: store i64 %this, i64* %this.spill.addr +; CHECK: ret i8* %hdl ; See that %this was loaded from the frame ; CHECK-LABEL: @f_copy.resume( ; CHECK: %this.reload = load i64, i64* %this.reload.addr ; CHECK: call void @print2(i64 %this.reload) -; CHECK: ret void +; CHECK: ret void declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll --- a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll @@ -46,8 +46,10 @@ ; CHECK: %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: %y.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 ; CHECK: %x.alias = bitcast i64* %x.reload.addr to i32* +; CHECK: %x.alias.merge = phi i32* [ %x.alias, %flag_true ] ; CHECK: %y.alias = bitcast i64* %y.reload.addr to i32* -; CHECK: %alias_phi = select i1 %n, i32* %x.alias, i32* %y.alias +; CHECK: %y.alias.merge = phi i32* [ %y.alias, %flag_false ] +; CHECK: %alias_phi = phi i32* [ %x.alias.merge, %merge.from.flag_true ], [ %y.alias.merge, %merge.from.flag_false ] ; CHECK: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 ; CHECK: store i32* %alias_phi, i32** %alias_phi.spill.addr, align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll --- a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll @@ -45,9 +45,10 @@ ; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr ; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK-NEXT: %1 = bitcast i64* %0 to i8* -; CHECK-NEXT: %2 = bitcast i8* %1 to i32* -; CHECK-NEXT: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 -; CHECK-NEXT: store i32* %2, i32** %alias_phi.spill.addr +; CHECK-NEXT: %2 = getelementptr i8, i8* %1, i64 0 +; CHECK-NEXT: %3 = bitcast i8* %2 to i32* +; CHECK: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK-NEXT: store i32* %3, i32** %alias_phi.spill.addr declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll --- a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll @@ -32,10 +32,10 @@ ; CHECK-NEXT: entry.resume: ; CHECK-NEXT: [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8* ; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4 +; CHECK: [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: call void @print(i32 [[X_VALUE]]) -; CHECK-NEXT: call void @free(i8* [[VFRAME]]) -; CHECK-NEXT: ret void +; CHECK: call void @free(i8* [[VFRAME]]) +; CHECK: ret void declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/restart-trigger.ll b/llvm/test/Transforms/Coroutines/restart-trigger.ll --- a/llvm/test/Transforms/Coroutines/restart-trigger.ll +++ b/llvm/test/Transforms/Coroutines/restart-trigger.ll @@ -6,11 +6,14 @@ ; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s ; The following tests use the new pass manager, and verify that the coroutine ; passes re-run the CGSCC pipeline. -; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s -; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s +; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s +; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s ; CHECK: CoroSplit: Processing coroutine 'f' state: 0 ; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1 +; CHECK-NEWPM: CoroSplit: Processing coroutine 'f' state: 0 +; CHECK-NEWPM-NOT: CoroSplit: Processing coroutine 'f' state: 1 + define void @f() { %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)