diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -621,9 +621,6 @@
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
-  if (PTO.Coroutines)
-    FPM.addPass(CoroElidePass());
-
   for (auto &C : ScalarOptimizerLateEPCallbacks)
     C(FPM, Level);
 
@@ -963,9 +960,6 @@
   if (AttributorRun & AttributorRunOption::CGSCC)
     MainCGPipeline.addPass(AttributorCGSCCPass());
 
-  if (PTO.Coroutines)
-    MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
-
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
 
@@ -987,6 +981,9 @@
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
       buildFunctionSimplificationPipeline(Level, Phase)));
 
+  if (PTO.Coroutines)
+    MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+
   return MIWP;
 }
 
@@ -1345,8 +1342,10 @@
   // inserting redundancies into the program. This even includes SimplifyCFG.
   OptimizePM.addPass(SpeculateAroundPHIsPass());
 
-  if (PTO.Coroutines)
+  if (PTO.Coroutines) {
+    OptimizePM.addPass(CoroElidePass());
     OptimizePM.addPass(CoroCleanupPass());
+  }
 
   // Add the core optimizing pipeline.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1023,27 +1023,6 @@
   Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
 }
 
-static void postSplitCleanup(Function &F) {
-  removeUnreachableBlocks(F);
-
-  // For now, we do a mandatory verification step because we don't
-  // entirely trust this pass.  Note that we don't want to add a verifier
-  // pass to FPM below because it will also verify all the global data.
-  if (verifyFunction(F, &errs()))
-    report_fatal_error("Broken function");
-
-  legacy::FunctionPassManager FPM(F.getParent());
-
-  FPM.add(createSCCPPass());
-  FPM.add(createCFGSimplificationPass());
-  FPM.add(createEarlyCSEPass());
-  FPM.add(createCFGSimplificationPass());
-
-  FPM.doInitialization();
-  FPM.run(F);
-  FPM.doFinalization();
-}
-
 // Assuming we arrived at the block NewBlock from Prev instruction, store
 // PHI's incoming values in the ResolvedValues map.
 static void
@@ -1386,9 +1365,9 @@
   auto CleanupClone = createClone(F, ".cleanup", Shape,
                                   CoroCloner::Kind::SwitchCleanup);
 
-  postSplitCleanup(*ResumeClone);
-  postSplitCleanup(*DestroyClone);
-  postSplitCleanup(*CleanupClone);
+  removeUnreachableBlocks(*ResumeClone);
+  removeUnreachableBlocks(*DestroyClone);
+  removeUnreachableBlocks(*CleanupClone);
 
   addMustTailToCoroResumes(*ResumeClone);
 
@@ -1728,7 +1707,7 @@
     return;
 
   removeCoroEnds(Shape, &CG);
-  postSplitCleanup(F);
+  removeUnreachableBlocks(F);
 
   // Update call graph and add the functions we created to the SCC.
   coro::updateCallGraph(F, Clones, CG, SCC);
@@ -1748,6 +1727,8 @@
     End->eraseFromParent();
   }
 
+  removeUnreachableBlocks(N.getFunction());
+
   if (!Clones.empty()) {
     switch (Shape.ABI) {
     case coro::ABI::Switch:
@@ -1768,11 +1749,6 @@
     // Let the CGSCC infra handle the changes to the original function.
     updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
   }
-
-  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
-  // to the split functions.
-  postSplitCleanup(N.getFunction());
-  updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
 }
 
 // When we see the coroutine the first time, we insert an indirect call to a
@@ -2016,24 +1992,17 @@
     StringRef Value = Attr.getValueAsString();
     LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                       << "' state: " << Value << "\n");
-    if (Value == UNPREPARED_FOR_SPLIT) {
-      // Enqueue a second iteration of the CGSCC pipeline on this SCC.
-      UR.CWorklist.insert(&C);
-      F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
-      continue;
-    }
     F.removeFnAttr(CORO_PRESPLIT_ATTR);
 
     SmallVector<Function *, 4> Clones;
     const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot);
     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
 
-    if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon ||
-         Shape.ABI == coro::ABI::RetconOnce) &&
-        !Shape.CoroSuspends.empty()) {
-      // Run the CGSCC pipeline on the newly split functions.
-      // All clones will be in the same RefSCC, so choose a random clone.
-      UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0])));
+    if (!Shape.CoroSuspends.empty()) {
+      // Run the CGSCC pipeline on the original and newly split functions.
+      UR.CWorklist.insert(&C);
+      for (Function *Clone : Clones)
+        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
     }
   }
 
diff --git a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
--- a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
@@ -34,15 +34,18 @@
 
 ; See that %this is spilled into the frame
 ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg)
+; CHECK:  %this.addr = alloca i64, align 8
+; CHECK:  store i64 %this_arg, i64* %this.addr, align 4
+; CHECK:  %this = load i64, i64* %this.addr, align 4
 ; CHECK:  %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2
-; CHECK:  store i64 %this_arg, i64* %this.spill.addr
-; CHECK: ret i8* %hdl
+; CHECK:  store i64 %this, i64* %this.spill.addr
+; CHECK:  ret i8* %hdl
 
 ; See that %this was loaded from the frame
 ; CHECK-LABEL: @f_copy.resume(
 ; CHECK:  %this.reload = load i64, i64* %this.reload.addr
 ; CHECK:  call void @print2(i64 %this.reload)
-; CHECK: ret void
+; CHECK:  ret void
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
--- a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
@@ -46,8 +46,10 @@
 ; CHECK:         %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:         %y.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
 ; CHECK:         %x.alias = bitcast i64* %x.reload.addr to i32*
+; CHECK:         %x.alias.merge = phi i32* [ %x.alias, %flag_true ]
 ; CHECK:         %y.alias = bitcast i64* %y.reload.addr to i32*
-; CHECK:         %alias_phi = select i1 %n, i32* %x.alias, i32* %y.alias
+; CHECK:         %y.alias.merge = phi i32* [ %y.alias, %flag_false ]
+; CHECK:         %alias_phi = phi i32* [ %x.alias.merge, %merge.from.flag_true ], [ %y.alias.merge, %merge.from.flag_false ]
 ; CHECK:         %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
 ; CHECK:         store i32* %alias_phi, i32** %alias_phi.spill.addr, align 8
 
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll
--- a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll
@@ -45,9 +45,10 @@
 ; CHECK:         store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
 ; CHECK-NEXT:    %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK-NEXT:    %1 = bitcast i64* %0 to i8*
-; CHECK-NEXT:    %2 = bitcast i8* %1 to i32*
-; CHECK-NEXT:    %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
-; CHECK-NEXT:    store i32* %2, i32** %alias_phi.spill.addr
+; CHECK-NEXT:    %2 = getelementptr i8, i8* %1, i64 0
+; CHECK-NEXT:    %3 = bitcast i8* %2 to i32*
+; CHECK:         %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK-NEXT:    store i32* %3, i32** %alias_phi.spill.addr
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll
--- a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll
@@ -32,10 +32,10 @@
 ; CHECK-NEXT:  entry.resume:
 ; CHECK-NEXT:    [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8*
 ; CHECK-NEXT:    [[X:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4
+; CHECK:         [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4
 ; CHECK-NEXT:    call void @print(i32 [[X_VALUE]])
-; CHECK-NEXT:    call void @free(i8* [[VFRAME]])
-; CHECK-NEXT:    ret void
+; CHECK:         call void @free(i8* [[VFRAME]])
+; CHECK:         ret void
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/Coroutines/restart-trigger.ll b/llvm/test/Transforms/Coroutines/restart-trigger.ll
--- a/llvm/test/Transforms/Coroutines/restart-trigger.ll
+++ b/llvm/test/Transforms/Coroutines/restart-trigger.ll
@@ -6,11 +6,14 @@
 ; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
 ; The following tests use the new pass manager, and verify that the coroutine
 ; passes re-run the CGSCC pipeline.
-; RUN: opt < %s -S -passes='default<O0>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
-; RUN: opt < %s -S -passes='default<O1>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes='default<O0>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s
+; RUN: opt < %s -S -passes='default<O1>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s
 
 ; CHECK:      CoroSplit: Processing coroutine 'f' state: 0
 ; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
+; CHECK-NEWPM:      CoroSplit: Processing coroutine 'f' state: 0
+; CHECK-NEWPM-NOT:  CoroSplit: Processing coroutine 'f' state: 1
+
 
 define void @f() {
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)