Index: lib/Transforms/Coroutines/CoroEarly.cpp
===================================================================
--- lib/Transforms/Coroutines/CoroEarly.cpp
+++ lib/Transforms/Coroutines/CoroEarly.cpp
@@ -13,10 +13,12 @@
 #include "CoroInternal.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 
 using namespace llvm;
 
@@ -38,7 +40,7 @@
         AnyResumeFnPtrTy(FunctionType::get(Type::getVoidTy(Context), Int8Ptr,
                                            /*isVarArg=*/false)
                              ->getPointerTo()) {}
-  bool lowerEarlyIntrinsics(Function &F);
+  bool lowerEarlyIntrinsics(Function &F, DominatorTree &DT);
 };
 }
 
@@ -103,6 +105,43 @@
   II->eraseFromParent();
 }
 
+static bool allocaOnlyStoresConstantInts(AllocaInst *AI) {
+  for (User *U : AI->users())
+    if (auto *SI = dyn_cast<StoreInst>(U))
+      if (!isa<ConstantInt>(SI->getValueOperand()))
+        return false;
+
+  return true;
+}
+
+// Allocas that follow the pattern of cleanup.dest.slot, namely, they are
+// integer allocas that don't escape and only store constants. We don't want
+// them to be saved into the coroutine frame (as some of the cleanup code will
+// access them after coroutine frame destroyed).
+//
+// PromiseAlloca should be always stored in the coroutine frame even if its
+// usage pattern only consists of constant stores.
+static void
+promoteCleanupSlotLikeAllocasToRegisters(Function &F, DominatorTree& DT,
+                                         AllocaInst *PromiseAlloca) {
+  SmallVector<AllocaInst *, 4> Allocas;
+  BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+  // Find allocas that are safe to promote, by looking at all non-terminator
+  // instructions in the entry node.
+  for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+    if (auto *AI = dyn_cast<AllocaInst>(I))
+      if (AI != PromiseAlloca && AI->getAllocatedType()->isIntegerTy() &&
+          isAllocaPromotable(AI) && allocaOnlyStoresConstantInts(AI))
+        Allocas.push_back(AI);
+
+  // None found, nothing to do.
+  if (Allocas.empty())
+    return;
+
+  PromoteMemToReg(Allocas, DT);
+}
+
 // Prior to CoroSplit, calls to coro.begin needs to be marked as NoDuplicate,
 // as CoroSplit assumes there is exactly one coro.begin. After CoroSplit,
 // NoDuplicate attribute will be removed from coro.begin otherwise, it will
@@ -113,7 +152,7 @@
       CB->setCannotDuplicate();
 }
 
-bool Lowerer::lowerEarlyIntrinsics(Function &F) {
+bool Lowerer::lowerEarlyIntrinsics(Function &F, DominatorTree &DT) {
   bool Changed = false;
   CoroIdInst *CoroId = nullptr;
   SmallVector<CoroFreeInst *, 4> CoroFrees;
@@ -147,6 +186,11 @@
             setCannotDuplicate(CII);
             CII->setCoroutineSelf();
             CoroId = cast<CoroIdInst>(&I);
+            // Eliminate cleanup.slot-like allocas. Required for O0, since
+            // cleanup slot allocas are accessed (potentially) after coroutine
+            // frame is destroyed.
+            promoteCleanupSlotLikeAllocasToRegisters(F, DT,
+                                                     CoroId->getPromise());
           }
         }
         break;
@@ -204,11 +248,13 @@
     if (!L)
       return false;
 
-    return L->lowerEarlyIntrinsics(F);
+    auto &DTWP = getAnalysis<DominatorTreeWrapperPass>();
+    return L->lowerEarlyIntrinsics(F, DTWP.getDomTree());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
   }
   StringRef getPassName() const override {
     return "Lower early coroutine intrinsics";
@@ -217,7 +263,10 @@
 }
 
 char CoroEarly::ID = 0;
-INITIALIZE_PASS(CoroEarly, "coro-early", "Lower early coroutine intrinsics",
-                false, false)
+INITIALIZE_PASS_BEGIN(CoroEarly, "coro-early",
+                      "Lower early coroutine intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CoroEarly, "coro-early", "Lower early coroutine intrinsics",
+                    false, false)
 
 Pass *llvm::createCoroEarlyPass() { return new CoroEarly(); }
Index: test/Transforms/Coroutines/coro-cleanup-dest-promote.ll
===================================================================
--- /dev/null
+++ test/Transforms/Coroutines/coro-cleanup-dest-promote.ll
@@ -0,0 +1,119 @@
+; Check that we do not spill cleanup-slot like allocas.
+; RUN: opt < %s -coro-early -S | FileCheck %s
+
+define i8* @happy.case() {
+entry:
+  %slot = alloca i32
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  store i32 1, i32* %slot
+
+  %tok = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %tok, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  store i32 2, i32* %slot
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  %x = load i32, i32* %slot
+  call void @print.i32(i32 %x)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See that we got a PHI with constants in the happy case
+; CHECK-LABEL: define i8* @happy.case(
+; CHECK: %[[SLOT:.+]] = phi i32 [ 1, %entry ], [ 2, %resume ]
+; CHECK: call void @free(i8* {{.+}})
+; CHECK: call void @print.i32(i32 %[[SLOT]])
+; CHECK: ret i8*
+
+define i8* @escaped.case() {
+entry:
+  %slot = alloca i32
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  store i32 1, i32* %slot
+  call void @escape(i32* %slot)
+
+  %tok = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %tok, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  store i32 2, i32* %slot
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  %x = load i32, i32* %slot
+  call void @print.i32(i32 %x)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See that we kept the load when the alloca escaped
+; CHECK-LABEL: define i8* @escaped.case(
+; CHECK: %[[SLOT:.+]] = load i32, i32* %{{.+}}
+; CHECK: call void @print.i32(i32 %[[SLOT]])
+; CHECK: ret i8*
+
+define i8* @promise.case() {
+entry:
+  %slot = alloca i32
+  %pv = bitcast i32* %slot to i8*
+  %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %tok = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %tok, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  store i32 2, i32* %slot
+  br label %cleanup
+
+cleanup:
+  %x = load i32, i32* %slot
+  call void @print.i32(i32 %x)
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See that we kept the load when the alloca is from promise
+; CHECK-LABEL: define i8* @promise.case(
+; CHECK: %[[SLOT:.+]] = load i32, i32* %{{.+}}
+; CHECK: call void @print.i32(i32 %[[SLOT]])
+; CHECK: ret i8*
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print.i32(i32)
+declare void @escape(i32*)
+declare void @free(i8*)