Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1010,6 +1010,25 @@
   return true;
 }
 
+class ExpandedValuesCleaner {
+
+  SCEVExpander &Expander;
+  TargetLibraryInfo *TLI;
+  SmallVector<Value *, 4> ExpandedValues;
+
+public:
+  ExpandedValuesCleaner(SCEVExpander &Expander, TargetLibraryInfo *TLI)
+      : Expander(Expander), TLI(TLI) {}
+
+  void add(Value *V) { ExpandedValues.push_back(V); }
+
+  void clean() {
+    Expander.clear();
+    for (auto *V : ExpandedValues)
+      RecursivelyDeleteTriviallyDeadInstructions(V, TLI);
+  }
+};
+
 /// If the stored value is a strided load in the same loop with the same stride
 /// this may be transformable into a memcpy. This kicks in for stuff like
 ///   for (i) A[i] = B[i];
@@ -1040,6 +1059,8 @@
   IRBuilder<> Builder(Preheader->getTerminator());
   SCEVExpander Expander(*SE, *DL, "loop-idiom");
 
+  ExpandedValuesCleaner EVC{Expander, TLI};
+
   const SCEV *StrStart = StoreEv->getStart();
   unsigned StrAS = SI->getPointerAddressSpace();
   Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1056,14 +1077,13 @@
   // checking everything.
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+  EVC.add(StoreBasePtr);
 
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
-    Expander.clear();
-    // If we generated new code for the base pointer, clean up.
-    RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+    EVC.clean();
     return false;
   }
 
@@ -1078,18 +1098,18 @@
   // mutated by the loop.
   Value *LoadBasePtr = Expander.expandCodeFor(
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
+  EVC.add(LoadBasePtr);
 
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
-    Expander.clear();
-    // If we generated new code for the base pointer, clean up.
-    RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
-    RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+    EVC.clean();
     return false;
   }
 
-  if (avoidLIRForMultiBlockLoop())
+  if (avoidLIRForMultiBlockLoop()) {
+    EVC.clean();
     return false;
+  }
 
   // Okay, everything is safe, we can transform this!
 
@@ -1098,6 +1118,7 @@
 
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+  EVC.add(NumBytes);
 
   CallInst *NewCall = nullptr;
   // Check whether to generate an unordered atomic memcpy:
@@ -1111,15 +1132,19 @@
     // anything where the alignment isn't at least the element size.
     const Align StoreAlign = SI->getAlign();
     const Align LoadAlign = LI->getAlign();
-    if (StoreAlign < StoreSize || LoadAlign < StoreSize)
+    if (StoreAlign < StoreSize || LoadAlign < StoreSize) {
+      EVC.clean();
       return false;
+    }
 
     // If the element.atomic memcpy is not lowered into explicit
     // loads/stores later, then it will be lowered into an element-size
    // specific lib call. If the lib call doesn't exist for our store size, then
     // we shouldn't generate the memcpy.
-    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
+    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) {
+      EVC.clean();
       return false;
+    }
 
     // Create the call.
     // Note that unordered atomic loads/stores are *required* by the spec to
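
For readers less familiar with the pattern the patch introduces, here is a small standalone sketch (not part of the patch, and not LLVM code) of the same idea: record everything that was speculatively emitted, then undo all of it with a single clean() call on every bail-out path. The Module, Cleaner, and tryTransform names below are hypothetical stand-ins for the IR, the SCEVExpander-expanded values, and the transform; removal from a vector stands in for RecursivelyDeleteTriviallyDeadInstructions.

// Standalone sketch of the "record expanded values, clean them all on any
// bail-out path" pattern, modeled with std types only.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using Module = std::vector<std::string>; // stand-in for emitted IR

// Analogue of ExpandedValuesCleaner: remembers what was emitted so a single
// clean() can undo all of it on every early-return path.
class Cleaner {
  Module &M;
  std::vector<std::string> Emitted;

public:
  explicit Cleaner(Module &M) : M(M) {}

  void add(const std::string &V) { Emitted.push_back(V); }

  void clean() {
    for (const auto &V : Emitted)
      M.erase(std::remove(M.begin(), M.end(), V), M.end());
    Emitted.clear();
  }
};

bool tryTransform(Module &M, bool FailEarly, bool FailLate) {
  Cleaner EVC(M);

  M.push_back("StoreBasePtr"); // "expand" a value into the module...
  EVC.add("StoreBasePtr");     // ...and register it for possible cleanup

  if (FailEarly) { EVC.clean(); return false; }

  M.push_back("LoadBasePtr");
  EVC.add("LoadBasePtr");

  if (FailLate) { EVC.clean(); return false; } // removes both expanded values

  M.push_back("memcpy"); // success: expanded values stay, as in the patch
  return true;
}

int main() {
  Module M;
  tryTransform(M, /*FailEarly=*/false, /*FailLate=*/true);
  std::cout << M.size() << "\n"; // 0: both expanded values were cleaned up
  tryTransform(M, /*FailEarly=*/false, /*FailLate=*/false);
  std::cout << M.size() << "\n"; // 3: StoreBasePtr, LoadBasePtr, memcpy kept
}

A destructor-based (RAII) variant with a commit() flag would remove even the repeated clean() calls on the failure paths; the explicit form above simply mirrors the helper as written in the patch.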