Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/InstrTypes.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -74,9 +75,37 @@
     if (IsDedicatedExit)
       return false;
 
+    // With nested loops, the inner loop might exit to the header of an
+    // enclosing loop, and the in-loop-predecessor is a latch for that
+    // enclosing loop. If we insert a block between the latch and the header,
+    // that block becomes the new latch. Any loop metadata from the old latch
+    // needs to be moved to the new one.
+    MDNode *OuterLoopMD = nullptr;
+    if (LI->isLoopHeader(BB)) {
+      OuterLoopMD = LI->getLoopFor(BB)->getLoopID();
+      // getLoopID() returns null unless all latches carry the same metadata.
+    }
+
     auto *NewExitBB = SplitBlockPredecessors(
         BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
 
+    // If OuterLoopMD is non-null, we know that BB is a loop header for
+    // a different loop, and that loop has metadata. Check that the new block
+    // is enclosed by the other loop, which means that it's a latch.
+    if (NewExitBB && OuterLoopMD &&
+        LI->getLoopFor(NewExitBB) == LI->getLoopFor(BB)) {
+      // Every pred of the new block should be a latch for the outer loop,
+      // with the same metadata.
+      for (auto *PredLoopBB : InLoopPredecessors) {
+        Instruction *TI = PredLoopBB->getTerminator();
+        assert(TI->getMetadata(LLVMContext::MD_loop) == OuterLoopMD &&
+               "exit edge to other loop doesn't contain expected metadata");
+        TI->setMetadata(LLVMContext::MD_loop, nullptr);
+      }
+      NewExitBB->getTerminator()->setMetadata(LLVMContext::MD_loop,
+                                              OuterLoopMD);
+    }
+
     if (!NewExitBB)
       LLVM_DEBUG(
           dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
Index: test/Transforms/LoopSimplify/preserve-llvm-loop-metadata2.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopSimplify/preserve-llvm-loop-metadata2.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -loop-simplify < %s | FileCheck %s
+
+; CHECK: for.cond.loopexit:
+; CHECK-NEXT: llvm.loop [[LOOPMD:.*]]
+; CHECK-NOT: br i1 {{.*}}, label {{.*}}, label %for.cond.loopexit, !llvm.loop [[LOOPMD]]
+; CHECK: br label %for.cond1, !llvm.loop {{.*}}
+
+; Two-loop nest with llvm.loop metadata on each loop.
+; After loop-simplify, a new block (for.cond.loopexit) is inserted on the edge
+; from for.cond1 (the inner loop's exiting block) to for.cond (the outer
+; loop's header). The branch in for.cond1 carries the outer loop's llvm.loop
+; metadata, which must be moved to the new block, as it is now the outer latch.
+
+define dso_local void @loopnest(double* noalias nocapture readonly %a, double* noalias nocapture %b) local_unnamed_addr #0 { ; NOTE(review): attribute group #0 is not defined in this file - confirm it can be dropped
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond1, %entry
+  %ii.0 = phi i64 [ 2, %entry ], [ %add, %for.cond1 ] ; outer induction variable: starts at 2, advanced by %add (= %ii.0 + 16)
+  %cmp = icmp ult i64 %ii.0, 262142
+  br i1 %cmp, label %for.cond1, label %for.cond.cleanup ; leave the outer loop once %ii.0 >= 262142
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+
+for.cond1:                                        ; preds = %for.cond, %for.body4
+  %j.0 = phi i64 [ %add10, %for.body4 ], [ %ii.0, %for.cond ] ; inner induction variable: starts at %ii.0, stepped by 1
+  %add = add nuw nsw i64 %ii.0, 16 ; inner-loop bound and next value of %ii.0
+  %cmp2 = icmp ult i64 %j.0, %add
+  br i1 %cmp2, label %for.body4, label %for.cond, !llvm.loop !2 ; false edge exits the inner loop straight to the outer header, so this branch is the outer loop's latch and carries its loop ID (!2)
+
+for.body4:                                        ; preds = %for.cond1
+  %sub = add nsw i64 %j.0, -2
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %sub
+  %0 = load double, double* %arrayidx, align 8, !tbaa !4 ; a[j-2]
+  %arrayidx5 = getelementptr inbounds double, double* %a, i64 %j.0
+  %1 = load double, double* %arrayidx5, align 8, !tbaa !4 ; a[j]
+  %add6 = fadd double %0, %1
+  %sub7 = add nsw i64 %j.0, -1
+  %arrayidx8 = getelementptr inbounds double, double* %a, i64 %sub7
+  %2 = load double, double* %arrayidx8, align 8, !tbaa !4 ; a[j-1]
+  %add9 = fadd double %add6, %2
+  %add10 = add nuw nsw i64 %j.0, 1 ; j+1, also the next value of %j.0
+  %arrayidx11 = getelementptr inbounds double, double* %a, i64 %add10
+  %3 = load double, double* %arrayidx11, align 8, !tbaa !4 ; a[j+1]
+  %add12 = fadd double %add9, %3
+  %add13 = add nuw nsw i64 %j.0, 2
+  %arrayidx14 = getelementptr inbounds double, double* %a, i64 %add13
+  %4 = load double, double* %arrayidx14, align 8, !tbaa !4 ; a[j+2]
+  %add15 = fadd double %add12, %4
+  %arrayidx16 = getelementptr inbounds double, double* %b, i64 %j.0
+  store double %add15, double* %arrayidx16, align 8, !tbaa !4 ; b[j] = a[j-2] + a[j] + a[j-1] + a[j+1] + a[j+2]
+  br label %for.cond1, !llvm.loop !8 ; inner-loop latch, carries the inner loop's ID (!8)
+}
+
+!0 = !{i32 1, !"wchar_size", i32 4} ; NOTE(review): not referenced anywhere in this file - presumably leftover front-end output, confirm before removing
+!1 = !{!"clang version 8.0.0"} ; NOTE(review): not referenced anywhere in this file - presumably leftover front-end output, confirm before removing
+!2 = distinct !{!2, !3} ; loop ID attached to the branch in for.cond1 that targets for.cond
+!3 = !{!"llvm.loop.unroll_and_jam.count", i32 17}
+!4 = !{!5, !5, i64 0} ; TBAA access tag used by every load and store in @loopnest
+!5 = !{!"double", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = distinct !{!8, !9} ; loop ID attached to the branch in for.body4 that targets for.cond1
+!9 = !{!"llvm.loop.unroll.count", i32 1}