Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -731,7 +731,8 @@
       // time in isGuaranteedToExecute if we don't actually have anything to
       // drop.  It is a compile time optimization, not required for correctness.
       !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
-    I.dropUnknownNonDebugMetadata();
+    I.dropUnknownNonDebugMetadata(
+      LLVMContext::MD_invariant_load);
 
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
Index: test/Transforms/LICM/hoist-invariant-load-twice.ll
===================================================================
--- /dev/null
+++ test/Transforms/LICM/hoist-invariant-load-twice.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @work(i32, i32, i32) ; body-less declaration; @loop calls it with both induction values and the loaded %val
+
+define void @loop(i32* dereferenceable(4) %ptr) { ; nested-loop case: the load of %ptr must end up in %entry with !invariant.load kept
+; CHECK: entry:
+; CHECK-NEXT: %val = load i32, i32* %ptr, !invariant.load !0
+; CHECK-NEXT: br label %loop1
+entry:                                                  ; block where the FileCheck lines above expect the hoisted load
+  br label %loop1
+
+loop1:                                                  ; outer loop header (entered from %entry, back edge from %loop2)
+  %ind1 = phi i32 [ 0, %entry ], [ %indnext1, %loop2 ]  ; outer induction variable
+  %cont1 = icmp eq i32 %ind1, 10
+  br i1 %cont1, label %loop2, label %exit               ; enter the inner loop or leave the function
+
+loop2:                                                  ; inner loop header (entered from %loop1, back edge from %work)
+  %ind2 = phi i32 [ 0, %loop1 ], [ %indnext2, %work ]   ; inner induction variable
+  %indnext1 = add i32 %ind1, 1                          ; next outer induction value, consumed by the phi in %loop1
+  %cont2 = icmp eq i32 %ind2, 15
+  br i1 %cont2, label %work, label %loop1               ; %work is only reached conditionally, so the load is not guaranteed to execute
+
+work:                                                   ; inner loop body holding the load under test
+  %indnext2 = add i32 %ind2, 1
+  %val = load i32, i32* %ptr, !invariant.load !0        ; loop-invariant address; presumably dereferenceable(4) on %ptr is what permits speculating it
+  call void @work(i32 %ind1, i32 %ind2, i32 %val)       ; consumes %val so the load has a user
+  br label %loop2
+
+exit:
+  ret void
+}
+
+!0 = !{} ; empty node used as the !invariant.load operand on the load in @loop
+!1 = !{i64 4} ; NOTE(review): not referenced by anything visible in this test - looks like a leftover; confirm before removing