Index: llvm/include/llvm/Transforms/Utils/SSAUpdater.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -169,6 +169,10 @@
 
   /// Called to update debug info associated with the instruction.
   virtual void updateDebugInfo(Instruction *I) const {}
+
+  /// Return false if a sub-class wants to keep one of the loads/stores
+  /// after the SSA construction.
+  virtual bool shouldDelete(Instruction *I) const { return true; }
 };
 
 } // end namespace llvm
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1860,6 +1860,7 @@
   bool UnorderedAtomic;
   AAMDNodes AATags;
   ICFLoopSafetyInfo &SafetyInfo;
+  bool CanInsertStoresInExitBlocks;
 
   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
   // (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@
                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
                MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo)
+               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
-        SafetyInfo(SafetyInfo) {}
+        SafetyInfo(SafetyInfo),
+        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@
     return PointerMustAliases.count(Ptr);
   }
 
-  void doExtraRewritesBeforeFinalDeletion() override {
+  void insertStoresInLoopExitBlocks() {
     // Insert stores after in the loop exit blocks.  Each exit block gets a
     // store of the live-out values that feed them.  Since we've already told
     // the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@
     }
   }
 
+  void doExtraRewritesBeforeFinalDeletion() override {
+    if (CanInsertStoresInExitBlocks)
+      insertStoresInLoopExitBlocks();
+  }
+
   void instructionDeleted(Instruction *I) const override {
     SafetyInfo.removeInstruction(I);
     MSSAU->removeMemoryAccess(I);
   }
+
+  bool shouldDelete(Instruction *I) const override {
+    if (isa<StoreInst>(I))
+      return CanInsertStoresInExitBlocks;
+    return true;
+  }
 };
 
 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
+  bool FoundLoadToPromote = false;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -2086,6 +2100,7 @@
 
         SawUnorderedAtomic |= Load->isAtomic();
         SawNotAtomic |= !Load->isAtomic();
+        FoundLoadToPromote = true;
 
         Align InstAlignment = Load->getAlign();
 
@@ -2197,13 +2212,20 @@
     }
   }
 
-  // If we've still failed to prove we can sink the store, give up.
-  if (!SafeToInsertStore)
+  // If we've still failed to prove we can sink the store, hoist the load
+  // only, if possible.
+  if (!SafeToInsertStore && !FoundLoadToPromote)
+    // If we cannot hoist the load either, give up.
     return false;
 
-  // Otherwise, this is safe to promote, lets do it!
-  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
-                    << '\n');
+  // Lets do the promotion!
+  if (SafeToInsertStore)
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
+                      << '\n');
+  else
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
+                      << '\n');
+
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
                               LoopUses[0])
@@ -2222,7 +2244,8 @@
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
-                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
+                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
+                        SafeToInsertStore);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
Index: llvm/lib/Transforms/Utils/SSAUpdater.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -446,6 +446,9 @@
   // Now that everything is rewritten, delete the old instructions from the
   // function.  They should all be dead now.
   for (Instruction *User : Insts) {
+    if (!shouldDelete(User))
+      continue;
+
     // If this is a load that still has uses, then the load must have been added
     // as a live value in the SSAUpdate data structure for a block (e.g. because
     // the loaded value was stored later).  In this case, we need to recursively
Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -5,12 +5,12 @@
 ; RUN: opt -O2 -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
-; CHECK: if.end
-; CHECK: store
 ; CHECK: memset
 ; CHECK: if.then
 ; CHECK: store
-; CHECK: memset
+; CHECK: if.end
+; CHECK: store
+; CHECK: store
 
 @d = common global i32 0, align 4
 @b = common global i32 0, align 4
Index: llvm/test/Transforms/LICM/hoist-load-without-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/hoist-load-without-store.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -S < %s | FileCheck %s
+
+;; C reproducer:
+;; void f(int *ptr, int n) {
+;;   for (int i = 0; i < n; ++i) {
+;;     int x = *ptr;
+;;     if (x)
+;;       break;
+;;
+;;     *ptr = x + 1;
+;;   }
+;; }
+
+define dso_local void @f(i32* nocapture %ptr, i32 %n) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    store i32 1, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP1_CRIT_EDGE:%.*]]
+; CHECK:       for.body.cleanup1_crit_edge:
+; CHECK-NEXT:    br label [[CLEANUP1]]
+; CHECK:       for.cond.cleanup1_crit_edge:
+; CHECK-NEXT:    br label [[CLEANUP1]]
+; CHECK:       cleanup1:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp7 = icmp slt i32 0, %n
+  br i1 %cmp7, label %for.body.lr.ph, label %cleanup1
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %if.end
+  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+  %0 = load i32, i32* %ptr, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %if.end, label %for.body.cleanup1_crit_edge
+
+if.end:                                           ; preds = %for.body
+  store i32 1, i32* %ptr, align 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup1_crit_edge
+
+for.body.cleanup1_crit_edge:                      ; preds = %for.body
+  br label %cleanup1
+
+for.cond.cleanup1_crit_edge:                      ; preds = %if.end
+  br label %cleanup1
+
+cleanup1:                                         ; preds = %for.cond.cleanup1_crit_edge, %for.body.cleanup1_crit_edge, %entry
+  ret void
+}
Index: llvm/test/Transforms/LICM/promote-capture.ll
===================================================================
--- llvm/test/Transforms/LICM/promote-capture.ll
+++ llvm/test/Transforms/LICM/promote-capture.ll
@@ -111,17 +111,19 @@
 ; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
-; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C_INC2]], 1
 ; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
+; CHECK-NEXT:    [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
Index: llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
===================================================================
--- llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
@@ -11,19 +11,21 @@
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
-; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[INC2]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INC5]] = add nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
Index: llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
===================================================================
--- llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
+++ llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
@@ -314,17 +314,19 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @capture(ptr [[LOCAL]])
+; CHECK-NEXT:    [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X]])
+; CHECK-NEXT:    [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X2]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store i32 [[X2]], ptr [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
 ; CHECK-NEXT:    [[NEXT]] = add i32 [[J]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
Index: llvm/test/Transforms/LICM/scalar-promote.ll
===================================================================
--- llvm/test/Transforms/LICM/scalar-promote.ll
+++ llvm/test/Transforms/LICM/scalar-promote.ll
@@ -315,17 +315,19 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @capture(i32* [[LOCAL]])
+; CHECK-NEXT:    [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[LOCAL]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X]])
+; CHECK-NEXT:    [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X2]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store i32 [[X2]], i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
 ; CHECK-NEXT:    [[NEXT]] = add i32 [[J]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]