Index: llvm/trunk/include/llvm/Analysis/InstructionPrecedenceTracking.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/InstructionPrecedenceTracking.h
+++ llvm/trunk/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -114,6 +114,31 @@
   virtual bool isSpecialInstruction(const Instruction *Insn) const;
 };
 
+class MemoryWriteTracking : public InstructionPrecedenceTracking {
+public:
+  MemoryWriteTracking(DominatorTree *DT) : InstructionPrecedenceTracking(DT) {}
+
+  /// Returns the topmost instruction that may write memory from the given
+  /// basic block. Returns nullptr if there is no such instructions in the block.
+  const Instruction *getFirstMemoryWrite(const BasicBlock *BB) {
+    return getFirstSpecialInstruction(BB);
+  }
+
+  /// Returns true if at least one instruction from the given basic block may
+  /// write memory.
+  bool mayWriteToMemory(const BasicBlock *BB) {
+    return hasSpecialInstructions(BB);
+  }
+
+  /// Returns true if the first memory writing instruction of Insn's block
+  /// exists and dominates Insn.
+  bool isDominatedByMemoryWriteFromSameBlock(const Instruction *Insn) {
+    return isPreceededBySpecialInstruction(Insn);
+  }
+
+  virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
 } // llvm
 
 #endif // LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
Index: llvm/trunk/include/llvm/Analysis/MustExecute.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MustExecute.h
+++ llvm/trunk/include/llvm/Analysis/MustExecute.h
@@ -126,6 +126,8 @@
                                // may throw.
   // Contains information about implicit control flow in this loop's blocks.
   mutable ImplicitControlFlowTracking ICF;
+  // Contains information about instruction that may possibly write memory.
+  mutable MemoryWriteTracking MW;
 
 public:
   virtual bool blockMayThrow(const BasicBlock *BB) const;
@@ -138,6 +140,16 @@
                                      const DominatorTree *DT,
                                      const Loop *CurLoop) const;
 
+  /// Returns true if we could not execute a memory-modifying instruction before
+  /// we enter \p BB under assumption that \p CurLoop is entered.
+  bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
+      const;
+
+  /// Returns true if we could not execute a memory-modifying instruction before
+  /// we execute \p I under assumption that \p CurLoop is entered.
+  bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
+      const;
+
   /// Inform the safety info that we are planning to insert a new instruction
   /// into the basic block \p BB. It will make all cache updates to keep it
   /// correct after this insertion.
@@ -148,7 +160,7 @@
   /// this removal.
   void removeInstruction(const Instruction *Inst);
 
-  ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT) {};
+  ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {};
 
   virtual ~ICFLoopSafetyInfo() {};
 };
Index: llvm/trunk/lib/Analysis/InstructionPrecedenceTracking.cpp
===================================================================
--- llvm/trunk/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ llvm/trunk/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -142,3 +142,8 @@
   }
   return true;
 }
+
+bool MemoryWriteTracking::isSpecialInstruction(
+    const Instruction *Insn) const {
+  return Insn->mayWriteToMemory();
+}
Index: llvm/trunk/lib/Analysis/MustExecute.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MustExecute.cpp
+++ llvm/trunk/lib/Analysis/MustExecute.cpp
@@ -72,6 +72,7 @@
 void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
   assert(CurLoop != nullptr && "CurLoop can't be null");
   ICF.clear();
+  MW.clear();
   MayThrow = false;
   // Figure out the fact that at least one block may throw.
   for (auto &BB : CurLoop->blocks())
@@ -84,12 +85,14 @@
 
 void ICFLoopSafetyInfo::insertInstructionTo(const BasicBlock *BB) {
   ICF.invalidateBlock(BB);
+  MW.invalidateBlock(BB);
 }
 
 void ICFLoopSafetyInfo::removeInstruction(const Instruction *Inst) {
   // TODO: So far we just conservatively drop cache, but maybe we can not do it
   // when Inst is not an ICF instruction. Follow-up on that.
   ICF.invalidateBlock(Inst->getParent());
+  MW.invalidateBlock(Inst->getParent());
 }
 
 void LoopSafetyInfo::computeBlockColors(const Loop *CurLoop) {
@@ -256,6 +259,34 @@
          allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
 }
 
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB,
+                                                 const Loop *CurLoop) const {
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+  // Fast path: there are no instructions before header.
+  if (BB == CurLoop->getHeader())
+    return true;
+
+  // Collect all transitive predecessors of BB in the same loop. This set will
+  // be a subset of the blocks within the loop.
+  SmallPtrSet<const BasicBlock *, 4> Predecessors;
+  collectTransitivePredecessors(CurLoop, BB, Predecessors);
+  // Find if there any instruction in either predecessor that could write
+  // to memory.
+  for (auto *Pred : Predecessors)
+    if (MW.mayWriteToMemory(Pred))
+      return false;
+  return true;
+}
+
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
+                                                 const Loop *CurLoop) const {
+  auto *BB = I.getParent();
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+  return !MW.isDominatedByMemoryWriteFromSameBlock(&I) &&
+         doesNotWriteMemoryBefore(BB, CurLoop);
+}
+
 namespace {
   struct MustExecutePrinter : public FunctionPass {
 
Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp
@@ -463,12 +463,6 @@
     if (inSubLoop(BB, CurLoop, LI))
       continue;
 
-    // Keep track of whether the prefix instructions could have written memory.
-    // TODO: This may be done smarter if we keep track of all throwing and
-    // mem-writing operations in every block, e.g. using something similar to
-    // isGuaranteedToExecute.
-    bool IsMemoryNotModified = CurLoop->getHeader() == BB;
-
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
       Instruction &I = *II++;
       // Try constant folding this instruction.  If all the operands are
@@ -529,15 +523,13 @@
       if (((I.use_empty() &&
             match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
            isGuard(&I)) &&
-          IsMemoryNotModified && CurLoop->hasLoopInvariantOperands(&I) &&
-          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) {
+          CurLoop->hasLoopInvariantOperands(&I) &&
+          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+          SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
         hoist(I, DT, CurLoop, SafetyInfo, ORE);
         Changed = true;
         continue;
       }
-
-      if (IsMemoryNotModified)
-        IsMemoryNotModified = !I.mayWriteToMemory();
     }
   }
 
Index: llvm/trunk/test/Transforms/LICM/guards.ll
===================================================================
--- llvm/trunk/test/Transforms/LICM/guards.ll
+++ llvm/trunk/test/Transforms/LICM/guards.ll
@@ -85,16 +85,15 @@
 }
 
 
-; TODO: We can also hoist this guard from mustexec non-header block.
 define void @test4(i1 %c, i32* %p) {
 
 ; CHECK-LABEL: @test4(
 ; CHECK-LABEL: entry:
 ; CHECK:       %a = load i32, i32* %p
 ; CHECK:       %invariant_cond = icmp ne i32 %a, 100
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 ; CHECK-LABEL: loop:
 ; CHECK-LABEL: backedge:
-; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 
 entry:
   br label %loop
@@ -197,6 +196,155 @@
   ret void
 }
 
+; Check that we don't hoist across a store in the header.
+define void @test4c(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4c(
+; CHECK-LABEL: entry:
+; CHECK:       %a = load i32, i32* %p
+; CHECK:       %invariant_cond = icmp ne i32 %a, 100
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  store i8 0, i8* %s
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store in a conditionally execute block.
+define void @test4d(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4d(
+; CHECK-LABEL: entry:
+; CHECK:       %a = load i32, i32* %p
+; CHECK:       %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  store i8 0, i8* %s
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store before the guard in the backedge.
+define void @test4e(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4e(
+; CHECK-LABEL: entry:
+; CHECK:       %a = load i32, i32* %p
+; CHECK:       %invariant_cond = icmp ne i32 %a, 100
+; CHECK:       store i8 0, i8* %s
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  store i8 0, i8* %s
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can hoist the guard in spite of store which happens after.
+define void @test4f(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4f(
+; CHECK-LABEL: entry:
+; CHECK:       %a = load i32, i32* %p
+; CHECK:       %invariant_cond = icmp ne i32 %a, 100
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  store i8 0, i8* %s
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 ; Do not hoist an invariant guard across a variant guard.
 define void @test5(i1 %c, i32* %p, i32* %q) {