Index: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
@@ -106,6 +106,7 @@
   unsigned LicmMssaOptCounter;
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool IsSink;
 };
 
 /// Walk the specified region of the CFG (defined by all blocks
Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp
@@ -376,10 +376,12 @@
   // us to sink instructions in one pass, without iteration.  After sinking
   // instructions, we perform another pass to hoist them out of the loop.
   SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
-                                 LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
+                                 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                                 /*IsSink=*/true};
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                           CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+  Flags.IsSink = false;
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                            CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
@@ -1224,6 +1226,7 @@
       // Could do better here, but this is conservatively correct.
       // TODO: Cache set of Uses on the first walk in runOnLoop, update when
       // moving accesses. Can also extend to dominating uses.
+      auto *SIMD = MSSA->getMemoryAccess(SI);
       for (auto *BB : CurLoop->getBlocks())
         if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
           for (const auto &MA : *Accesses)
@@ -1232,6 +1235,12 @@
               if (!MSSA->isLiveOnEntryDef(MD) &&
                   CurLoop->contains(MD->getBlock()))
                 return false;
+              // Disable hoisting past potentially interfering loads. Optimized
+              // Uses may point to an access outside the loop, as getClobbering
+              // checks the previous iteration when walking the backedge.
+              // FIXME: More precise: no Uses that alias SI.
+              if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+                return false;
             } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
               if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
                 (void)LI; // Silence warning.
@@ -2257,16 +2266,47 @@
 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                              Loop *CurLoop,
                                              SinkAndHoistLICMFlags &Flags) {
-  MemoryAccess *Source;
-  // See declaration of SetLicmMssaOptCap for usage details.
-  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
-    Source = MU->getDefiningAccess();
-  else {
-    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
-    Flags.LicmMssaOptCounter++;
+  // For hoisting, use the walker to determine safety
+  if (!Flags.IsSink) {
+    MemoryAccess *Source;
+    // See declaration of SetLicmMssaOptCap for usage details.
+    if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+      Source = MU->getDefiningAccess();
+    else {
+      Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+      Flags.LicmMssaOptCounter++;
+    }
+    return !MSSA->isLiveOnEntryDef(Source) &&
+           CurLoop->contains(Source->getBlock());
   }
-  return !MSSA->isLiveOnEntryDef(Source) &&
-         CurLoop->contains(Source->getBlock());
+
+  // For sinking, we'd need to check all Defs below this use. The getClobbering
+  // call will look on the backedge of the loop, but will check aliasing with
+  // the instructions on the previous iteration.
+  // For example:
+  // for (i ... )
+  //   load a[i] ( Use (LoE)
+  //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
+  //   i++;
+  // The load sees no clobbering inside the loop, as the backedge alias check
+  // does phi translation, and will check aliasing against store a[i-1].
+  // However sinking the load outside the loop, below the store is incorrect.
+
+  // For now, only sink if there are no Defs in the loop, and the existing ones
+  // precede the use and are in the same block.
+  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+  // needs PostDominatorTreeAnalysis.
+  // FIXME: More precise: no Defs that alias this Use.
+  if (Flags.NoOfMemAccTooLarge)
+    return true;
+  for (auto *BB : CurLoop->getBlocks())
+    if (auto *Accesses = MSSA->getBlockDefs(BB))
+      for (const auto &MA : *Accesses)
+        if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+          if (MU->getBlock() != MD->getBlock() ||
+              !MSSA->locallyDominates(MD, MU))
+            return true;
+  return false;
 }
 
 /// Little predicate that returns true if the specified basic block is in
Index: llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
===================================================================
--- llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
+++ llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
@@ -0,0 +1,48 @@
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=true -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=false -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=true  | FileCheck %s
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=false | FileCheck %s
+
+; LICM: Using
+; LICM-NOT: LICM sinking instruction:   %.pre = load i8, i8* %arrayidx.phi.trans.insert
+
+; CHECK-LABEL: @fn1
+; CHECK-LABEL: entry:
+; CHECK:    br i1 true, label %[[END:.*]], label %[[PH:.*]]
+; CHECK: [[PH]]:
+; CHECK:    br label %[[CRIT:.*]]
+; CHECK: [[CRIT]]:
+; CHECK:    load i8
+; CHECK:    store i8
+; CHECK:    br i1 true, label %[[ENDCRIT:.*]], label %[[CRIT]]
+; CHECK: [[ENDCRIT]]:
+; CHECK-NOT: load i8
+; CHECK:    br label %[[END]]
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @fn1() {
+entry:
+  %g = alloca [9 x i8], align 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %0 = phi i64 [ 0, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+  %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv
+  store i8 2, i8* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 undef, label %for.end18, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %arrayidx.phi.trans.insert = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv.next
+  %.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1
+  %phitmp = zext i8 %.pre to i64
+  br label %for.body
+
+for.end18:                                        ; preds = %for.body
+  store i64 %0, i64* undef, align 8
+  ret void
+}
+
Index: llvm/trunk/test/Transforms/LICM/store-hoisting.ll
===================================================================
--- llvm/trunk/test/Transforms/LICM/store-hoisting.ll
+++ llvm/trunk/test/Transforms/LICM/store-hoisting.ll
@@ -348,8 +348,11 @@
 ; the load must observe.
 define i32 @test_dominated_read(i32* %loc) {
 ; CHECK-LABEL: @test_dominated_read
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
+; AST-LABEL: exit:
+; AST:  store i32 0, i32* %loc
 entry:
   br label %loop