diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -965,11 +965,14 @@
         SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
       return false;
 
+  // If Previous has a NonLocal memory dependence, i.e. no memory dependence
+  // within its block, then memory instructions can be sunk past it.
+  bool CanSinkMemoryInstrs = MDR->getDependency(Previous).isNonLocal();
+
   // Ensure every user of the phi node (recursively) is dominated by the
   // previous value. The dominance requirement ensures the loop vectorizer will
   // not need to vectorize the initial value prior to the first iteration of the
   // loop.
-  // TODO: Consider extending this sinking to handle memory instructions.
 
   // We optimistically assume we can sink all users after Previous. Keep a set
   // of instructions to sink after Previous ordered by dominance in the common
@@ -996,9 +999,11 @@
                       SinkCandidate)) // We already are good w/o sinking.
       return true;
 
-    if (SinkCandidate->getParent() != PhiBB ||
-        SinkCandidate->mayHaveSideEffects() ||
-        SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
+    if (SinkCandidate->getParent() != PhiBB || SinkCandidate->isTerminator())
+      return false;
+
+    if (!CanSinkMemoryInstrs && (SinkCandidate->mayHaveSideEffects() ||
+                                 SinkCandidate->mayReadFromMemory()))
       return false;
 
     // Avoid sinking an instruction multiple times (if multiple operands are
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
@@ -221,3 +221,102 @@
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
+
+; Test that LoopVectorize is not inhibited by the load of a[i+1] appearing
+; after the store to w[i].
+;
+; Testcase derived from the following C:
+;
+; double w[1024], x[1024], a[1024];
+; void f() {
+;   for (unsigned i = 0; i < 1024-1; i++) {
+;     w[i] += a[i];
+;     x[i] += a[i+1];
+;   }
+; }
+;
+define void @fixedorder_memory_dependencies(ptr noalias %w, ptr noalias %x, ptr %a) {
+; CHECK-LABEL: @fixedorder_memory_dependencies(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PRE:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <2 x double> poison, double [[A_PRE]], i32 1
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1024 x double], ptr [[A]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1024 x double], ptr [[W:%.*]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1024 x double], ptr [[X:%.*]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD1]] = load <2 x double>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[VECTOR_RECUR]], <2 x double> [[WIDE_LOAD1]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <2 x double> [[WIDE_LOAD]], [[TMP7]]
+; CHECK-NEXT:    store <2 x double> [[TMP8]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[WIDE_LOAD2]], [[WIDE_LOAD1]]
+; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
+; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1023, 1022
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x double> [[WIDE_LOAD1]], i32 1
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x double> [[WIDE_LOAD1]], i32 0
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[A_PRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[A_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[A_IDX_NEXT:%.*]] = getelementptr inbounds [1024 x double], ptr [[A]], i64 0, i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[W_IDX:%.*]] = getelementptr inbounds [1024 x double], ptr [[W]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[X_IDX:%.*]] = getelementptr inbounds [1024 x double], ptr [[X]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[W_LOAD:%.*]] = load double, ptr [[W_IDX]], align 8
+; CHECK-NEXT:    [[ADD_0:%.*]] = fadd fast double [[W_LOAD]], [[SCALAR_RECUR]]
+; CHECK-NEXT:    store double [[ADD_0]], ptr [[W_IDX]], align 8
+; CHECK-NEXT:    [[A_NEXT]] = load double, ptr [[A_IDX_NEXT]], align 8
+; CHECK-NEXT:    [[X_LOAD:%.*]] = load double, ptr [[X_IDX]], align 8
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast double [[X_LOAD]], [[A_NEXT]]
+; CHECK-NEXT:    store double [[ADD_1]], ptr [[X_IDX]], align 8
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1023
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+;
+entry:
+  %a_pre = load double, ptr %a
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %a_via_phi = phi double [ %a_pre, %entry ], [ %a_next, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %a_idx_next = getelementptr inbounds [1024 x double], ptr %a, i64 0, i64 %indvars.iv.next
+  %w_idx = getelementptr inbounds [1024 x double], ptr %w, i64 0, i64 %indvars.iv
+  %x_idx = getelementptr inbounds [1024 x double], ptr %x, i64 0, i64 %indvars.iv
+  %w_load = load double, ptr %w_idx
+  %add_0 = fadd fast double %w_load, %a_via_phi
+
+  ; LV needs to be able to reason that this store can move past the next load.
+  store double %add_0, ptr %w_idx
+  %a_next = load double, ptr %a_idx_next
+
+  %x_load = load double, ptr %x_idx
+  %add_1 = fadd fast double %x_load, %a_next
+  store double %add_1, ptr %x_idx
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1023
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
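
Note on the IVDescriptors.cpp hunk: it assumes a MemoryDependenceResults handle (MDR) is available inside RecurrenceDescriptor's fixed-order recurrence check; the plumbing that threads it in is not part of the hunks shown above. A minimal sketch of the query the patch relies on, with an illustrative helper name that is not in the patch:

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative helper (not part of the patch): returns true if
// MemoryDependenceAnalysis reports no dependence for 'Previous' within its
// own basic block, i.e. the result is NonLocal. This is the condition the
// patch uses to allow sinking memory instructions past 'Previous'.
static bool previousHasNoLocalMemDep(Instruction *Previous,
                                     MemoryDependenceResults &MDR) {
  return MDR.getDependency(Previous).isNonLocal();
}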