Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11165,6 +11165,13 @@
     if (Index->getMemoryVT() != MemVT)
       break;
 
+    // We do not allow under-aligned stores in order to prevent
+    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
+    // be irrelevant here; what MATTERS is that we not move memory
+    // operations that potentially overlap past each-other.
+    if (Index->getAlignment()*8 < MemVT.getSizeInBits())
+      break;
+
     // We found a potential memory operand to merge.
     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
 
@@ -11249,12 +11256,18 @@
   if (StoreNodes.size() < 2)
     return false;
 
-  // Sort the memory operands according to their distance from the base pointer.
+  // Sort the memory operands according to their distance from the
+  // base pointer.  As a secondary criteria: make sure stores coming
+  // later in the code come first in the list. This is important for
+  // the non-UseAA case, because we're merging stores into the FINAL
+  // store along a chain which potentially contains aliasing stores.
+  // Thus, if there are multiple stores to the same address, the last
+  // one can be considered for merging but not the others.
   std::sort(StoreNodes.begin(), StoreNodes.end(),
             [](MemOpLink LHS, MemOpLink RHS) {
     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
-            LHS.SequenceNum > RHS.SequenceNum);
+            LHS.SequenceNum < RHS.SequenceNum);
   });
 
   // Scan the memory operations on the chain and find the first non-consecutive
Index: test/CodeGen/X86/MergeConsecutiveStores.ll
===================================================================
--- test/CodeGen/X86/MergeConsecutiveStores.ll
+++ test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -288,12 +288,16 @@
   ret void
 }
 
-;; On x86, even unaligned copies can be merged to vector ops.
+;; On x86, even unaligned copies should be merged to vector ops.
+;; TODO: however, this cannot happen at the moment, due to brokenness
+;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
 ; CHECK-LABEL: merge_loads_no_align:
 ;  load:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
 ;  store:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
 ; CHECK: ret
 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
   %a1 = icmp sgt i32 %count, 0
Index: test/CodeGen/X86/dag-merge-fast-accesses.ll
===================================================================
--- test/CodeGen/X86/dag-merge-fast-accesses.ll
+++ test/CodeGen/X86/dag-merge-fast-accesses.ll
@@ -50,11 +50,19 @@
 }
 
 
+;; TODO: FAST *should* be:
+;;    movups (%rdi), %xmm0
+;;    movups %xmm0, 40(%rdi)
+;; ..but is not currently. See the UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
 define void @merge_vec_load_and_stores(i64 *%ptr) {
 ; FAST-LABEL: merge_vec_load_and_stores:
 ; FAST:       # BB#0:
-; FAST-NEXT:    movups (%rdi), %xmm0
-; FAST-NEXT:    movups %xmm0, 40(%rdi)
+; FAST-NEXT:    movq (%rdi), %rax
+; FAST-NEXT:    movq 8(%rdi), %rcx
+; FAST-NEXT:    movq %rax, 40(%rdi)
+; FAST-NEXT:    movq %rcx, 48(%rdi)
 ; FAST-NEXT:    retq
 ;
 ; SLOW-LABEL: merge_vec_load_and_stores:
Index: test/CodeGen/X86/stores-merging.ll
===================================================================
--- test/CodeGen/X86/stores-merging.ll
+++ test/CodeGen/X86/stores-merging.ll
@@ -7,17 +7,51 @@
 
 @e = common global %structTy zeroinitializer, align 4
 
-; CHECK-LABEL: f
-define void @f() {
-entry:
+;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
+;; store operations.  The first test stores in increasing address
+;; order, the second in decreasing -- but in both cases should have
+;; the same result in memory in the end.
 
-; CHECK:   movabsq	$528280977409, %rax
+; CHECK-LABEL: redundant_stores_merging:
+; CHECK:   movl    $123, e+8(%rip)
+; CHECK:   movabsq $1958505086977, %rax
 ; CHECK:   movq    %rax, e+4(%rip)
-; CHECK:   movl    $456, e+8(%rip)
-
+define void @redundant_stores_merging() {
+entry:
   store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
   store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
   store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
   ret void
 }
 
+;; This variant tests PR25154.
+; CHECK-LABEL: redundant_stores_merging_reverse:
+; CHECK:   movl    $123, e+8(%rip)
+; CHECK:   movabsq $1958505086977, %rax
+; CHECK:   movq    %rax, e+4(%rip)
+define void @redundant_stores_merging_reverse() {
+entry:
+  store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+  store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+  store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
+  ret void
+}
+
+@b = common global [8 x i8] zeroinitializer, align 2
+
+;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
+;; these must not be reordered in MergeConsecutiveStores such that the
+;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
+;; a movl, after the store to 3).
+
+;; CHECK-LABEL: overlapping_stores_merging:
+;; CHECK:  movw    $0, b+2(%rip)
+;; CHECK:  movw    $2, b+3(%rip)
+;; CHECK:  movw    $1, b(%rip)
+define void @overlapping_stores_merging() {
+entry:
+  store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
+  store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
+  store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
+  ret void
+}