diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -747,10 +747,13 @@
   /// sometimes useful to ignore certain attributes.
   enum OperationEquivalenceFlags {
     /// Check for equivalence ignoring load/store alignment.
-    CompareIgnoringAlignment = 1<<0,
+    CompareIgnoringAlignment = 1 << 0,
     /// Check for equivalence treating a type and a vector of that type
     /// as equivalent.
-    CompareUsingScalarTypes = 1<<1
+    CompareUsingScalarTypes = 1 << 1,
+    /// Check for equivalence ignoring operand types, particularly useful
+    /// for loads and stores (see commit 816d26fe5eeae4ad).
+    CompareIgnoringType = 1 << 2
   };
 
   /// This function determines if the specified instruction executes the same
@@ -763,6 +766,27 @@
   /// Determine if one instruction is the same operation as another.
   bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const LLVM_READONLY;
 
+  /// This function determines if the specified instruction executes the same
+  /// load/store operation as the current one. This means that the opcodes
+  /// and any other factors affecting the operation must be the same. This is
+  /// similar to isSameOperationAs, except it ignores types since memory does
+  /// not have an intrinsic type.
+  /// @returns true if the specified instruction is the same operation as
+  /// the current one.
+  /// Determine if one instruction is the same operation as another.
+  bool isSameLoadStoreOperationAs(const Instruction *I) const LLVM_READONLY;
+
+  /// This function determines if the specified instruction has a conflicting
+  /// value type with the current one. This means that the memory opcodes are
+  /// the same, but the value type is different. It is meant to be used as a
+  /// complement to isSameLoadStoreOperationAs.
+  /// @returns true if the specified instruction is the same memory operation
+  /// as the current one, but with a different value type.
+  /// Determine if one instruction is the same memory operation as another,
+  /// but with a different value type.
+  bool isLoadStoreConflictingType(const Instruction *I,
+                                  bool UseScalarTy = false) const LLVM_READONLY;
+
   /// Return true if there are any uses of this instruction in blocks other than
   /// the specified block. Note that PHI nodes are considered to evaluate their
   /// operands in the corresponding predecessor block.
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -387,6 +387,11 @@
            !isVolatile();
   }
 
+  /// Whether bitcasting the ValueOperand has no semantic effect. For example,
+  /// bitcasting an atomic store may cause complications, as backends may rely
+  /// on the exact type stored to select appropriate atomic operations.
+  bool isTriviallyBitcasted() const { return isUnordered() && !isAtomic(); }
+
   Value *getValueOperand() { return getOperand(0); }
   const Value *getValueOperand() const { return getOperand(0); }
 
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -586,26 +586,52 @@
                                     unsigned flags) const {
   bool IgnoreAlignment = flags & CompareIgnoringAlignment;
   bool UseScalarTypes = flags & CompareUsingScalarTypes;
+  bool IgnoreType = flags & CompareIgnoringType;
 
-  if (getOpcode() != I->getOpcode() ||
-      getNumOperands() != I->getNumOperands() ||
-      (UseScalarTypes ?
-       getType()->getScalarType() != I->getType()->getScalarType() :
-       getType() != I->getType()))
+  if (getOpcode() != I->getOpcode() || getNumOperands() != I->getNumOperands())
     return false;
 
   // We have two instructions of identical opcode and #operands.  Check to see
   // if all operands are the same type
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (UseScalarTypes ?
-        getOperand(i)->getType()->getScalarType() !=
-          I->getOperand(i)->getType()->getScalarType() :
-        getOperand(i)->getType() != I->getOperand(i)->getType())
+  if (!IgnoreType) {
+    if ((UseScalarTypes
+             ? getType()->getScalarType() != I->getType()->getScalarType()
+             : getType() != I->getType()))
       return false;
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (UseScalarTypes
+              ? getOperand(i)->getType()->getScalarType() !=
+                    I->getOperand(i)->getType()->getScalarType()
+              : getOperand(i)->getType() != I->getOperand(i)->getType())
+        return false;
+  }
 
   return haveSameSpecialState(this, I, IgnoreAlignment);
 }
 
+bool Instruction::isSameLoadStoreOperationAs(const Instruction *I) const {
+  if (!isa<LoadInst>(this) && !isa<StoreInst>(this))
+    return false;
+
+  return isSameOperationAs(I, CompareIgnoringType);
+}
+
+bool Instruction::isLoadStoreConflictingType(const Instruction *I,
+                                             bool UseScalarTy) const {
+
+  if (getOpcode() != I->getOpcode())
+    return false;
+
+  if (!isa<LoadInst>(this) && !isa<StoreInst>(this))
+    return false;
+
+  auto ThisType = getLoadStoreType(const_cast<Instruction *>(this));
+  auto OtherType = getLoadStoreType(const_cast<Instruction *>(I));
+
+  return UseScalarTy ? ThisType->getScalarType() != OtherType->getScalarType()
+                     : ThisType != OtherType;
+}
+
 bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
   for (const Use &U : uses()) {
     // PHI nodes uses values in the corresponding predecessor block.  For other
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
@@ -191,10 +192,11 @@
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
-    if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
+    if (AA->isMustAlias(Loc0, Loc1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
-      return Store1;
+      if (Store0->isSameLoadStoreOperationAs(Store1))
+        return Store1;
     }
   }
   return nullptr;
 }
@@ -254,6 +256,14 @@
   S0->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
   S0->mergeDIAssignID(S1);
 
+  // Insert a bitcast for stores with conflicting value types.
+  if (S0->isLoadStoreConflictingType(S1) && S0->isTriviallyBitcasted()) {
+    IRBuilder<> Builder(S0);
+    auto Cast = Builder.CreateBitCast(S0->getValueOperand(),
+                                      S1->getValueOperand()->getType());
+    S0->setOperand(0, Cast);
+  }
+
   // Create the new store to be inserted at the join point.
   StoreInst *SNew = cast<StoreInst>(S0->clone());
   SNew->insertBefore(&*InsertPt);
diff --git a/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll b/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll
new file
--- /dev/null
+++ b/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+define internal fastcc void @sink_conflict(ptr %this.64.val, half %val1, i16 %val2, i32 %val3) unnamed_addr align 2 {
+; CHECK-LABEL: define internal fastcc void @sink_conflict
+; CHECK-SAME: (ptr [[THIS_64_VAL:%.*]], half [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) unnamed_addr align 2 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT_NOT:%.*]] = icmp eq i32 [[VAL3]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT_NOT]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[VAL1]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[VAL2]] to half
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[VAL2_SINK:%.*]] = phi i16 [ [[TMP0]], [[IF_THEN]] ], [ [[VAL2]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[VAL1_SINK:%.*]] = phi half [ [[TMP1]], [[IF_THEN]] ], [ [[VAL1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    store i16 [[VAL2_SINK]], ptr [[THIS_64_VAL]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS_64_VAL]], i64 16
+; CHECK-NEXT:    store half [[VAL1_SINK]], ptr [[TMP2]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp.not.not = icmp eq i32 %val3, 0
+  br i1 %cmp.not.not, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store half %val1, ptr %this.64.val, align 2
+  %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %this.64.val, i64 16
+  store i16 %val2, ptr %add.ptr.i.i.i.i, align 2
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %this.64.val, i64 16
+  store half %val1, ptr %add.ptr.i.i.i.i7, align 2
+  store i16 %val2, ptr %this.64.val, align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
new file
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -O3 -S < %s | FileCheck %s
+
+%struct.ss = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+
+define internal void @phantomLoad(ptr %p, ptr %y, ptr %x) {
+entry:
+  %0 = load i32, ptr %x
+  store i32 %0, ptr %y
+  ret void
+}
+
+define ptr @parent(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i32 %val3) align 2 {
+; CHECK-LABEL: define nonnull ptr @parent
+; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], half [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64
+; CHECK-NEXT:    [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VAL1]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[VAL2]] to half
+; CHECK-NEXT:    [[VAL2_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[TMP1]], i16 [[VAL2]]
+; CHECK-NEXT:    [[VAL1_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], half [[TMP2]], half [[VAL1]]
+; CHECK-NEXT:    store i16 [[VAL2_SINK_I]], ptr [[F_VAL]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16
+; CHECK-NEXT:    store half [[VAL1_SINK_I]], ptr [[TMP3]], align 2
+; CHECK-NEXT:    ret ptr [[F]]
+;
+entry:
+  call void @badChild(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i32 %val3) #4
+  ret ptr %f
+}
+
+
+define internal void @badChild(ptr align 8 dereferenceable(72) %this, half %val1, i16 %val2, i32 %val3) align 2 {
+entry:
+  %othergep = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 2
+  %load0 = load ptr, ptr %othergep, align 8
+  %x = alloca i32
+  %y = alloca i32
+  call void @phantomLoad(ptr %load0, ptr %x, ptr %y)
+  %val1.cast = bitcast half %val1 to i16
+  %cmp.not.not = icmp eq i32 %val3, 0
+  br i1 %cmp.not.not, label %if.then, label %if.else
+if.then:                                          ; preds = %entry
+  %0 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %1 = load ptr, ptr %0, align 8
+  store i16 %val1.cast, ptr %1, align 2
+  %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %1, i64 16
+  store i16 %val2, ptr %add.ptr.i.i.i.i, align 2
+  br label %if.end
+if.else:                                          ; preds = %entry
+  %2 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %3 = load ptr, ptr %2, align 8
+  %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %3, i64 16
+  store i16 %val1.cast, ptr %add.ptr.i.i.i.i7, align 2
+  store i16 %val2, ptr %3, align 2
+  br label %if.end
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
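
Note (not part of the patch): the following is a minimal, self-contained C++ sketch of how the new queries are expected to behave once the changes above are applied. The demo function @f, the module setup, and the driver are illustrative assumptions only; the helpers isSameOperationAs, isSameLoadStoreOperationAs, and isLoadStoreConflictingType are the APIs touched by this patch.

// Sketch: build two stores to the same pointer with bit-equal width but
// different value types (half vs. i16) and compare them with the existing
// strict query and the new type-agnostic queries.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // void @f(ptr %p, half %h, i16 %i)
  auto *FnTy = FunctionType::get(
      Type::getVoidTy(Ctx),
      {PointerType::get(Ctx, 0), Type::getHalfTy(Ctx), Type::getInt16Ty(Ctx)},
      /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  // Two stores to the same pointer with different value types.
  StoreInst *S0 = B.CreateStore(F->getArg(1), F->getArg(0)); // store half
  StoreInst *S1 = B.CreateStore(F->getArg(2), F->getArg(0)); // store i16
  B.CreateRetVoid();

  // The strict comparison still distinguishes the operand types...
  outs() << (S0->isSameOperationAs(S1) ? "true" : "false") << "\n"; // false
  // ...while the type-agnostic comparison treats them as the same memory
  // operation, and the conflict query reports the type mismatch that
  // MergedLoadStoreMotion now resolves by inserting a bitcast.
  outs() << (S0->isSameLoadStoreOperationAs(S1) ? "true" : "false") << "\n"; // true
  outs() << (S0->isLoadStoreConflictingType(S1) ? "true" : "false") << "\n"; // true
  return 0;
}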