diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -927,6 +927,7 @@
     ID.AddInteger(MN->getRawSubclassData());
     ID.AddInteger(MN->getPointerInfo().getAddrSpace());
     ID.AddInteger(MN->getMemOperand()->getFlags());
+    ID.AddInteger(MN->getMemoryVT().getRawBits());
   }
 }
 
@@ -8066,6 +8067,7 @@
         Opcode, dl.getIROrder(), VTList, MemVT, MMO));
     ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
     ID.AddInteger(MMO->getFlags());
+    ID.AddInteger(MemVT.getRawBits());
     void *IP = nullptr;
     if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
       cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
diff --git a/llvm/test/CodeGen/NVPTX/dag-cse.ll b/llvm/test/CodeGen/NVPTX/dag-cse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/dag-cse.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+
+%st = type { i8, i8, i16 }
+
+@a = internal addrspace(1) global %st zeroinitializer, align 8
+@b = internal addrspace(1) global i32 0, align 8
+@c = internal addrspace(1) global i32 0, align 8
+
+; Verify that loads with different memory types are not subject to CSE
+; once they are promoted to the same type.
+;
+; CHECK: ld.global.v2.u8  {%[[B1:rs[0-9]+]], %[[B2:rs[0-9]+]]}, [a];
+; CHECK: st.global.v2.u8  [b], {%[[B1]], %[[B2]]};
+;
+; CHECK: ld.global.v2.u16 {%[[C1:rs[0-9]+]], %[[C2:rs[0-9]+]]}, [a];
+; CHECK: st.global.v2.u16 [c], {%[[C1]], %[[C2]]};
+
+define void @test1() #0 {
+  %narrow = load <2 x i8>, ptr addrspace(1) @a, align 8    ; <2 x i8> load from @a
+  store <2 x i8> %narrow, ptr addrspace(1) @b, align 8     ; written out to @b
+  %wide = load <2 x i16>, ptr addrspace(1) @a, align 8     ; <2 x i16> load from the same @a
+  store <2 x i16> %wide, ptr addrspace(1) @c, align 8      ; written out to @c
+  ret void
+}