Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -339,14 +339,13 @@
                                         const APInt &PtrDelta,
                                         unsigned Depth) const {
   unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
-  unsigned PtrAS = PtrA->getType()->getPointerAddressSpace();
   APInt OffsetA(PtrBitWidth, 0);
   APInt OffsetB(PtrBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

-  if (PtrA->getType()->getPointerAddressSpace() != PtrAS ||
-      PtrB->getType()->getPointerAddressSpace() != PtrAS)
+  if (DL.getTypeStoreSizeInBits(PtrA->getType()) != PtrBitWidth ||
+      DL.getTypeStoreSizeInBits(PtrB->getType()) != PtrBitWidth)
     return false;

   APInt OffsetDelta = OffsetB - OffsetA;
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -1,11 +1,11 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S < %s | FileCheck %s
+; RUN: opt -load-store-vectorizer -S < %s | FileCheck %s

-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32"
+target datalayout = "e-p:64:64-p1:64:64-p5:32:32"

-; CHECK-LABEL: @test
+; CHECK-LABEL: @cast_to_ptr
 ; CHECK: store i32* undef, i32** %tmp9, align 8
 ; CHECK: store i32* undef, i32** %tmp7, align 8
-define amdgpu_kernel void @test() {
+define void @cast_to_ptr() {
 entry:
   %a10.ascast.i = addrspacecast i32* addrspace(5)* null to i32**
   %tmp4 = icmp eq i32 undef, 0
@@ -16,3 +16,38 @@
   store i32* undef, i32** %tmp7, align 8
   unreachable
 }
+
+; CHECK-LABEL: @cast_to_cast
+; CHECK: %tmp4 = load i32*, i32** %tmp1, align 8
+; CHECK: %tmp5 = load i32*, i32** %tmp3, align 8
+define void @cast_to_cast() {
+entry:
+  %a10.ascast.i = addrspacecast i32* addrspace(5)* undef to i32**
+  %b14.ascast.i = addrspacecast i32* addrspace(5)* null to i32**
+  %tmp1 = select i1 false, i32** %a10.ascast.i, i32** undef
+  %tmp3 = select i1 false, i32** %b14.ascast.i, i32** undef
+  %tmp4 = load i32*, i32** %tmp1, align 8
+  %tmp5 = load i32*, i32** %tmp3, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @all_to_cast
+; CHECK: load <4 x float>
+define void @all_to_cast(i8* nocapture readonly align 16 dereferenceable(16) %alloc1) {
+entry:
+  %alloc16 = addrspacecast i8* %alloc1 to i8 addrspace(1)*
+  %tmp = bitcast i8 addrspace(1)* %alloc16 to float addrspace(1)*
+  %tmp1 = load float, float addrspace(1)* %tmp, align 16, !invariant.load !0
+  %tmp6 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 4
+  %tmp7 = bitcast i8 addrspace(1)* %tmp6 to float addrspace(1)*
+  %tmp8 = load float, float addrspace(1)* %tmp7, align 4, !invariant.load !0
+  %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 8
+  %tmp16 = bitcast i8 addrspace(1)* %tmp15 to float addrspace(1)*
+  %tmp17 = load float, float addrspace(1)* %tmp16, align 8, !invariant.load !0
+  %tmp24 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 12
+  %tmp25 = bitcast i8 addrspace(1)* %tmp24 to float addrspace(1)*
+  %tmp26 = load float, float addrspace(1)* %tmp25, align 4, !invariant.load !0
+  ret void
+}
+
+!0 = !{}