Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -147,7 +147,7 @@
   static const unsigned MaxDepth = 3;
 
   bool isConsecutiveAccess(Value *A, Value *B);
-  bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+  bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
                               unsigned Depth = 0) const;
   bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
                                    unsigned Depth) const;
@@ -336,18 +336,35 @@
 }
 
 bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
-                                        const APInt &PtrDelta,
-                                        unsigned Depth) const {
+                                        APInt PtrDelta, unsigned Depth) const {
   unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
   APInt OffsetA(PtrBitWidth, 0);
   APInt OffsetB(PtrBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
 
-  if (DL.getTypeStoreSizeInBits(PtrA->getType()) != PtrBitWidth ||
-      DL.getTypeStoreSizeInBits(PtrB->getType()) != PtrBitWidth)
+  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+
+  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
     return false;
 
+  if (NewPtrBitWidth > PtrBitWidth) {
+    OffsetA = OffsetA.sext(NewPtrBitWidth);
+    OffsetB = OffsetB.sext(NewPtrBitWidth);
+    PtrDelta = PtrDelta.sext(NewPtrBitWidth);
+  } else if (NewPtrBitWidth < PtrBitWidth) {
+    // In case we have to shrink the pointer,
+    // stripAndAccumulateInBoundsConstantOffsets should properly handle a
+    // possible overflow, and the value should fit into the smallest data
+    // type used in the cast/gep chain.
+    assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
+           OffsetB.getMinSignedBits() <= NewPtrBitWidth);
+
+    OffsetA = OffsetA.trunc(NewPtrBitWidth);
+    OffsetB = OffsetB.trunc(NewPtrBitWidth);
+    PtrDelta = PtrDelta.trunc(NewPtrBitWidth);
+  }
+
   APInt OffsetDelta = OffsetB - OffsetA;
 
   // Check if they are based on the same pointer. That makes the offsets
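Note (not part of the patch): PtrDelta changes from `const APInt &` to by-value
because the function now rewrites it locally. The sext/trunc branches are needed
because OffsetA/OffsetB are accumulated at the bit width of the original pointer
type, while stripAndAccumulateInBoundsConstantOffsets may strip through an
addrspacecast to a pointer of a different size. Below is a minimal standalone
sketch of the same normalization step; normalizeToWidth is a hypothetical name
used only for illustration, and it assumes an LLVM build with
llvm/ADT/APInt.h available.

// Sketch only: mirrors the sext/trunc branches added in areConsecutivePointers.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>

using llvm::APInt;

// Hypothetical helper: bring a value accumulated at one pointer width to the
// width of the stripped-down base pointer.
static APInt normalizeToWidth(APInt V, unsigned NewBitWidth) {
  if (NewBitWidth > V.getBitWidth())
    return V.sext(NewBitWidth); // widening an offset preserves its sign
  if (NewBitWidth < V.getBitWidth()) {
    // Shrinking is only safe when the value fits in the narrower type; the
    // patch asserts this, relying on the in-bounds offset accumulation.
    assert(V.getMinSignedBits() <= NewBitWidth);
    return V.trunc(NewBitWidth);
  }
  return V;
}

int main() {
  // In @ext_ptr_wrap below, the i64 index 4294967295 is -1 at the 32-bit
  // width of addrspace(5), so the accumulated flat offset is -1 as well.
  APInt Delta(64, uint64_t(-1));
  llvm::outs() << normalizeToWidth(Delta, 32).getSExtValue() << "\n"; // -1
}

The truncation is lossless exactly when the assert holds: an offset of -1 has a
minimum signed width of 1 bit, so narrowing from 64 to 32 bits preserves it.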
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -54,4 +54,43 @@
   ret void
 }
 
+; CHECK-LABEL: @ext_ptr
+; CHECK: load <2 x i32>
+define void @ext_ptr(i32 addrspace(5)* %p) {
+entry:
+  %gep1 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 0
+  %gep2 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 1
+  %a.ascast = addrspacecast i32 addrspace(5)* %gep1 to i32*
+  %b.ascast = addrspacecast i32 addrspace(5)* %gep2 to i32*
+  %tmp1 = load i32, i32* %a.ascast, align 8
+  %tmp2 = load i32, i32* %b.ascast, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @shrink_ptr
+; CHECK: load <2 x i32>
+define void @shrink_ptr(i32* %p) {
+entry:
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 0
+  %gep2 = getelementptr inbounds i32, i32* %p, i64 1
+  %a.ascast = addrspacecast i32* %gep1 to i32 addrspace(5)*
+  %b.ascast = addrspacecast i32* %gep2 to i32 addrspace(5)*
+  %tmp1 = load i32, i32 addrspace(5)* %a.ascast, align 8
+  %tmp2 = load i32, i32 addrspace(5)* %b.ascast, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @ext_ptr_wrap
+; CHECK: load <2 x i8>
+define void @ext_ptr_wrap(i8 addrspace(5)* %p) {
+entry:
+  %gep1 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 0
+  %gep2 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 4294967295
+  %a.ascast = addrspacecast i8 addrspace(5)* %gep1 to i8*
+  %b.ascast = addrspacecast i8 addrspace(5)* %gep2 to i8*
+  %tmp1 = load i8, i8* %a.ascast, align 1
+  %tmp2 = load i8, i8* %b.ascast, align 1
+  unreachable
+}
+
 !0 = !{}
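Note (not part of the patch): @ext_ptr and @shrink_ptr cast in opposite
directions between 32-bit addrspace(5) and 64-bit flat pointers, exercising the
trunc and sext paths of the fix respectively, while @ext_ptr_wrap uses an i8
offset of 4294967295, which truncates to -1 at 32 bits, so the two loads are
still one byte apart and vectorize to <2 x i8>. The RUN lines driving these
FileCheck prefixes sit at the top of the file, outside this hunk; presumably
something along these lines, where the exact triple and pass spelling are
assumptions:

opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S %s | FileCheck %s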