diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1441,8 +1441,7 @@
     if (Offset.has_value()) {
       // `Offset` might not have the expected number of bits, if e.g. AS has a
       // different number of bits than opaque pointers.
-      ChainIter->second.push_back(
-          ChainElem{I, Offset.value().sextOrTrunc(ASPtrBits)});
+      ChainIter->second.push_back(ChainElem{I, Offset.value()});
       // Move ChainIter to the front of the MRU list.
       MRU.remove(*ChainIter);
       MRU.push_front(*ChainIter);
@@ -1475,9 +1474,11 @@
   LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                     << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                     << ", Depth=" << Depth << "\n");
-  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
-  APInt OffsetA(OffsetBitWidth, 0);
-  APInt OffsetB(OffsetBitWidth, 0);
+  // We'll ultimately return a value of this bit width, even if computations
+  // happen in a different width.
+  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
+  APInt OffsetA(OrigBitWidth, 0);
+  APInt OffsetB(OrigBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
   unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
@@ -1493,7 +1494,7 @@
   OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
   OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
   if (PtrA == PtrB)
-    return OffsetB - OffsetA;
+    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
 
   // Try to compute B - A.
   const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
@@ -1501,11 +1502,13 @@
     LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
     ConstantRange DistRange = SE.getSignedRange(DistScev);
     if (DistRange.isSingleElement())
-      return OffsetB - OffsetA + *DistRange.getSingleElement();
+      return (OffsetB - OffsetA + *DistRange.getSingleElement())
+          .sextOrTrunc(OrigBitWidth);
   }
   std::optional<APInt> Diff =
       getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
   if (Diff.has_value())
-    return OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth());
+    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
+        .sextOrTrunc(OrigBitWidth);
   return std::nullopt;
 }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -62,6 +62,23 @@
   unreachable
 }
 
+; CHECK-LABEL: @select_different_as
+; CHECK: load <2 x i32>
+define void @select_different_as(ptr addrspace(1) %p0, ptr addrspace(5) %q0, i1 %cond) {
+entry:
+  %p1 = getelementptr inbounds i32, ptr addrspace(1) %p0, i64 1
+  %q1 = getelementptr inbounds i32, ptr addrspace(5) %q0, i64 1
+  %p0.ascast = addrspacecast ptr addrspace(1) %p0 to ptr
+  %p1.ascast = addrspacecast ptr addrspace(1) %p1 to ptr
+  %q0.ascast = addrspacecast ptr addrspace(5) %q0 to ptr
+  %q1.ascast = addrspacecast ptr addrspace(5) %q1 to ptr
+  %sel0 = select i1 %cond, ptr %p0.ascast, ptr %q0.ascast
+  %sel1 = select i1 %cond, ptr %p1.ascast, ptr %q1.ascast
+  %tmp1 = load i32, ptr %sel0, align 8
+  %tmp2 = load i32, ptr %sel1, align 8
+  unreachable
+}
+
 ; CHECK-LABEL: @shrink_ptr
 ; CHECK: load <2 x i32>
 define void @shrink_ptr(ptr %p) {