Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5997,6 +5997,12 @@
     if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
       continue;
 
+    // Ignore A if the memory objects of A and B don't belong to the same
+    // address space.
+    if (DesA.Scev->getType()->getPointerAddressSpace() !=
+        DesB.Scev->getType()->getPointerAddressSpace())
+      continue;
+
     // Calculate the distance from A to B.
     const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
         PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
@@ -6040,35 +6046,35 @@
       releaseGroup(Group);
 
   // Remove interleaved groups with gaps (currently only loads) whose memory
-  // accesses may wrap around. We have to revisit the getPtrStride analysis, 
-  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does 
+  // accesses may wrap around. We have to revisit the getPtrStride analysis,
+  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
   // not check wrapping (see documentation there).
-  // FORNOW we use Assume=false; 
-  // TODO: Change to Assume=true but making sure we don't exceed the threshold 
+  // FORNOW we use Assume=false;
+  // TODO: Change to Assume=true but making sure we don't exceed the threshold
   // of runtime SCEV assumptions checks (thereby potentially failing to
-  // vectorize altogether). 
+  // vectorize altogether).
   // Additional optional optimizations:
-  // TODO: If we are peeling the loop and we know that the first pointer doesn't 
+  // TODO: If we are peeling the loop and we know that the first pointer doesn't
   // wrap then we can deduce that all pointers in the group don't wrap.
-  // This means that we can forcefully peel the loop in order to only have to 
-  // check the first pointer for no-wrap. When we'll change to use Assume=true 
+  // This means that we can forcefully peel the loop in order to only have to
+  // check the first pointer for no-wrap. When we'll change to use Assume=true
   // we'll only need at most one runtime check per interleaved group.
   //
   for (InterleaveGroup *Group : LoadGroups) {
 
     // Case 1: A full group. Can skip the checks; for full groups, if the wide
-    // load would wrap around the address space we would do a memory access at 
-    // nullptr even without the transformation. 
-    if (Group->getNumMembers() == Group->getFactor()) 
+    // load would wrap around the address space we would do a memory access at
+    // nullptr even without the transformation.
+    if (Group->getNumMembers() == Group->getFactor())
       continue;
 
-    // Case 2: If first and last members of the group don't wrap this implies 
+    // Case 2: If first and last members of the group don't wrap this implies
     // that all the pointers in the group don't wrap.
     // So we check only group member 0 (which is always guaranteed to exist),
-    // and group member Factor - 1; If the latter doesn't exist we rely on 
+    // and group member Factor - 1; If the latter doesn't exist we rely on
     // peeling (if it is a non-reversed access -- see Case 3).
     Value *FirstMemberPtr = getPointerOperand(Group->getMember(0));
-    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, 
+    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                       /*ShouldCheckWrap=*/true)) {
       DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                       "first group member potentially pointer-wrapping.\n");
Index: llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -mtriple=aarch64-apple-ios -loop-vectorize -enable-interleaved-mem-accesses -force-vector-width=2 < %s | FileCheck %s
+
+; Reproducer for an address space fault in the LoopVectorizer (pr31900). Added
+; different-sized address space pointers (p:16:16-p4:32:16) to the aarch64
+; datalayout to reproduce the fault.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16"
+
+; Check that all the loads are scalarized.
+; CHECK: load i16, i16*
+; CHECK: load i16, i16*
+; CHECK: load i16, i16 addrspace(4)*
+; CHECK: load i16, i16 addrspace(4)*
+
+%rec1445 = type { i16, i16, i16, i16, i16 }
+
+define void @foo() {
+bb1:
+  br label %bb4
+
+bb4:
+  %tmp1 = phi i16 [ undef, %bb1 ], [ %_tmp1013, %bb4 ]
+  %tmp2 = phi %rec1445* [ undef, %bb1 ], [ %_tmp1015, %bb4 ]
+  %tmp3 = phi %rec1445 addrspace(4)* [ undef, %bb1 ], [ %_tmp1017, %bb4 ]
+  %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1
+  %_tmp987 = load i16, i16* %0, align 1
+  %1 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 0, i32 1
+  %_tmp993 = load i16, i16 addrspace(4)* %1, align 1
+  %_tmp1013 = add i16 %tmp1, 1
+  %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1
+  %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1
+  %_tmp1019 = icmp ult i16 %_tmp1013, 24
+  br i1 %_tmp1019, label %bb4, label %bb16
+
+bb16:
+  unreachable
+}
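
For context on why the address-space check is the right filter: the distance
computation in the first hunk subtracts the two pointer SCEVs via getMinusSCEV,
and ScalarEvolution cannot subtract expressions of different effective types.
Pointer width is a per-address-space property of the data layout; with
"p:16:16-p4:32:16" as in the test, an addrspace(0) pointer is 16 bits wide
while an addrspace(4) pointer is 32 bits wide, so pairing the two loads in bb4
previously fed mismatched types into the subtraction. The sketch below is a
minimal, self-contained model of the fixed pairing logic, not the actual LLVM
code; StrideDescriptor and distanceForGrouping are illustrative names.

  #include <cstdint>
  #include <optional>

  // Hypothetical stand-in for the per-access descriptors (DesA/DesB in the
  // patch); not the actual LLVM types.
  struct StrideDescriptor {
    int64_t Stride;     // stride between consecutive accesses, in bytes
    uint64_t Size;      // size of the accessed element, in bytes
    unsigned AddrSpace; // address space of the accessed pointer
    int64_t Addr;       // simplified stand-in for the pointer SCEV
  };

  // Returns the constant distance from A to B, or std::nullopt when the pair
  // must be ignored (mirroring the `continue` statements in the patch). The
  // address-space test runs before the subtraction, so a difference between
  // pointers of different widths is never formed.
  std::optional<int64_t> distanceForGrouping(const StrideDescriptor &DesA,
                                             const StrideDescriptor &DesB) {
    if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
      return std::nullopt;
    if (DesA.AddrSpace != DesB.AddrSpace)
      return std::nullopt; // memory objects in different address spaces
    return DesA.Addr - DesB.Addr;
  }

With the data layout above, the addrspace(0) and addrspace(4) accesses in bb4
are now filtered by the new check instead of being paired, and each load stays
scalar, which is what the CHECK lines verify.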