Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -653,7 +653,9 @@ Value *Ptr, Value *OrigPtr = nullptr); /// \brief If the pointer has a constant stride return it in units of its -/// element size. Otherwise return zero. +/// element size. Otherwise return zero. If \p Lp is nullptr, the loop the +/// pointer strides over is taken from the pointer's access function; otherwise +/// the stride is computed with respect to the specified loop \p Lp. /// /// Ensure that it does not wrap in the address space, assuming the predicate /// associated with \p PSE is true. @@ -662,7 +664,7 @@ /// to \p PtrToStride and therefore add further predicates to \p PSE. /// The \p Assume parameter indicates if we are allowed to make additional /// run-time assumptions. -int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, +int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false); Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -583,7 +583,7 @@ // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { + getPtrStride(PSE, Ptr, /*Lp = */nullptr, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -832,7 +832,7 @@ } /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, +int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, bool Assume) { Type *Ty = Ptr->getType(); @@ -858,12 +858,17 @@ return 0; } - // The accesss function must stride over the innermost loop. - if (Lp != AR->getLoop()) { - DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << - *Ptr << " SCEV: " << *AR << "\n"); - return 0; - } + // Check whether we should get stride with regard to specified loop. + if (Lp) { + // Make sure Ptr is strided over Lp. + if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) { + DEBUG(dbgs() << "LAA: Bad stride - Not striding over provided loop " << + *Ptr << " SCEV: " << *AR << "\n"); + return 0; + } + } else + // Obtain the loop over which access function is strided. + Lp = AR->getLoop(); // The address calculation must not wrap. Otherwise, a dependence could be // inverted. @@ -1161,8 +1166,8 @@ const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides, true); - int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides, true); + int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); + int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1615,7 +1620,7 @@ // read a few words, modify, and write a few words, and some of the // words may be written to the same address. 
bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !getPtrStride(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } Index: lib/Transforms/Scalar/LoopLoadElimination.cpp =================================================================== --- lib/Transforms/Scalar/LoopLoadElimination.cpp +++ lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -77,8 +77,8 @@ // Currently we only support accesses with unit stride. FIXME: we should be // able to handle non unit stirde as well as long as the stride is equal to // the dependence distance. - if (isStridedPtr(PSE, LoadPtr, L) != 1 || - isStridedPtr(PSE, LoadPtr, L) != 1) + if (getPtrStride(PSE, LoadPtr, L) != 1 || + getPtrStride(PSE, LoadPtr, L) != 1) return false; auto &DL = Load->getParent()->getModule()->getDataLayout(); Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4999,7 +4999,7 @@ StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); Index: test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll =================================================================== --- test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll +++ test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll @@ -0,0 +1,46 @@ +; This is the test case from PR26314. 
+; RUN: opt -loop-accesses -analyze -S < %s | FileCheck %s +; CHECK: function 'Test': +; CHECK: .inner: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK: Check 0: +; CHECK: Check 1: + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] } + +define void @Test(%struct.s* nocapture %obj) #0 { + br label %.outer.preheader + + +.outer.preheader: + %i = phi i64 [ 0, %0 ], [ %i.next, %.outer ] + %1 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 1, i64 %i + br label %.inner + +.exit: + ret void + +.outer: + %i.next = add nuw nsw i64 %i, 1 + %exitcond.outer = icmp eq i64 %i.next, 32 + br i1 %exitcond.outer, label %.exit, label %.outer.preheader + +.inner: + %j = phi i64 [ 0, %.outer.preheader ], [ %j.next, %.inner ] + %2 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 0, i64 %j + %3 = load i32, i32* %2 + %4 = load i32, i32* %1 + %5 = add nsw i32 %4, %3 + %6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j + %7 = load i32, i32* %6 + %8 = add nsw i32 %5, %7 + store i32 %8, i32* %6 + %j.next = add nuw nsw i64 %j, 1 + %exitcond.inner = icmp eq i64 %j.next, 32 + br i1 %exitcond.inner, label %.outer, label %.inner +} Index: test/Transforms/LoopVectorize/multiple-strides-vectorization.ll =================================================================== --- test/Transforms/LoopVectorize/multiple-strides-vectorization.ll +++ test/Transforms/LoopVectorize/multiple-strides-vectorization.ll @@ -0,0 +1,43 @@ +; This is the test case from PR26314. 
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s +; CHECK-LABEL: Test +; CHECK: <4 x i64> +; CHECK: <4 x i32>, <4 x i32> +; CHECK: llvm.loop.vectorize.width + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] } + +define void @Test(%struct.s* nocapture %obj) #0 { + br label %.outer.preheader + + +.outer.preheader: + %i = phi i64 [ 0, %0 ], [ %i.next, %.outer ] + %1 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 1, i64 %i + br label %.inner + +.exit: + ret void + +.outer: + %i.next = add nuw nsw i64 %i, 1 + %exitcond.outer = icmp eq i64 %i.next, 32 + br i1 %exitcond.outer, label %.exit, label %.outer.preheader + +.inner: + %j = phi i64 [ 0, %.outer.preheader ], [ %j.next, %.inner ] + %2 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 0, i64 %j + %3 = load i32, i32* %2 + %4 = load i32, i32* %1 + %5 = add nsw i32 %4, %3 + %6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j + %7 = load i32, i32* %6 + %8 = add nsw i32 %5, %7 + store i32 %8, i32* %6 + %j.next = add nuw nsw i64 %j, 1 + %exitcond.inner = icmp eq i64 %j.next, 32 + br i1 %exitcond.inner, label %.outer, label %.inner +}