diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -253,6 +253,9 @@
     return {};
   }
 
+  /// Return the loop stored in InnermostLoop.
+  const Loop *getInnermostLoop() const { return InnermostLoop; }
+
 private:
   /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
   /// applies dynamic knowledge to simplify SCEV expressions and convert them
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -281,7 +281,10 @@
 
   auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
   auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
-  if (!SrcAR || !SinkAR) {
+  // Disable the diff check unless both expressions are AddRecs whose loop is
+  // the innermost loop reported by DC.
+  if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
+      SinkAR->getLoop() != DC.getInnermostLoop()) {
     CanUseDiffCheck = false;
     return;
   }
diff --git a/llvm/test/Transforms/LoopVectorize/nested-loop.ll b/llvm/test/Transforms/LoopVectorize/nested-loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/nested-loop.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s -o - | FileCheck %s
+; CHECK: vector.memcheck:
+; CHECK-NEXT: %bound0 = icmp ult ptr
+; CHECK-NEXT: %bound1 = icmp ult ptr
+; CHECK-NEXT: %found.conflict = and i1 %bound0, %bound1
+
+; The stored-to address %arrayidx is indexed by the outer IV %iv28, while the
+; loaded address %arrayidx8 is indexed by the inner IV %iv.
+define dso_local void @array_magick(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %len) local_unnamed_addr {
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %entry, %outer.latch
+  %iv28 = phi i64 [ %iv.next29, %outer.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv28
+  %.pre = load i32, ptr %arrayidx, align 4
+  br label %inner.body
+
+inner.body:                                       ; preds = %outer.header, %inner.body
+  %0 = phi i32 [ %.pre, %outer.header ], [ %sub, %inner.body ]
+  %iv = phi i64 [ 0, %outer.header ], [ %iv.next, %inner.body ]
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 %iv
+  %1 = load i32, ptr %arrayidx8, align 4
+  %sub = sub i32 %0, %1
+  store i32 %sub, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %len
+  br i1 %exitcond.not, label %outer.latch, label %inner.body
+
+outer.latch:                                      ; preds = %inner.body
+  %iv.next29 = add nuw nsw i64 %iv28, 1
+  %exitcond31.not = icmp eq i64 %iv.next29, %len
+  br i1 %exitcond31.not, label %exit, label %outer.header
+
+exit:                                             ; preds = %outer.latch
+  ret void
+}