diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2561,6 +2561,11 @@
   if (!Ptr)
     return;
 
+  // A loop-invariant pointer does not advance between iterations, so it has
+  // no stride; return before getStrideFromPointer is queried for it.
+  if (TheLoop->isLoopInvariant(Ptr))
+    return;
+
   Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
   if (!Stride)
     return;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1157,6 +1157,12 @@
       Value *Ptr = getLoadStorePointerOperand(&I);
       if (!Ptr)
         continue;
+
+      // A loop-invariant pointer does not advance between iterations, so it
+      // has no stride; skip this access before its element type is examined.
+      if (TheLoop->isLoopInvariant(Ptr))
+        continue;
+
       Type *ElementTy = getLoadStoreType(&I);
 
       // Currently, codegen doesn't support cases where the type size doesn't
diff --git a/llvm/test/Transforms/LoopVectorize/vector-no-scevcheck.ll b/llvm/test/Transforms/LoopVectorize/vector-no-scevcheck.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-no-scevcheck.ll
@@ -0,0 +1,73 @@
+; REQUIRES: aarch64-registered-target
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @foo(ptr %pout, ptr readonly %pin, ptr readonly %dims1, ptr readonly %dims2) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: vector.scevcheck
+; CHECK: vector.body
+for.cond1.preheader.lr.ph:
+  %arrayidx7 = getelementptr inbounds i64, ptr %dims1, i64 1
+  %arrayidx11 = getelementptr inbounds i64, ptr %dims2, i64 1
+  %0 = load i64, ptr %arrayidx7, align 8, !tbaa !2
+  %cmp845 = icmp sgt i64 %0, 0
+  br label %for.cond6.preheader.lr.ph
+
+for.cond6.preheader.lr.ph:  ; preds = %for.cond.cleanup4, %for.cond1.preheader.lr.ph
+  %1 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %inc26, %for.cond.cleanup4 ]
+  %2 = getelementptr double, ptr %pin, i64 %1
+  br i1 %cmp845, label %for.cond6.preheader.us.preheader, label %for.cond.cleanup4
+
+for.cond6.preheader.us.preheader:  ; preds = %for.cond6.preheader.lr.ph
+  %.pre = load i64, ptr %arrayidx11, align 8, !tbaa !2
+  %mul14.us = mul nsw i64 %1, %.pre
+  %3 = getelementptr double, ptr %pin, i64 %mul14.us
+  br label %for.cond6.preheader.us
+
+for.cond6.preheader.us:  ; preds = %for.cond6.for.cond.cleanup9_crit_edge.us, %for.cond6.preheader.us.preheader
+  %i.048.us = phi i64 [ %inc23.us, %for.cond6.for.cond.cleanup9_crit_edge.us ], [ 0, %for.cond6.preheader.us.preheader ]
+  %mul.us = mul nsw i64 %i.048.us, %.pre
+  %arrayidx12.us = getelementptr inbounds double, ptr %2, i64 %mul.us ; defined outside for.body10.us
+  %mul19.us = mul nsw i64 %i.048.us, %0
+  br label %for.body10.us
+
+for.body10.us:  ; preds = %for.cond6.preheader.us, %for.body10.us
+  %j.046.us = phi i64 [ 0, %for.cond6.preheader.us ], [ %inc.us, %for.body10.us ] ; counter of the for.body10.us self-loop, stepped by 1 via %inc.us
+  %4 = load double, ptr %arrayidx12.us, align 8, !tbaa !6 ; same address on every for.body10.us iteration
+  %arrayidx16.us = getelementptr inbounds double, ptr %3, i64 %j.046.us
+  %5 = load double, ptr %arrayidx16.us, align 8, !tbaa !6 ; address indexed by %j.046.us
+  %add17.us = fadd contract double %4, %5
+  %6 = getelementptr double, ptr %pout, i64 %mul19.us
+  %arrayidx21.us = getelementptr inbounds double, ptr %6, i64 %j.046.us
+  store double %add17.us, ptr %arrayidx21.us, align 8, !tbaa !6 ; address indexed by %j.046.us
+  %inc.us = add nuw nsw i64 %j.046.us, 1
+  %cmp8.us = icmp slt i64 %inc.us, %0
+  br i1 %cmp8.us, label %for.body10.us, label %for.cond6.for.cond.cleanup9_crit_edge.us
+
+for.cond6.for.cond.cleanup9_crit_edge.us:  ; preds = %for.body10.us
+  %inc23.us = add nuw nsw i64 %i.048.us, 1
+  %exitcond = icmp eq i64 %inc23.us, %0
+  br i1 %exitcond, label %for.cond.cleanup4, label %for.cond6.preheader.us
+
+for.cond.cleanup4:  ; preds = %for.cond6.for.cond.cleanup9_crit_edge.us, %for.cond6.preheader.lr.ph
+  %inc26 = add nuw nsw i64 %1, 1
+  %cmp = icmp slt i64 %inc26, %0
+  br i1 %cmp, label %for.cond6.preheader.lr.ph, label %for.cond.cleanup
+
+for.cond.cleanup:  ; preds = %for.cond.cleanup4
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"long", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"double", !4, i64 0}