diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1296,11 +1296,20 @@ } bool LoopVectorizationLegality::prepareToFoldTailByMasking() { - LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); - SmallPtrSet ReductionLiveOuts; + for (auto &Induction : getInductionVars()) + if (any_of(Induction.first->users(), [&](User *U) { + return !TheLoop->contains(cast(U)); + })) { + LLVM_DEBUG( + dbgs() + << "LV: Cannot fold tail by masking, loop has an outside user for " + << *Induction.first << "\n"); + return false; + } + SmallPtrSet ReductionLiveOuts; for (auto &Reduction : getReductionVars()) ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3321,10 +3321,12 @@ Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()); for (User *U : PostInc->users()) { Instruction *UI = cast(U); - if (!OrigLoop->contains(UI)) { - assert(isa(UI) && "Expected LCSSA form"); - MissingVals[UI] = EndValue; - } + if (OrigLoop->contains(UI)) + continue; + + assert(isa(UI) && "Expected LCSSA form"); + assert(!Cost->foldTailByMasking() && "Unexpected external use of IV"); + MissingVals[UI] = EndValue; } // An external user of the penultimate value need to see EndValue - Step. 
@@ -3334,6 +3336,7 @@
       auto *UI = cast(U);
       if (!OrigLoop->contains(UI)) {
         assert(isa(UI) && "Expected LCSSA form");
+        assert(!Cost->foldTailByMasking() && "Unexpected external use of IV");
 
         IRBuilder<> B(MiddleBlock->getTerminator());
 
diff --git a/llvm/test/Transforms/LoopVectorize/pr52335.ll b/llvm/test/Transforms/LoopVectorize/pr52335.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr52335.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
+
+define dso_local i32 @test(i32* nocapture %arr, i64 %n) {
+; The vectorizer should refuse to fold the tail by masking because %conv is
+; used outside of the loop (by the phi %final in %load_val). Test for this by
+; checking that %n.vec, the vector trip count, is the trip count rounded down
+; to a multiple of 4. If the tail were folded, it would be rounded up instead.
+;
+; CHECK-LABEL: vector.ph:
+; CHECK-NEXT: %n.mod.vf = urem i64 [[TC:%.*]], 4
+; CHECK-NEXT: %n.vec = sub i64 [[TC]], %n.mod.vf
+entry:
+  %cmp1 = icmp ugt i64 %n, 1
+  br i1 %cmp1, label %preheader, label %done
+
+preheader:
+  br label %loop
+
+loop:
+  %conv = phi i64 [ %conv2, %loop ], [ 1, %preheader ]
+  %i = phi i8 [ %inc, %loop ], [ 1, %preheader ]
+  %sub = add nsw i64 %conv, -1
+  %ptr = getelementptr inbounds i32, i32* %arr, i64 %sub
+  store i32 65, i32* %ptr, align 4
+  %inc = add i8 %i, 1
+  %conv2 = zext i8 %inc to i64 ; next value of %conv, widened from the i8 counter
+  %cmp2 = icmp ult i64 %conv2, %n
+  br i1 %cmp2, label %loop, label %load_val, !llvm.loop !0
+
+load_val:
+  %final = phi i64 [ %conv, %loop ] ; the use of %conv outside the loop
+  %ptr2 = getelementptr inbounds i32, i32* %arr, i64 %final
+  %val = load i32, i32* %ptr2, align 4
+  br label %done
+
+done:
+  %value = phi i32 [ %val, %load_val ], [ 0, %entry ]
+  ret i32 %value
+
+}
+
+!0 = distinct !{!0, !1, !2, !3}
+!1 = !{!"llvm.loop.unroll.disable"}
+!2 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} ; asks the vectorizer to predicate this loop
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}