diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -580,7 +580,7 @@ void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, BasicBlock *MiddleBlock, BasicBlock *VectorHeader, - VPlan &Plan); + VPlan &Plan, VPTransformState &State); /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(VPTransformState &State); @@ -3370,7 +3370,8 @@ const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, BasicBlock *MiddleBlock, - BasicBlock *VectorHeader, VPlan &Plan) { + BasicBlock *VectorHeader, VPlan &Plan, + VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). @@ -3394,11 +3395,35 @@ // An external user of the penultimate value need to see EndValue - Step. // The simplest way to get this is to recompute it from the constituent SCEVs, // that is Start + (Step * (CRD - 1)). + Value *Step = nullptr; for (User *U : OrigPhi->users()) { auto *UI = cast(U); if (!OrigLoop->contains(UI)) { assert(isa(UI) && "Expected LCSSA form"); + if (!Step) { + // Get the value corresponding to the expansion of the induction step. + // First try to directly convert the SCEV, otherwise look for a + // corresponding VPExpandSCEVRecipe. + if (auto *S = dyn_cast(II.getStep())) + Step = S->getValue(); + else if (auto *S = dyn_cast(II.getStep())) + Step = S->getValue(); + else { + // All steps for induction should have been expanded earlier, so this + // only looks up existing VPValues. + VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock(); + for (auto &R : *Preheader) { + auto *StepR = dyn_cast(&R); + if (StepR && StepR->getSCEV() == II.getStep()) { + Step = State.get(StepR, 0); + break; + } + } + assert(Step && "need a step here"); + } + } + IRBuilder<> B(MiddleBlock->getTerminator()); // Fast-math-flags propagate from the original induction instruction. @@ -3408,8 +3433,7 @@ Value *CountMinusOne = B.CreateSub( VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1)); CountMinusOne->setName("cmo"); - Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(), - VectorHeader->getTerminator()); + Value *Escape = emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II); Escape->setName("ind.escape"); @@ -3768,7 +3792,7 @@ fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), IVEndValues[Entry.first], LoopMiddleBlock, - VectorLoop->getHeader(), Plan); + VectorLoop->getHeader(), Plan, State); } // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -; REQUIRES: asserts -; XFAIL: * - define void @test1_pr58811() { ; CHECK-LABEL: @test1_pr58811( ; CHECK-NEXT: entry: