Rename CountRoundDown/CRD to VectorTripCount/VTC in LoopVectorize.cpp and
flatten the user loop in InnerLoopVectorizer::fixupIVUsers with an early
`continue`.

NOTE(review): the fixupIVUsers hunk also deletes the fast-math-flags
propagation onto the IRBuilder (B.setFastMathFlags(...)); that is more than a
rename -- confirm it is intentional.

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -575,7 +575,7 @@
 
   /// Set up the values of the IVs correctly when exiting the vector loop.
   void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
-                    Value *CountRoundDown, Value *EndValue,
+                    Value *VectorTripCount, Value *EndValue,
                     BasicBlock *MiddleBlock);
 
   /// Introduce a conditional branch (on true, condition to be set later) at the
@@ -3422,10 +3422,10 @@
       Type *StepType = II.getStep()->getType();
       Instruction::CastOps CastOp =
           CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
-      Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd");
+      Value *VTC = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.vtc");
       const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
       EndValue =
-          emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
+          emitTransformedIndex(B, VTC, PSE.getSE(), DL, II, LoopVectorBody);
       EndValue->setName("ind.end");
 
       // Compute the end value for the additional bypass (if applicable).
@@ -3433,10 +3433,10 @@
         B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
         CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true,
                                          StepType, true);
-        CRD =
-            B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.crd");
+        VTC =
+            B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.vtc");
         EndValueFromAdditionalBypass =
-            emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
+            emitTransformedIndex(B, VTC, PSE.getSE(), DL, II, LoopVectorBody);
         EndValueFromAdditionalBypass->setName("ind.end");
       }
     }
@@ -3582,7 +3582,7 @@
 // value for the IV when arriving directly from the middle block.
 void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
                                        const InductionDescriptor &II,
-                                       Value *CountRoundDown, Value *EndValue,
+                                       Value *VectorTripCount, Value *EndValue,
                                        BasicBlock *MiddleBlock) {
   // There are two kinds of external IV usages - those that use the value
   // computed in the last iteration (the PHI) and those that use the penultimate
@@ -3606,33 +3606,29 @@
 
   // An external user of the penultimate value need to see EndValue - Step.
   // The simplest way to get this is to recompute it from the constituent SCEVs,
-  // that is Start + (Step * (CRD - 1)).
+  // that is Start + (Step * (VTC - 1)).
   for (User *U : OrigPhi->users()) {
     auto *UI = cast<Instruction>(U);
-    if (!OrigLoop->contains(UI)) {
-      const DataLayout &DL =
-          OrigLoop->getHeader()->getModule()->getDataLayout();
-      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+    if (OrigLoop->contains(UI))
+      continue;
 
-      IRBuilder<> B(MiddleBlock->getTerminator());
+    const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+    assert(isa<PHINode>(UI) && "Expected LCSSA form");
 
-      // Fast-math-flags propagate from the original induction instruction.
-      if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
-        B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
+    IRBuilder<> B(MiddleBlock->getTerminator());
 
-      Value *CountMinusOne = B.CreateSub(
-          CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
-      Value *CMO =
-          !II.getStep()->getType()->isIntegerTy()
-              ? B.CreateCast(Instruction::SIToFP, CountMinusOne,
-                             II.getStep()->getType())
-              : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
-      CMO->setName("cast.cmo");
-      Value *Escape =
-          emitTransformedIndex(B, CMO, PSE.getSE(), DL, II, LoopVectorBody);
-      Escape->setName("ind.escape");
-      MissingVals[UI] = Escape;
-    }
+    Value *CountMinusOne = B.CreateSub(
+        VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
+    Value *CMO =
+        !II.getStep()->getType()->isIntegerTy()
+            ? B.CreateCast(Instruction::SIToFP, CountMinusOne,
+                           II.getStep()->getType())
+            : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
+    CMO->setName("cast.cmo");
+    Value *Escape =
+        emitTransformedIndex(B, CMO, PSE.getSE(), DL, II, LoopVectorBody);
+    Escape->setName("ind.escape");
+    MissingVals[UI] = Escape;
   }
 
   for (auto &I : MissingVals) {
@@ -8071,8 +8067,7 @@
       emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, false);
 
   // Generate the induction variable.
-  Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
-  EPI.VectorTripCount = CountRoundDown;
+  EPI.VectorTripCount = getOrCreateVectorTripCount(Lp);
   createHeaderBranch(Lp);
 
   // Skip induction resume value creation here because they will be created in