diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -531,6 +531,17 @@
   // generated by fixReduction.
   PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc);
 
+  /// Create a new phi node for the induction variable \p OrigPhi to resume
+  /// iteration count in the scalar epilogue, from where the vectorized loop
+  /// left off. In cases where the loop skeleton is more complicated (e.g.
+  /// epilogue vectorization) and the resume values can come from an additional
+  /// bypass block, the \p AdditionalBypass pair provides information about the
+  /// bypass block and the end value on the edge from bypass to this loop.
+  PHINode *createInductionResumeValue(
+      PHINode *OrigPhi, const InductionDescriptor &ID,
+      ArrayRef<BasicBlock *> BypassBlocks,
+      std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
+
 protected:
   friend class LoopVectorizationPlanner;
 
@@ -3105,14 +3116,76 @@
   DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
 }
 
+PHINode *InnerLoopVectorizer::createInductionResumeValue(
+    PHINode *OrigPhi, const InductionDescriptor &II,
+    ArrayRef<BasicBlock *> BypassBlocks,
+    std::pair<BasicBlock *, Value *> AdditionalBypass) {
+  Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
+  assert(VectorTripCount && "Expected valid arguments");
+
+  Instruction *OldInduction = Legal->getPrimaryInduction();
+  Value *&EndValue = IVEndValues[OrigPhi];
+  Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
+  if (OrigPhi == OldInduction) {
+    // We know what the end value is.
+    EndValue = VectorTripCount;
+  } else {
+    IRBuilder<> B(LoopVectorPreHeader->getTerminator());
+
+    // Fast-math-flags propagate from the original induction instruction.
+    if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
+      B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
+
+    Type *StepType = II.getStep()->getType();
+    Instruction::CastOps CastOp =
+        CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
+    Value *VTC = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.vtc");
+    Value *Step =
+        CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
+    EndValue = emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
+    EndValue->setName("ind.end");
+
+    // Compute the end value for the additional bypass (if applicable).
+    if (AdditionalBypass.first) {
+      B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
+      CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true, StepType,
+                                       true);
+      Value *Step =
+          CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
+      VTC = B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.vtc");
+      EndValueFromAdditionalBypass =
+          emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
+      EndValueFromAdditionalBypass->setName("ind.end");
+    }
+  }
+
+  // Create phi nodes to merge from the backedge-taken check block.
+  PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
+                                         LoopScalarPreHeader->getTerminator());
+  // Copy original phi DL over to the new one.
+  BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
+
+  // The new PHI merges the original incoming value, in case of a bypass,
+  // or the value at the end of the vectorized loop.
+  BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
+
+  // Fix the scalar body counter (PHI node).
+  // The old induction's phi node in the scalar body needs the truncated
+  // value.
+  for (BasicBlock *BB : BypassBlocks)
+    BCResumeVal->addIncoming(II.getStartValue(), BB);
+
+  if (AdditionalBypass.first)
+    BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
+                                          EndValueFromAdditionalBypass);
+  return BCResumeVal;
+}
+
 void InnerLoopVectorizer::createInductionResumeValues(
     std::pair<BasicBlock *, Value *> AdditionalBypass) {
   assert(((AdditionalBypass.first && AdditionalBypass.second) ||
           (!AdditionalBypass.first && !AdditionalBypass.second)) &&
          "Inconsistent information about additional bypass.");
-
-  Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
-  assert(VectorTripCount && "Expected valid arguments");
   // We are going to resume the execution of the scalar loop.
   // Go over all of the induction variables that we found and fix the
   // PHIs that are left in the scalar version of the loop.
@@ -3120,68 +3193,11 @@
   // iteration in the vectorized loop.
   // If we come from a bypass edge then we need to start from the original
   // start value.
-  Instruction *OldInduction = Legal->getPrimaryInduction();
   for (const auto &InductionEntry : Legal->getInductionVars()) {
     PHINode *OrigPhi = InductionEntry.first;
-    InductionDescriptor II = InductionEntry.second;
-
-    Value *&EndValue = IVEndValues[OrigPhi];
-    Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
-    if (OrigPhi == OldInduction) {
-      // We know what the end value is.
-      EndValue = VectorTripCount;
-    } else {
-      IRBuilder<> B(LoopVectorPreHeader->getTerminator());
-
-      // Fast-math-flags propagate from the original induction instruction.
-      if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
-        B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
-
-      Type *StepType = II.getStep()->getType();
-      Instruction::CastOps CastOp =
-          CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
-      Value *VTC = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.vtc");
-      Value *Step =
-          CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
-      EndValue = emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
-      EndValue->setName("ind.end");
-
-      // Compute the end value for the additional bypass (if applicable).
-      if (AdditionalBypass.first) {
-        B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
-        CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true,
-                                         StepType, true);
-        Value *Step =
-            CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
-        VTC =
-            B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.vtc");
-        EndValueFromAdditionalBypass =
-            emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
-        EndValueFromAdditionalBypass->setName("ind.end");
-      }
-    }
-
-    // Create phi nodes to merge from the backedge-taken check block.
-    PHINode *BCResumeVal =
-        PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
-                        LoopScalarPreHeader->getTerminator());
-    // Copy original phi DL over to the new one.
-    BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
-
-    // The new PHI merges the original incoming value, in case of a bypass,
-    // or the value at the end of the vectorized loop.
-    BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
-
-    // Fix the scalar body counter (PHI node).
-    // The old induction's phi node in the scalar body needs the truncated
-    // value.
-    for (BasicBlock *BB : LoopBypassBlocks)
-      BCResumeVal->addIncoming(II.getStartValue(), BB);
-
-    if (AdditionalBypass.first)
-      BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
-                                            EndValueFromAdditionalBypass);
-
+    const InductionDescriptor &II = InductionEntry.second;
+    PHINode *BCResumeVal = createInductionResumeValue(
+        OrigPhi, II, LoopBypassBlocks, AdditionalBypass);
     OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
   }
 }