diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3914,7 +3914,7 @@ // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. For an inloop reduction the reduction will already // be predicated, and does not need to be handled here. - if (Cost->foldTailByMasking() && !PhiR->isInLoop()) { + if (PhiR->getParent()->getPlan()->hasTailFolded() && !PhiR->isInLoop()) { for (unsigned Part = 0; Part < UF; ++Part) { Value *VecLoopExitInst = State.get(LoopExitInstDef, Part); SelectInst *Sel = nullptr; @@ -9256,7 +9256,7 @@ // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. - if (CM.foldTailByMasking()) { + if (Plan->hasTailFolded()) { Builder.setInsertPoint(LatchVPBB, LatchVPBB->begin()); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2653,6 +2653,9 @@ VPBasicBlock *getPreheader() { return Preheader; } const VPBasicBlock *getPreheader() const { return Preheader; } + /// Returns true if the tail loop is folded in the VPlan. + bool hasTailFolded(); + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -845,6 +845,46 @@
   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
 }
 
+/// Returns true if the header block of the vector loop region contains a
+/// recipe indicating that the loop tail is folded by masking.
+bool VPlan::hasTailFolded() {
+  VPBasicBlock *HeaderVPBB = getVectorLoopRegion()->getEntryBasicBlock();
+  return any_of(*HeaderVPBB, [this](VPRecipeBase &R) {
+    // A widened canonical IV or active-lane-mask PHI imply the tail being
+    // folded.
+    if (isa<VPWidenCanonicalIVRecipe>(&R) ||
+        isa<VPActiveLaneMaskPHIRecipe>(&R))
+      return true;
+
+    // Any recipe that is not a VPInstruction does not indicate tail folding.
+    auto *VPI = dyn_cast<VPInstruction>(&R);
+    if (!VPI)
+      return false;
+    if (VPI->getOpcode() == VPInstruction::ActiveLaneMask)
+      return true;
+    if (VPI->getOpcode() != VPInstruction::ICmpULE)
+      return false;
+
+    // The remaining candidate is the compare "widened canonical IV <= BTC".
+    // Take the defining recipe by pointer and reject null instead of
+    // dereferencing it unconditionally.
+    auto IsWidenCanonicalIV = [](const VPRecipeBase *Def) {
+      if (!Def)
+        return false;
+      if (auto *Ind = dyn_cast<VPWidenIntOrFpInductionRecipe>(Def))
+        return Ind->isCanonical();
+      return isa<VPWidenCanonicalIVRecipe>(Def);
+    };
+    // NOTE(review): getOrCreateBackedgeTakenCount() presumably creates the
+    // backedge-taken count when it does not exist yet, so this query may
+    // modify the plan -- confirm that is acceptable.
+    // Return the condition itself so that every path through the lambda
+    // returns a value.
+    return IsWidenCanonicalIV(VPI->getOperand(0)->getDefiningRecipe()) &&
+           VPI->getOperand(1) == getOrCreateBackedgeTakenCount();
+  });
+}
+
 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
                                 BasicBlock *LoopLatchBB,
                                 BasicBlock *LoopExitBB) {