diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1594,11 +1594,6 @@
     return InLoopReductionChains;
   }
 
-  /// Returns true if the Phi is part of an inloop reduction.
-  bool isInLoopReduction(PHINode *Phi) const {
-    return InLoopReductionChains.count(Phi);
-  }
-
   /// Estimate cost of an intrinsic call instruction CI if it were vectorized
   /// with factor VF. Return the cost of the instruction, including
   /// scalarization overhead if it's needed.
@@ -4308,7 +4303,7 @@
   TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
   Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
   setDebugLocFromInst(Builder, ReductionStartValue);
-  bool IsInLoopReductionPhi = Cost->isInLoopReduction(OrigPhi);
+  bool IsInLoopReductionPhi = PhiR->isInLoopReduction();
 
   VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst);
   // This is the vector-clone of the value that leaves the loop.
@@ -4723,8 +4718,8 @@
   // this value when we vectorize all of the instructions that use the PHI.
   if (RdxDesc || Legal->isFirstOrderRecurrence(P)) {
     Value *Iden = nullptr;
-    bool ScalarPHI =
-        (State.VF.isScalar()) || Cost->isInLoopReduction(cast<PHINode>(PN));
+    bool IsInLoopReduction = PhiR->isInLoopReduction();
+    bool ScalarPHI = State.VF.isScalar() || IsInLoopReduction;
     Type *VecTy =
         ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
 
@@ -4758,8 +4753,7 @@
       }
     }
 
-    bool IsOrdered = State.VF.isVector() &&
-                     Cost->isInLoopReduction(cast<PHINode>(PN)) &&
+    bool IsOrdered = State.VF.isVector() && IsInLoopReduction &&
                      useOrderedReductions(*RdxDesc);
 
     for (unsigned Part = 0; Part < State.UF; ++Part) {
@@ -9174,16 +9168,33 @@
 
   // Finally, if tail is folded by masking, introduce selects between the phi
   // and the live-out instruction of each reduction, at the end of the latch.
-  if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) {
-    Builder.setInsertPoint(VPBB);
-    auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
-    for (auto &Reduction : Legal->getReductionVars()) {
-      if (CM.isInLoopReduction(Reduction.first))
+  if (CM.foldTailByMasking()) {
+    VPValue *Cond = nullptr;
+    unsigned NumRegularReductions = 0;
+    for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
+      auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R);
+      if (!PhiR || !PhiR->getRecurrenceDescriptor() ||
+          PhiR->isInLoopReduction())
         continue;
-      VPValue *Phi = Plan->getOrAddVPValue(Reduction.first);
-      VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr());
+
+      // Create the header's block-in mask lazily, on the first such phi.
+      if (!Cond) {
+        Builder.setInsertPoint(VPBB);
+        Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+      }
+      VPValue *Phi = PhiR;
+      VPValue *Red = Plan->getOrAddVPValue(
+          PhiR->getRecurrenceDescriptor()->getLoopExitInstr());
       Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
+#ifndef NDEBUG
+      ++NumRegularReductions;
+#endif
     }
+    (void)NumRegularReductions;
+    assert((!Range.Start.isVector() ||
+            NumRegularReductions == Legal->getReductionVars().size() -
+                                        CM.getInLoopReductionChains().size()) &&
+           "failed to identify an in-loop reduction in VPlan");
   }
 
   std::string PlanName;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1048,6 +1048,14 @@
     return getOperand(1);
   }
 
+  /// Returns the second operand as the backedge value (read-only access).
+  /// Asserts that this phi has a recurrence descriptor.
+  const VPValue *getBackedgeValue() const {
+    assert(RdxDesc && "second incoming value is only guaranteed to be backedge "
+                      "value for reductions");
+    return getOperand(1);
+  }
+
   /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
   void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
     addOperand(IncomingV);
@@ -1061,6 +1069,9 @@
   VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
 
   RecurrenceDescriptor *getRecurrenceDescriptor() { return RdxDesc; }
+
+  /// Returns true if this phi is part of an in-loop reduction.
+  bool isInLoopReduction() const;
 };
 
 /// A recipe for vectorizing a phi-node as a sequence of mask-based select
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1073,6 +1073,34 @@
   printOperands(O, SlotTracker);
 }
 
+/// Returns true if this phi is part of an in-loop reduction, i.e. if a
+/// VPReductionRecipe is reachable from the backedge value by walking operand
+/// definitions backwards without crossing a phi. Always false when the phi
+/// has no recurrence descriptor.
+bool VPWidenPHIRecipe::isInLoopReduction() const {
+  if (!RdxDesc)
+    return false;
+
+  // Walk backwards from the backedge value until we either reach a
+  // VPReductionRecipe or a phi.
+  // NOTE(review): recipes reachable through several operands are visited once
+  // per path; consider tracking visited recipes if this shows up in profiles.
+  SmallVector<const VPRecipeBase *> WorkList;
+  WorkList.push_back(cast<VPRecipeBase>(getBackedgeValue()->getDef()));
+  while (!WorkList.empty()) {
+    const VPRecipeBase *Current = WorkList.pop_back_val();
+    if (Current->isPhi())
+      continue;
+    if (isa<VPReductionRecipe>(Current))
+      return true;
+    // Skip operands that are not defined by a recipe.
+    for (VPValue *Op : Current->operands())
+      if (auto *R = dyn_cast_or_null<VPRecipeBase>(Op->getDef()))
+        WorkList.push_back(R);
+  }
+  return false;
+}
+
 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
   O << Indent << "BLEND ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -172,6 +172,7 @@
   void replaceAllUsesWith(VPValue *New);
 
   VPDef *getDef() { return Def; }
+  const VPDef *getDef() const { return Def; }
 
   /// Returns the underlying IR value, if this VPValue is defined outside the
   /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef