diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1270,7 +1270,7 @@ void collectElementTypesForWidening(); /// Split reductions into those that happen in the loop, and those that happen - /// outside. In loop reductions are collected into InLoopReductionChains. + /// outside. In loop reductions are collected into InLoopReductions. void collectInLoopReductions(); /// Returns true if we should use strict in-order reductions for the given @@ -1606,20 +1606,9 @@ return foldTailByMasking() || Legal->blockNeedsPredication(BB); } - /// A SmallMapVector to store the InLoop reduction op chains, mapping phi - /// nodes to the chain of instructions representing the reductions. Uses a - /// MapVector to ensure deterministic iteration order. - using ReductionChainMap = - SmallMapVector, 4>; - - /// Return the chain of instructions representing an inloop reduction. - const ReductionChainMap &getInLoopReductionChains() const { - return InLoopReductionChains; - } - /// Returns true if the Phi is part of an inloop reduction. bool isInLoopReduction(PHINode *Phi) const { - return InLoopReductionChains.count(Phi); + return InLoopReductions.count(Phi); } /// Estimate cost of an intrinsic call instruction CI if it were vectorized @@ -1786,7 +1775,7 @@ /// PHINodes of the reductions that should be expanded in-loop along with /// their associated chains of reduction operations, in program order from top /// (PHI) to bottom - ReductionChainMap InLoopReductionChains; + SmallPtrSet InLoopReductions; /// A Map of inloop reduction operations and their immediate chain operand. /// FIXME: This can be removed once reductions can be costed correctly in @@ -6637,7 +6626,7 @@ Instruction *I, ElementCount VF, Type *Ty, TTI::TargetCostKind CostKind) { using namespace llvm::PatternMatch; // Early exit for no inloop reductions - if (InLoopReductionChains.empty() || VF.isScalar() || !isa(Ty)) + if (InLoopReductions.empty() || VF.isScalar() || !isa(Ty)) return std::nullopt; auto *VectorTy = cast(Ty); @@ -7487,8 +7476,9 @@ SmallVector ReductionOperations = RdxDesc.getReductionOpChain(Phi, TheLoop); bool InLoop = !ReductionOperations.empty(); + if (InLoop) { - InLoopReductionChains[Phi] = ReductionOperations; + InLoopReductions.insert(Phi); // Add the elements to InLoopReductionImmediateChains for cost modelling. Instruction *LastChain = Phi; for (auto *I : ReductionOperations) { @@ -8880,24 +8870,6 @@ // process after constructing the initial VPlan. // --------------------------------------------------------------------------- - for (const auto &Reduction : CM.getInLoopReductionChains()) { - PHINode *Phi = Reduction.first; - RecurKind Kind = - Legal->getReductionVars().find(Phi)->second.getRecurrenceKind(); - const SmallVector &ReductionOperations = Reduction.second; - - RecipeBuilder.recordRecipeOf(Phi); - for (const auto &R : ReductionOperations) { - RecipeBuilder.recordRecipeOf(R); - // For min/max reductions, where we have a pair of icmp/select, we also - // need to record the ICmp recipe, so it can be removed later. - assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && - "Only min/max recurrences allowed for inloop reductions"); - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) - RecipeBuilder.recordRecipeOf(cast(R->getOperand(0))); - } - } - // For each interleave group which is relevant for this (possibly trimmed) // Range, add it to the set of groups to be later applied to the VPlan and add // placeholders for its members' Recipes which we'll be replacing with a @@ -9177,25 +9149,49 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { - for (const auto &Reduction : CM.getInLoopReductionChains()) { - PHINode *Phi = Reduction.first; - const RecurrenceDescriptor &RdxDesc = - Legal->getReductionVars().find(Phi)->second; - const SmallVector &ReductionOperations = Reduction.second; + SmallVector Phis; + for (VPRecipeBase &R : + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) + Phis.push_back(&R); + + for (VPRecipeBase *R : Phis) { + auto *PhiR = dyn_cast(R); + if (!PhiR || !PhiR->isInLoop()) + continue; + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - if (MinVF.isScalar() && !CM.useOrderedReductions(RdxDesc)) + if (MinVF.isScalar() && !PhiR->isOrdered()) continue; + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(PhiR); + Visited.insert(PhiR); + + for (unsigned I = 0; I != Worklist.size(); ++I) { + VPValue *Cur = Worklist[I]; + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast(U); + if (!UserRecipe) + continue; + for (VPValue *V : UserRecipe->definedValues()) + if (Visited.insert(V).second) + Worklist.push_back(V); + } + } + // ReductionOperations are orders top-down from the phi's use to the // LoopExitValue. We keep a track of the previous item (the Chain) to tell // which of the two operands will remain scalar and which will be reduced. // For minmax the chain will be the select instructions. - Instruction *Chain = Phi; - for (Instruction *R : ReductionOperations) { - VPRecipeBase *WidenRecipe = RecipeBuilder.getRecipe(R); + VPRecipeBase *Chain = PhiR; + for (unsigned I = 1; I != Worklist.size(); ++I) { + VPValue *ChainOp = Chain->getVPSingleValue(); + VPRecipeBase *WidenRecipe = Worklist[I]->getDefiningRecipe(); + RecurKind Kind = RdxDesc.getRecurrenceKind(); + Instruction *R = WidenRecipe->getUnderlyingInstr(); - VPValue *ChainOp = Plan->getVPValue(Chain); unsigned FirstOpId; assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && "Only min/max recurrences allowed for inloop reductions"); @@ -9204,6 +9200,8 @@ assert((!IsFMulAdd || RecurrenceDescriptor::isFMulAddIntrinsic(R)) && "Expected instruction to be a call to the llvm.fmuladd intrinsic"); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { + if (!isa(WidenRecipe)) + continue; assert(isa(WidenRecipe) && "Expected to replace a VPWidenSelectSC"); FirstOpId = 1; @@ -9213,50 +9211,55 @@ "Expected to replace a VPWidenSC"); FirstOpId = 0; } - unsigned VecOpId = - R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId; - VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId)); + unsigned VecOpId = WidenRecipe->getOperand(FirstOpId) == ChainOp + ? FirstOpId + 1 + : FirstOpId; + VPValue *VecOp = WidenRecipe->getOperand(VecOpId); + BasicBlock *BB = WidenRecipe->getUnderlyingInstr()->getParent(); VPValue *CondOp = nullptr; - if (CM.blockNeedsPredicationForAnyReason(R->getParent())) { + if (CM.blockNeedsPredicationForAnyReason(BB)) { VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(WidenRecipe->getParent(), WidenRecipe->getIterator()); CondOp = RecipeBuilder.createBlockInMask(R->getParent(), *Plan); } + Chain = WidenRecipe; if (IsFMulAdd) { // If the instruction is a call to the llvm.fmuladd intrinsic then we // need to create an fmul recipe to use as the vector operand for the // fadd reduction. VPInstruction *FMulRecipe = new VPInstruction( - Instruction::FMul, {VecOp, Plan->getVPValue(R->getOperand(1))}); + Instruction::FMul, {VecOp, WidenRecipe->getOperand(1)}); FMulRecipe->setFastMathFlags(R->getFastMathFlags()); WidenRecipe->getParent()->insert(FMulRecipe, WidenRecipe->getIterator()); VecOp = FMulRecipe; } VPReductionRecipe *RedRecipe = - new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, &TTI); - WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe); - Plan->removeVPValueFor(R); - Plan->addVPValue(R, RedRecipe); + new VPReductionRecipe(&RdxDesc, WidenRecipe->getUnderlyingInstr(), + ChainOp, VecOp, CondOp, &TTI); + VPRecipeBase *CompareRecipe = + WidenRecipe->getOperand(0)->getDefiningRecipe(); + // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. WidenRecipe->getParent()->appendRecipe(RedRecipe); WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe); WidenRecipe->eraseFromParent(); + Chain = RedRecipe; if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { - VPRecipeBase *CompareRecipe = - RecipeBuilder.getRecipe(cast(R->getOperand(0))); assert(isa(CompareRecipe) && "Expected to replace a VPWidenSC"); assert(cast(CompareRecipe)->getNumUsers() == 0 && "Expected no remaining users"); CompareRecipe->eraseFromParent(); } - Chain = R; + + if (Chain == PhiR) + break; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2583,12 +2583,6 @@ return getVPValue(V); } - void removeVPValueFor(Value *V) { - assert(Value2VPValueEnabled && - "IR value to VPValue mapping may be out of date!"); - Value2VPValue.erase(V); - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print this VPlan to \p O. void print(raw_ostream &O) const;