diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -289,6 +289,8 @@ getDecisionAndClampRange(const std::function &Predicate, VFRange &Range); + /* Returns true only when the enable-strict-reductions option is set and RdxDesc.isOrdered() holds; defined in VPlan.cpp. */ static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc); + protected: /// Collect the instructions from the original loop that would be trivially /// dead in the vectorized loop if generated. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -331,11 +331,6 @@ cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")); -cl::opt EnableStrictReductions( - "enable-strict-reductions", cl::init(false), cl::Hidden, - cl::desc("Enable the vectorisation of loops with in-order (strict) " - "FP reductions")); - static cl::opt PreferPredicatedReductionSelect( "prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( @@ -597,8 +592,7 @@ /// this phi node. void fixFirstOrderRecurrence(PHINode *Phi, VPTransformState &State); - /// Fix a reduction cross-iteration phi. This is the second phase of - /// vectorizing this phi node. + /// Create code for the loop exit value of the reduction. Incoming values of the vector-loop phi are added separately, by the header-phi fixup in VPlan.cpp. void fixReduction(VPWidenPHIRecipe *Phi, VPTransformState &State); /// Clear NSW/NUW flags from reduction instructions if necessary. 
@@ -4262,10 +4256,6 @@ LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); } -static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) { - return EnableStrictReductions && RdxDesc.isOrdered(); -} - void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, VPTransformState &State) { PHINode *OrigPhi = cast(PhiR->getUnderlyingValue()); @@ -4288,21 +4278,6 @@ // Wrap flags are in general invalid after vectorization, clear them. clearReductionWrapFlags(RdxDesc, State); - // Fix the vector-loop phi. - - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - for (unsigned Part = 0; Part < UF; ++Part) { - Value *VecRdxPhi = State.get(PhiR->getVPValue(0), Part); - Value *Val = State.get(PhiR->getOperand(1), Part); - if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc) && - State.VF.isVector()) - Val = State.get(PhiR->getOperand(1), UF - 1); - - cast(VecRdxPhi)->addIncoming( - Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); - } - // Before each round, move the insertion point right between // the PHIs and the values we are going to write. // This allows us to write both PHINodes and the extractelement @@ -4389,7 +4364,8 @@ // terminate on this line. This is the easiest way to ensure we don't // accidentally cause an extra step back into the loop while debugging. setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator()); - if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc)) + if (IsInLoopReductionPhi && + LoopVectorizationPlanner::useOrderedReductions(RdxDesc)) /* ordered in-loop reduction: the result is the value already produced for the last unrolled part (UF - 1) */ ReducedPartRdx = State.get(LoopExitInstDef, UF - 1); else { // Floating-point operations should have some FMF to enable the reduction. 
@@ -6092,7 +6068,8 @@ if (!Legal->isReductionVariable(PN)) continue; RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[PN]; - if (PreferInLoopReductions || useOrderedReductions(RdxDesc) || + if (PreferInLoopReductions || + LoopVectorizationPlanner::useOrderedReductions(RdxDesc) || TTI.preferInLoopReduction(RdxDesc.getOpcode(), RdxDesc.getRecurrenceType(), TargetTransformInfo::ReductionFlags())) @@ -7691,7 +7668,8 @@ // If the target would prefer this reduction to happen "in-loop", then we // want to record it as such. unsigned Opcode = RdxDesc.getOpcode(); - if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) && + if (!PreferInLoopReductions && + !LoopVectorizationPlanner::useOrderedReductions(RdxDesc) && !TTI.preferInLoopReduction(Opcode, Phi->getType(), TargetTransformInfo::ReductionFlags())) continue; @@ -9261,7 +9239,7 @@ Value *PrevInChain = State.get(getChainOp(), 0); for (unsigned Part = 0; Part < State.UF; ++Part) { RecurKind Kind = RdxDesc->getRecurrenceKind(); - bool IsOrdered = useOrderedReductions(*RdxDesc); + bool IsOrdered = LoopVectorizationPlanner::useOrderedReductions(*RdxDesc); Value *NewVecOp = State.get(getVecOp(), Part); if (VPValue *Cond = getCondOp()) { Value *NewCond = State.get(Cond, Part); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" +#include "LoopVectorizationPlanner.h" /* needed for the LoopVectorizationPlanner::useOrderedReductions definition in this file */ #include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" @@ -48,6 +49,11 @@ using namespace llvm; extern cl::opt EnableVPlanNativePath; +static cl::opt EnableStrictReductions( + "enable-strict-reductions", cl::init(false), cl::Hidden, + cl::desc("Enable the vectorisation of loops with in-order (strict) " + "FP reductions")); /* file-local (static), hidden, and off by default (cl::init(false)) */ + #define DEBUG_TYPE 
"vplan" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -60,6 +66,11 @@ } #endif +bool LoopVectorizationPlanner::useOrderedReductions( + RecurrenceDescriptor &RdxDesc) { + /* True only when the enable-strict-reductions option is on and the descriptor reports isOrdered(). */ return EnableStrictReductions && RdxDesc.isOrdered(); +} + Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const { switch (LaneKind) { @@ -738,6 +749,29 @@ if (!EnableVPlanNativePath) updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, L->getExitBlock()); + + // Fixup reduction PHI nodes in the vectorized loop header. + VPBasicBlock *Header = getEntry()->getEntryBasicBlock(); + for (VPRecipeBase &R : Header->phis()) { + VPWidenPHIRecipe *PhiR = dyn_cast(&R); + if (!PhiR) + continue; + auto *RdxDesc = PhiR->getRecurrenceDescriptor(); + if (!RdxDesc) + continue; /* only phis that carry a recurrence descriptor are fixed up */ + + bool IsInLoopReductionPhi = isa(PhiR->getOperand(1)); /* NOTE(review): the template argument of isa appears to be missing in this copy of the patch - confirm against the original */ + for (unsigned Part = 0; Part < State->UF; ++Part) { + auto *VecRdxPhi = cast(State->get(PhiR->getVPValue(0), Part)); + Value *Val = State->get(PhiR->getOperand(1), Part); + if (IsInLoopReductionPhi && + LoopVectorizationPlanner::useOrderedReductions(*RdxDesc) && + State->VF.isVector()) + Val = State->get(PhiR->getOperand(1), State->UF - 1); /* ordered in-loop reduction with a vector VF: every part's phi takes the last part's value on the backedge */ + VecRdxPhi->addIncoming( + Val, State->LI->getLoopFor(VecRdxPhi->getParent())->getLoopLatch()); /* incoming block is the latch of the loop that contains the phi */ + } + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)