diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4297,16 +4297,15 @@ // Reductions do not have to start at zero. They can start with // any loop invariant values. - BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); - Value *OrigLoopVal = OrigPhi->getIncomingValueForBlock(OrigLatch); BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); for (unsigned Part = 0; Part < UF; ++Part) { Value *VecRdxPhi = State.get(PhiR->getVPValue(), Part); - Value *Val = State.get(State.Plan->getVPValue(OrigLoopVal), Part); + Value *Val = State.get(PhiR->getBackedgeValue(), Part); if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc) && State.VF.isVector()) - Val = State.get(State.Plan->getVPValue(OrigLoopVal), UF - 1); + Val = State.get(PhiR->getBackedgeValue(), UF - 1); + cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch); } @@ -8678,6 +8677,17 @@ return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end())); } +void VPRecipeBuilder::fixHeaderPhis() { + BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); + for (VPWidenPHIRecipe *R : PhisToFix) { + auto *PN = cast<PHINode>(R->getUnderlyingValue()); + VPRecipeBase *IncR = + getRecipe(cast<Instruction>(PN->getIncomingValueForBlock(OrigLatch))); + assert(IncR->getNumDefinedValues() == 1); + R->addOperand(IncR->getVPValue(0)); + } +} + VPBasicBlock *VPRecipeBuilder::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan) { @@ -8781,7 +8791,15 @@ assert(RdxDesc.getRecurrenceStartValue() == Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); VPValue *StartV = Operands[0]; - return toVPRecipeResult(new VPWidenPHIRecipe(Phi, RdxDesc, *StartV)); + + // Record the PHI and the incoming value from the backedge, so we can add + // the incoming value from the backedge after all recipes have been + // created. 
+ auto *PhiRecipe = new VPWidenPHIRecipe(Phi, RdxDesc, *StartV); + PhisToFix.push_back(PhiRecipe); + recordRecipeOf(cast<Instruction>( + Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); + return toVPRecipeResult(PhiRecipe); } return toVPRecipeResult(new VPWidenPHIRecipe(Phi)); @@ -8969,6 +8987,8 @@ } } + RecipeBuilder.fixHeaderPhis(); + // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks // may also be empty, such as the last one VPBB, reflecting original // basic-blocks with no recipes. diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -56,6 +56,10 @@ // marked by having a nullptr entry in this map. DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; + /// Cross-iteration reduction phis for which we need to add the incoming value + /// from the backedge after all recipes have been created. + SmallVector<VPWidenPHIRecipe *, 4> PhisToFix; + /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -165,6 +169,10 @@ VPBasicBlock *handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan); + + /// Add the incoming values from the backedge to reduction cross-iteration + /// phis. + void fixHeaderPhis(); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -987,9 +987,10 @@ /// A recipe for handling all phi nodes except for integer and FP inductions. /// For reduction PHIs, RdxDesc must point to the corresponding recurrence -/// descriptor and the start value is the first operand of the recipe. 
-/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are -/// managed in the recipe directly. +/// descriptor, the start value is the first operand of the recipe and the +/// incoming value from the backedge is the second operand. In the VPlan native +/// path, all incoming VPValues & VPBasicBlock pairs are managed in the recipe +/// directly. class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { /// Descriptor for a reduction PHI. RecurrenceDescriptor *RdxDesc = nullptr; @@ -1034,6 +1035,13 @@ return getNumOperands() == 0 ? nullptr : getOperand(0); } + /// Returns the incoming value from the loop backedge, if it is a reduction. + VPValue *getBackedgeValue() { + assert(RdxDesc && "second incoming value is only guaranteed to be backedge " + "value for reductions"); + return getOperand(1); + } + /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { addOperand(IncomingV); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -79,7 +79,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: for.body: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 -; CHECK-NEXT: WIDEN-PHI %red = phi %red.next, 0.000000e+00 +; CHECK-NEXT: WIDEN-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.fadd (ir<%lv>)