Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -292,8 +292,7 @@ /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. VPlanPtr buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -461,7 +461,7 @@ BasicBlock *createVectorizedLoopSkeleton(); /// Widen a single instruction within the innermost loop. - void widenInstruction(Instruction &I, VPUser &Operands, + void widenInstruction(Instruction &I, VPValue *Def, VPUser &Operands, VPTransformState &State); /// Widen a single call instruction within the innermost loop. @@ -4510,7 +4510,8 @@ return !CInt || CInt->isZero(); } -void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User, +void InnerLoopVectorizer::widenInstruction(Instruction &I, + VPValue *Def, VPUser &User, VPTransformState &State) { assert(!VF.isScalable() && "scalable vectors not yet supported."); switch (I.getOpcode()) { @@ -4553,7 +4554,7 @@ VecOp->copyIRFlags(&I); // Use this vector value for all users of the original instruction. - VectorLoopValueMap.setVectorValue(&I, Part, V); + State.set(Def, &I, V, Part); addMetadata(V, &I); } @@ -4577,7 +4578,7 @@ } else { C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } - VectorLoopValueMap.setVectorValue(&I, Part, C); + State.set(Def, &I, C, Part); addMetadata(C, &I); } @@ -4607,7 +4608,7 @@ for (unsigned Part = 0; Part < UF; ++Part) { Value *A = State.get(User.getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - VectorLoopValueMap.setVectorValue(&I, Part, Cast); + State.set(Def, &I, Cast, Part); addMetadata(Cast, &I); } break; @@ -7240,7 +7241,7 @@ if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1)) return EdgeMaskCache[Edge] = SrcMask; - VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); + VPValue *EdgeMask = Plan->getOrAddVPValue(BI->getCondition()); assert(EdgeMask && "No Edge Mask found for condition"); if (BI->getSuccessor(0) != Dst) @@ -7278,7 +7279,7 @@ // Start by constructing the desired canonical IV. VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) - IV = Plan->getVPValue(Legal->getPrimaryInduction()); + IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { auto IVRecipe = new VPWidenCanonicalIVRecipe(); Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint); @@ -7625,24 +7626,6 @@ ElementCount MaxVF) { assert(OrigLoop->isInnermost() && "Inner loop expected."); - // Collect conditions feeding internal conditional branches; they need to be - // represented in VPlan for it to model masking. - SmallPtrSet NeedDef; - - auto *Latch = OrigLoop->getLoopLatch(); - for (BasicBlock *BB : OrigLoop->blocks()) { - if (BB == Latch) - continue; - BranchInst *Branch = dyn_cast(BB->getTerminator()); - if (Branch && Branch->isConditional()) - NeedDef.insert(Branch->getCondition()); - } - - // If the tail is to be folded by masking, the primary induction variable, if - // exists needs to be represented in VPlan for it to model early-exit masking. - if (CM.foldTailByMasking() && Legal->getPrimaryInduction()) - NeedDef.insert(Legal->getPrimaryInduction()); - // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -7667,15 +7650,14 @@ auto MaxVFPlusOne = MaxVF.getWithIncrement(1); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef, + VPlans.push_back(buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); VF = SubRange.End; } } VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to @@ -7744,10 +7726,6 @@ VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); Plan->setEntry(VPBB); - // Represent values that will have defs inside VPlan. - for (Value *V : NeedDef) - Plan->addVPValue(V); - // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); @@ -7777,12 +7755,8 @@ // Check if the recipe can be converted to a VPValue. We need the extra // down-casting step until VPRecipeBase inherits from VPValue. VPValue *MaybeVPValue = Recipe->toVPValue(); - if (!Instr->getType()->isVoidTy() && MaybeVPValue) { - if (NeedDef.contains(Instr)) - Plan->addOrReplaceVPValue(Instr, MaybeVPValue); - else - Plan->addVPValue(Instr, MaybeVPValue); - } + if (!Instr->getType()->isVoidTy() && MaybeVPValue) + Plan->addVPValue(Instr, MaybeVPValue); RecipeBuilder.setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); @@ -7955,6 +7929,9 @@ VPReductionRecipe *RedRecipe = new VPReductionRecipe( &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), UseReductionIntrinsic); + WidenRecipe->toVPValue()->replaceAllUsesWith(cast(RedRecipe)); + Plan->removeVPValueFor(R); + Plan->addVPValue(R, cast(RedRecipe)); WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator()); WidenRecipe->eraseFromParent(); @@ -8008,7 +7985,7 @@ } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(Ingredient, *this, State); + State.ILV->widenInstruction(*getUnderlyingInstr(), this, *this, State); } void VPWidenGEPRecipe::execute(VPTransformState &State) { Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -828,14 +828,12 @@ /// VPWidenRecipe is a recipe for producing a copy of vector type its /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeBase, public VPUser { - /// Hold the instruction to be widened. - Instruction &Ingredient; - +class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser { public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPRecipeBase(VPWidenSC), VPUser(Operands), Ingredient(I) {} + : VPRecipeBase(VPRecipeBase::VPWidenSC), VPValue(VPValue::VPWidenSC, &I), + VPUser(Operands) {} ~VPWidenRecipe() override = default; @@ -843,6 +841,9 @@ static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenSC; + } /// Produce widened copies of all Ingredients. void execute(VPTransformState &State) override; @@ -1736,15 +1737,6 @@ Value2VPValue[V] = VPV; } - void addOrReplaceVPValue(Value *V, VPValue *VPV) { - assert(V && "Trying to add a null Value to VPlan"); - auto I = Value2VPValue.find(V); - if (I == Value2VPValue.end()) - Value2VPValue[V] = VPV; - else - I->second = VPV; - } - VPValue *getVPValue(Value *V) { assert(V && "Trying to get the VPValue of a null Value"); assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); Index: llvm/lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.cpp +++ llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -115,6 +115,8 @@ return V; if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; return nullptr; } @@ -131,6 +133,8 @@ return V; if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; return nullptr; } @@ -869,8 +873,16 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN\\l\""; - O << "\" " << VPlanIngredient(&Ingredient); + O << "\"WIDEN "; + printAsOperand(O, SlotTracker); + O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; + unsigned NumOperands = getNumOperands(); + if (NumOperands > 0) + getOperand(0)->printAsOperand(O, SlotTracker); + for (size_t I = 1; I < NumOperands; ++I) { + O << ", "; + getOperand(I)->printAsOperand(O, SlotTracker); + } } void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, Index: llvm/lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanValue.h +++ llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -83,6 +83,7 @@ VPInstructionSC, VPMemoryInstructionSC, VPReductionSC, + VPWidenSC, VPVWidenCallSC, VPVWidenGEPSC, VPVWidenSelectSC, Index: llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -38,7 +38,7 @@ ; CHECK: N0 [label = ; CHECK-NEXT: "loop:\n" + ; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" + -; CHECK-NEXT: "WIDEN\l"" %cond0 = icmp %iv, 13\l" + +; CHECK-NEXT: "WIDEN ir<%cond0> = icmp ir<%iv>, ir<13>\l" + ; CHECK-NEXT: "WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20>\l" ; CHECK-NEXT: ] define void @test() { Index: llvm/test/Transforms/LoopVectorize/vplan-printing.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -42,9 +42,9 @@ ; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi %iv.next, 0\l" + ; CHECK-NEXT: "WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>\l" + ; CHECK-NEXT: "WIDEN ir<%lv> = load ir<%arrayidx>\l" + -; CHECK-NEXT: "WIDEN\l"" %cmp = icmp %arrayidx, %z\l" + +; CHECK-NEXT: "WIDEN ir<%cmp> = icmp ir<%arrayidx>, ir<%z>\l" + ; CHECK-NEXT: "WIDEN-SELECT ir<%sel> = select ir<%cmp>, ir<1.000000e+01>, ir<2.000000e+01>\l" + -; CHECK-NEXT: "WIDEN\l"" %add = fadd %lv, %sel\l" + +; CHECK-NEXT: "WIDEN ir<%add> = fadd ir<%lv>, ir<%sel>\l" + ; CHECK-NEXT: "CLONE %arrayidx2 = getelementptr %x, %iv\l" + ; CHECK-NEXT: "WIDEN store ir<%arrayidx2>, ir<%add>\l" ; CHECK-NEXT: ]