diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -278,6 +278,8 @@ getDecisionAndClampRange(const std::function &Predicate, VFRange &Range); + VPlanPtr clone(VPlan &OriginalPlan); + protected: /// Collect the instructions from the original loop that would be trivially /// dead in the vectorized loop if generated. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7739,7 +7739,9 @@ CM.selectUserVectorizationFactor(VF); CM.collectInLoopReductions(); auto InitialPlan = buildVPlan({VF, VF}); - buildVPlansWithVPRecipes(VF, VF, *InitialPlan); + VPlanPredicator Pred(*InitialPlan); + Pred.predicate(); + buildVPlansWithVPRecipes(UserVF, UserVF, *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); return {{VF, 0}}; } @@ -7761,6 +7763,8 @@ CM.collectInLoopReductions(); auto InitialPlan = buildVPlan({ElementCount::getFixed(1), MaxVF}); + VPlanPredicator Pred(*InitialPlan); + Pred.predicate(); buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF, *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); if (MaxVF.isScalar()) @@ -8310,7 +8314,6 @@ if (!CM.blockNeedsPredication(BB)) return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one. - // Create the block in mask as the first non-phi instruction in the block. 
VPBuilder::InsertPointGuard Guard(Builder); auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi(); Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint); @@ -8323,7 +8326,7 @@ IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { auto IVRecipe = new VPWidenCanonicalIVRecipe(); - Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint); + IVRecipe->insertBefore(&*Builder.getInsertBlock()->begin()); IV = IVRecipe->getVPValue(); } VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); @@ -8572,7 +8575,8 @@ return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); } -VPBasicBlock *VPRecipeBuilder::handleReplication( +std::pair +VPRecipeBuilder::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe, VPlanPtr &Plan) { @@ -8587,6 +8591,8 @@ auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); setRecipe(I, Recipe); + Plan->getVPValue(I)->replaceAllUsesWith(Recipe); + Plan->removeVPValueFor(I); Plan->addVPValue(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar @@ -8600,19 +8606,26 @@ // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); - VPBB->appendRecipe(Recipe); - return VPBB; + return {VPBB, Recipe}; } LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - assert(VPBB->getSuccessors().empty() && - "VPBB has successors when handling predicated replication."); + // FIXME: Detach successors via a copy of the list; re-attached below. + SmallVector Succs(VPBB->getSuccessors().begin(), + VPBB->getSuccessors().end()); + for (auto *Succ : Succs) + VPBlockUtils::disconnectBlocks(VPBB, Succ); + // assert(VPBB->getSuccessors().empty() && + //"VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. 
PredInst2Recipe[I] = Recipe; VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); - return RegSucc; + for (auto *Succ : Succs) + VPBlockUtils::connectBlocks(RegSucc, Succ); + + return {RegSucc, Recipe}; } VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, @@ -8697,6 +8710,88 @@ return toVPRecipeResult(tryToWiden(Instr, *Plan)); } +VPlanPtr LoopVectorizationPlanner::clone(VPlan &OriginalPlan) { + ReversePostOrderTraversal RPOT( + OriginalPlan.getEntry()->getEntryBasicBlock()); + + auto Plan = std::make_unique(); + DenseMap Old2New; + DenseMap Old2NewBlocks; + for (VPBlockBase *Base : RPOT) { + assert(isa(Base)); + Old2NewBlocks[Base] = new VPBasicBlock(Base->getName()); + } + + Plan->setEntry(Old2NewBlocks[OriginalPlan.getEntry()]); + for (VPBlockBase *Base : RPOT) { + for (auto *Succ : Base->getSuccessors()) { + VPBlockUtils::connectBlocks(Old2NewBlocks[Base], Old2NewBlocks[Succ]); + } + } + + SmallVector OldPhisToFix; + SmallVector RemapAfter; + for (VPBlockBase *Base : RPOT) { + if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) + continue; + + VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); + VPBasicBlock *VPBB = cast(Old2NewBlocks[OriginalVPBB]); + + // Relevant instructions from basic block BB will be grouped into VPRecipe + // ingredients and fill a new VPBasicBlock. + // Introduce each ingredient into VPlan. 
+ for (auto I = OriginalVPBB->begin(), E = OriginalVPBB->end(); I != E;) { + VPRecipeBase *Ingredient = &*I++; + auto RemapOperands = [&Old2New, &RemapAfter, &Plan](VPRecipeBase *U) { + if (isa(U)) { + RemapAfter.push_back(U); + return; + } + + for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { + VPValue *OldOp = U->getOperand(I); + VPValue *NewOp = nullptr; + if (OldOp->getDef()) { + NewOp = Old2New[OldOp]; + assert(NewOp && "trying to map operand that was not defined"); + } else { + NewOp = Plan->getOrAddVPValue(OldOp->getLiveInIRValue()); + } + U->setOperand(I, NewOp); + } + }; + + VPRecipeBase *NewI = Ingredient->clone(); + RemapOperands(NewI); + for (unsigned I = 0, E = NewI->getNumDefinedValues(); I != E; ++I) { + VPValue *NewDef = NewI->getVPValue(I); + Old2New[Ingredient->getVPValue(I)] = NewDef; + if (NewDef) + Plan->addVPValue(NewDef->getUnderlyingValue(), NewDef); + } + + VPBB->appendRecipe(NewI); + } + } + + for (VPUser *U : RemapAfter) { + for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { + VPValue *OldOp = U->getOperand(I); + VPValue *NewOp = nullptr; + if (OldOp->getDef()) { + NewOp = Old2New[OldOp]; + assert(NewOp && "trying to map operand that was not defined"); + } else { + NewOp = Plan->getOrAddVPValue(OldOp->getLiveInIRValue()); + } + U->setOperand(I, NewOp); + } + } + + return Plan; +} + void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF, VPlan &OriginalPlan) { @@ -8751,7 +8846,7 @@ } VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); - + SmallVector ToRemove; auto *DummyVal = new VPValue(); OriginalPlan.addExternalDef(DummyVal); for (VPBlockBase *Base : RPOT) { @@ -8785,11 +8880,24 @@ for (auto I = OriginalVPBB->rbegin(), E = OriginalVPBB->rend(); I != E;) { VPRecipeBase *Ingredient = &*I++; VPInstruction *VPInst = dyn_cast(Ingredient); - if (!VPInst) + if (!VPInst || !VPInst->getUnderlyingValue()) continue; Instruction *Instr = VPInst->getUnderlyingInstr(); 
if (DeadInstructions.contains(Instr) || isa(Instr)) { + for (VPUser *U : + make_early_inc_range(Ingredient->getVPValue()->users())) { + VPRecipeBase *R = cast(U); + if (auto *VPhi = dyn_cast(R)) { + auto *NewP = + new VPWidenPHIRecipe(cast(VPhi->getUnderlyingValue())); + if (VPhi->getStartValue()) + NewP->addOperand(VPhi->getStartValue()); + NewP->insertBefore(VPhi); + VPhi->getVPValue()->replaceAllUsesWith(NewP); + ToRemove.push_back(VPhi); + } + } Ingredient->getVPValue()->replaceAllUsesWith(DummyVal); Ingredient->eraseFromParent(); continue; @@ -8801,6 +8909,9 @@ } } + for (auto *R : ToRemove) + R->eraseFromParent(); + for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = Inst2VPInst.find(Entry.first)->second; VPRecipeBase *Target = Inst2VPInst.find(Entry.second)->second; @@ -8870,86 +8981,108 @@ // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - // Create a dummy pre-entry VPBasicBlock to start building the VPlan. - auto Plan = std::make_unique(); - - // Scan the body of the loop in a topological order to visit each basic block - // after having visited its predecessor basic blocks. - LoopBlocksDFS DFS(OrigLoop); - DFS.perform(LI); - + auto Plan = clone(OriginalPlan); ReversePostOrderTraversal RPOT( - OriginalPlan.getEntry()->getEntryBasicBlock()); + Plan->getEntry()->getEntryBasicBlock()); + bool NeedAppend = false; VPBasicBlock *VPBB = nullptr; for (VPBlockBase *Base : RPOT) { - VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. 
unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(OriginalVPBB->getName()); - if (VPBB) - VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); - else - Plan->setEntry(FirstVPBBForBB); - VPBB = FirstVPBBForBB; + VPBB = cast(Base); Builder.setInsertPoint(VPBB); - // Introduce each ingredient into VPlan. - for (auto I = OriginalVPBB->begin(), E = OriginalVPBB->end(); I != E;) { - VPRecipeBase *Ingredient = &*I++; - if (isa(Ingredient)) { - auto *C = Ingredient->clone(); - for (auto *Def : C->definedValues()) { - auto *UV = Def->getUnderlyingValue(); - Plan->addVPValue(UV, Def); + auto ReplaceAllUsesWith = [&Plan](VPDef *Def) { + for (auto *V : Def->definedValues()) { + auto *UV = V->getUnderlyingValue(); + if (!Plan->hasVPValue(UV)) { + Plan->addVPValue(UV, V); + continue; } - for (unsigned I = 0, E = Ingredient->getNumOperands(); I != E; ++I) - C->setOperand(I, Plan->getOrAddVPValue( - Ingredient->getOperand(I)->getLiveInIRValue())); - VPBB->appendRecipe(C); + + Plan->getVPValue(UV)->replaceAllUsesWith(V); + Plan->removeVPValueFor(UV); + Plan->addVPValue(UV, V); + } + }; + auto FirstNonPhi = VPBB->getFirstNonPhi(); + auto I = VPBB->begin(), E = VPBB->end(); + while (I != E && (isa(*I) || + isa(*I))) { + VPRecipeBase *Ingredient = &*I++; + if (isa(Ingredient)) + continue; + PHINode *Phi = cast( + cast(Ingredient)->getUnderlyingInstr()); + Builder.setInsertPoint(Ingredient->getParent(), FirstNonPhi); + auto RecipeOrValue = + RecipeBuilder.tryToCreateWidenRecipe(Phi, Range, Plan); + // If Instr can be simplified to an existing VPValue, use it. + if (RecipeOrValue.is()) { + Ingredient->getVPValue()->replaceAllUsesWith( + RecipeOrValue.get()); + Plan->removeVPValueFor(Phi); + Plan->addVPValue(Phi, RecipeOrValue.get()); + Ingredient->eraseFromParent(); continue; } + // Otherwise, add the new recipe. 
+ VPRecipeBase *Recipe = RecipeOrValue.get(); + RecipeBuilder.setRecipe(Phi, Recipe); + if (isa(Recipe)) + VPBB->insert(Recipe, FirstNonPhi); + else + Recipe->insertBefore(Ingredient); + + ReplaceAllUsesWith(Recipe); + Ingredient->eraseFromParent(); + } + + // Introduce each ingredient into VPlan. + while (I != E) { + VPRecipeBase *Ingredient = &*I++; VPInstruction *VPInst = dyn_cast(Ingredient); - Instruction *Instr = - VPInst - ? VPInst->getUnderlyingInstr() - : cast( - cast(Ingredient)->getUnderlyingInstr()); - - // First filter out irrelevant instructions, to ensure no recipes are - // built for them. - if (isa(Instr)) + if (!VPInst || !VPInst->getUnderlyingValue()) continue; + Instruction *Instr = nullptr; + Instr = VPInst->getUnderlyingInstr(); + + assert(!isa(Instr) && + "Branch instruction must be removed earlier"); + auto InsertPt = Ingredient->getIterator(); + if (NeedAppend) + InsertPt = VPBB->end(); + Builder.setInsertPoint(VPBB, InsertPt); + VPRecipeBase *Recipe = nullptr; if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { - // If Instr can be simplified to an existing VPValue, use it. - if (RecipeOrValue.is()) { - Plan->addVPValue(Instr, RecipeOrValue.get()); - continue; - } - // Otherwise, add the new recipe. - VPRecipeBase *Recipe = RecipeOrValue.get(); - for (auto *Def : Recipe->definedValues()) { - auto *UV = Def->getUnderlyingValue(); - Plan->addVPValue(UV, Def); - } - + // Add the new recipe. + Recipe = RecipeOrValue.get(); RecipeBuilder.setRecipe(Instr, Recipe); - VPBB->appendRecipe(Recipe); - continue; - } + VPBB->insert(Recipe, InsertPt); + } else { - // Otherwise, if all widening options failed, Instruction is to be - // replicated. This may create a successor for VPBB. - VPBasicBlock *NextVPBB = RecipeBuilder.handleReplication( - Instr, Range, VPBB, PredInst2Recipe, Plan); - if (NextVPBB != VPBB) { - VPBB = NextVPBB; - VPBB->setName(OriginalVPBB->getName() + "." 
+ Twine(VPBBsForBB++)); + // Otherwise, if all widening options failed, Instruction is to be + // replicated. This may create a successor for VPBB. + auto Res = RecipeBuilder.handleReplication(Instr, Range, VPBB, + PredInst2Recipe, Plan); + Recipe = Res.second; + VPBasicBlock *NextVPBB = Res.first; + if (NextVPBB != VPBB) { + VPBB = NextVPBB; + VPBB->setName(Base->getName() + "." + Twine(VPBBsForBB++)); + NeedAppend = true; + } else { + VPBB->insert(Recipe, InsertPt); + } } + + ReplaceAllUsesWith(Recipe); + Ingredient->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -160,7 +160,7 @@ /// Region. Update the packing decision of predicated instructions if they /// feed \p I. Range.End may be decreased to ensure same recipe behavior from /// \p Range.Start to \p Range.End. - VPBasicBlock *handleReplication( + std::pair handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe, VPlanPtr &Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -580,6 +580,12 @@ return true; } + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPUser *D) { + // NOTE(review): assumes every VPUser is a VPRecipeBase -- confirm. + return true; + } + /// Clone this recipe. 
virtual VPRecipeBase *clone() = 0; }; @@ -625,7 +631,6 @@ protected: void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } - public: VPInstruction(unsigned Opcode, ArrayRef Operands) : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands), @@ -1811,6 +1816,8 @@ return getVPValue(V); } + bool hasVPValue(Value *V) { return Value2VPValue.count(V); } + void removeVPValueFor(Value *V) { Value2VPValue.erase(V); } /// Return the VPLoopInfo analysis for this VPlan. diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -135,8 +135,8 @@ // Blocks that dominate region exit inherit the predicate from the region. // Return after setting the predicate. if (VPDomTree.dominates(CurrBlock, Region->getExit())) { - VPValue *RegionBP = Region->getPredicate(); - CurrBlock->setPredicate(RegionBP); + // VPValue *RegionBP = Region->getPredicate(); + // CurrBlock->setPredicate(RegionBP); return; } @@ -155,6 +155,7 @@ // Skip back-edges if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI)) continue; + continue; VPValue *IncomingPredicate = nullptr; unsigned NumPredSuccsNoBE = @@ -177,10 +178,10 @@ } // Logically OR all incoming predicates by building the Predicate Tree. - VPValue *Predicate = genPredicateTree(IncomingPredicates); + // VPValue *Predicate = genPredicateTree(IncomingPredicates); - // Now update the block's predicate with the new one. - CurrBlock->setPredicate(Predicate); + //// Now update the block's predicate with the new one. + // CurrBlock->setPredicate(Predicate); } // Generate all predicates needed for Region. @@ -208,6 +209,7 @@ ReversePostOrderTraversal RPOT(Region->getEntry()); VPBlockBase *PrevBlock = nullptr; + auto *Exit = Region->getExit(); for (VPBlockBase *CurrBlock : RPOT) { // TODO: Handle nested regions once we start generating the same. 
assert(!isa(CurrBlock) && "Nested region not expected"); @@ -216,11 +218,17 @@ // and CurrBlock skipping loop headers and latches to keep intact loop // header predecessors and loop latch successors. if (PrevBlock && !VPLI->isLoopHeader(CurrBlock) && - !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI)) { + !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI) && CurrBlock != Exit && + PrevBlock != Exit) { LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->" << CurrBlock->getName() << "\n"); + SmallVector Succs(PrevBlock->getSuccessors().begin(), + PrevBlock->getSuccessors().end()); + for (auto *Succ : Succs) { + VPBlockUtils::disconnectBlocks(PrevBlock, Succ); + } PrevBlock->clearSuccessors(); CurrBlock->clearPredecessors(); VPBlockUtils::connectBlocks(PrevBlock, CurrBlock); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -73,13 +73,13 @@ // for multiple underlying IRs (Polly?) by providing a new VPlan front-end, // back-end and analysis information for the new IR. +public: // Set \p Val as the underlying Value of this VPValue. void setUnderlyingValue(Value *Val) { assert(!UnderlyingVal && "Underlying Value is already set."); UnderlyingVal = Val; } -public: /// Return the underlying Value attached to this VPValue. 
Value *getUnderlyingValue() { return UnderlyingVal; } const Value *getUnderlyingValue() const { return UnderlyingVal; } diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -147,13 +147,13 @@ ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]] ; CHECK: vector.body9: ; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT22]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT23]], ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]] ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], ; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -334,15 +334,15 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: 
[[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT5]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT6]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16 +; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: ; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] @@ -350,8 +350,8 @@ ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]] ; CHECK: pred.load.if16: ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 +; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]] ; CHECK: pred.load.continue17: ; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ] @@ -359,8 +359,8 @@ ; CHECK-NEXT: br i1 [[TMP12]], label 
[[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]] ; CHECK: pred.load.if18: ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16 +; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP14]], align 16 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]] ; CHECK: pred.load.continue19: ; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ] @@ -368,8 +368,8 @@ ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]] ; CHECK: pred.load.if20: ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 +; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP15]], align 16 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]] ; CHECK: pred.load.continue21: ; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ] @@ -384,24 +384,24 @@ ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] ; CHECK: pred.store.if22: ; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP22]] -; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP7]], align 16 +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP22]] +; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP9]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]] ; CHECK: pred.store.continue23: ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 ; CHECK-NEXT: br i1 [[TMP23]], label 
[[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] ; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP24]] -; CHECK-NEXT: store i32 [[TMP15]], i32* [[NEXT_GEP8]], align 16 +; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP24]] +; CHECK-NEXT: store i32 [[TMP15]], i32* [[NEXT_GEP10]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE25]] ; CHECK: pred.store.continue25: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]] ; CHECK: pred.store.if26: ; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP26]] -; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP9]], align 16 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP26]] +; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP11]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE27]] ; CHECK: pred.store.continue27: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -663,10 +663,10 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE9:%.*]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_UDIV_CONTINUE9]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE9]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: 
[[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT3]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -36,6 +36,7 @@ } ; Check for crash exposed by D76992. +; CHECK: After buildPlainCFG ; CHECK: N0 [label = ; CHECK-NEXT: "loop:\n" + ; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" + diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -30,18 +30,18 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = 
insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -25,14 +25,14 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32>