diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -383,12 +383,6 @@ unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } - /// Returns all assume calls in predicated blocks. They need to be dropped - /// when flattening the CFG. - const SmallPtrSetImpl &getConditionalAssumes() const { - return ConditionalAssumes; - } - PredicatedScalarEvolution *getPredicatedScalarEvolution() const { return &PSE; } @@ -450,13 +444,11 @@ /// \p SafePtrs is a list of addresses that are known to be legal and we know /// that we can read from them without segfault. /// \p MaskedOp is a list of instructions that have to be transformed into - /// calls to the appropriate masked intrinsic when the loop is vectorized. - /// \p ConditionalAssumes is a list of assume instructions in predicated - /// blocks that must be dropped if the CFG gets flattened. - bool blockCanBePredicated( - BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes) const; + /// calls to the appropriate masked intrinsic when the loop is vectorized + /// or dropped if the instruction is a conditional assume intrinsic. + bool + blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + SmallPtrSetImpl &MaskedOp) const; /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a @@ -539,13 +531,10 @@ AssumptionCache *AC; /// While vectorizing these instructions we have to generate a - /// call to the appropriate masked intrinsic + /// call to the appropriate masked intrinsic or drop them in case of + /// conditional assumes. 
SmallPtrSet MaskedOp; - /// Assume instructions in predicated blocks must be dropped if the CFG gets - /// flattened. - SmallPtrSet ConditionalAssumes; - /// BFI and PSI are used to check for profile guided size optimizations. BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1242,13 +1242,12 @@ bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes) const { + SmallPtrSetImpl &MaskedOp) const { for (Instruction &I : *BB) { // We can predicate blocks with calls to assume, as long as we drop them in // case we flatten the CFG via predication. if (match(&I, m_Intrinsic())) { - ConditionalAssumes.insert(&I); + MaskedOp.insert(&I); continue; } @@ -1345,16 +1344,13 @@ } // We must be able to predicate all blocks that need to be predicated. - if (blockNeedsPredication(BB)) { - if (!blockCanBePredicated(BB, SafePointers, MaskedOp, - ConditionalAssumes)) { - reportVectorizationFailure( - "Control flow cannot be substituted for a select", - "control flow cannot be substituted for a select", - "NoCFGForSelect", ORE, TheLoop, - BB->getTerminator()); - return false; - } + if (blockNeedsPredication(BB) && + !blockCanBePredicated(BB, SafePointers, MaskedOp)) { + reportVectorizationFailure( + "Control flow cannot be substituted for a select", + "control flow cannot be substituted for a select", "NoCFGForSelect", + ORE, TheLoop, BB->getTerminator()); + return false; } } @@ -1554,14 +1550,14 @@ // The list of pointers that we can safely read and write to remains empty. 
SmallPtrSet SafePointers; + // Collect masked ops in temporary set first to avoid partially populating + // MaskedOp if a block cannot be predicated. SmallPtrSet TmpMaskedOp; - SmallPtrSet TmpConditionalAssumes; // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes)) { + if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } @@ -1570,9 +1566,6 @@ LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end()); - ConditionalAssumes.insert(TmpConditionalAssumes.begin(), - TmpConditionalAssumes.end()); - return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -377,8 +377,7 @@ /// returned VPlan is valid for. If no VPlan can be built for the input range, /// set the largest included VF to the maximum VF for which no plan could be /// built. 
- VPlanPtr tryToBuildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &DeadInstructions); + VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8697,18 +8697,10 @@ ElementCount MaxVF) { assert(OrigLoop->isInnermost() && "Inner loop expected."); - // Add assume instructions we need to drop to DeadInstructions, to prevent - // them from being added to the VPlan. - // TODO: We only need to drop assumes in blocks that get flattend. If the - // control flow is preserved, we should keep them. - SmallPtrSet DeadInstructions; - auto &ConditionalAssumes = Legal->getConditionalAssumes(); - DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end()); - auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; - if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions)) { + if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) { // Now optimize the initial VPlan. VPlanTransforms::optimize(*Plan, *PSE.getSE()); assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); @@ -8845,8 +8837,8 @@ } } -VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &DeadInstructions) { +VPlanPtr +LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { SmallPtrSet *, 1> InterleaveGroups; @@ -8930,14 +8922,8 @@ // Introduce each ingredient into VPlan. // TODO: Model and preserve debug intrinsics in VPlan. 
- for (Instruction &I : BB->instructionsWithoutDebug(false)) { + for (Instruction &I : drop_end(BB->instructionsWithoutDebug(false))) { Instruction *Instr = &I; - - // First filter out irrelevant instructions, to ensure no recipes are - // built for them. - if (isa(Instr) || DeadInstructions.count(Instr)) - continue; - SmallVector Operands; auto *Phi = dyn_cast(Instr); if (Phi && Phi->getParent() == OrigLoop->getHeader()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -12,17 +12,20 @@ //===----------------------------------------------------------------------===// #include "VPlanTransforms.h" -#include "VPlanDominatorTree.h" #include "VPRecipeBuilder.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; + void VPlanTransforms::VPInstructionsToVPRecipes( VPlanPtr &Plan, function_ref @@ -479,10 +482,20 @@ // The recipes in the block are processed in reverse order, to catch chains // of dead recipes. for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { - return V->getNumUsers() > 0; - })) + // A user keeps R alive: + if (any_of(R.definedValues(), + [](VPValue *V) { return V->getNumUsers(); })) continue; + + // Having side effects keeps R alive, but do remove conditional assume + // instructions as their conditions may be flattened. 
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R); + bool IsConditionalAssume = + RepR && RepR->isPredicated() && + match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>()); + if (R.mayHaveSideEffects() && !IsConditionalAssume) + continue; + + R.eraseFromParent(); } }