diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1542,7 +1542,7 @@ // through scalar predication or masked load/store or masked gather/scatter. // Superset of instructions that return true for isScalarWithPredication. bool isPredicatedInst(Instruction *I) { - if (!blockNeedsPredication(I->getParent())) + if (!blockNeedsPredicationForAnyReason(I->getParent())) return false; // Loads and stores that need some form of masked operation are predicated // instructions. @@ -1596,7 +1596,10 @@ /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { return FoldTailByMasking; } - bool blockNeedsPredication(BasicBlock *BB) const { + /// Returns true if the instructions in this block require predication + /// for any reason, e.g. because tail folding now requires a predicate + /// or because the block in the original loop was predicated. + bool blockNeedsPredicationForAnyReason(BasicBlock *BB) const { return foldTailByMasking() || Legal->blockNeedsPredication(BB); } @@ -5254,7 +5257,7 @@ } bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) const { - if (!blockNeedsPredication(I->getParent())) + if (!blockNeedsPredicationForAnyReason(I->getParent())) return false; switch(I->getOpcode()) { default: @@ -5301,7 +5304,8 @@ // (either a gap at the end of a load-access that may result in a speculative // load, or any gaps in a store-access). 
bool PredicatedAccessRequiresMasking = - blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I); + blockNeedsPredicationForAnyReason(I->getParent()) && + Legal->isMaskRequired(I); bool LoadAccessWithGapsRequiresEpilogMasking = isa<LoadInst>(I) && Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); @@ -6845,7 +6849,7 @@ // determine if it would be better to not if-convert the blocks they are in. // If so, we also record the instructions to scalarize. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockNeedsPredication(BB)) + if (!blockNeedsPredicationForAnyReason(BB)) continue; for (Instruction &I : *BB) if (isScalarWithPredication(&I)) { @@ -8138,7 +8142,7 @@ return None; // Invalidate interleave groups if all blocks of loop will be predicated. - if (CM.blockNeedsPredication(OrigLoop->getHeader()) && + if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) && !useMaskedInterleavedAccesses(*TTI)) { LLVM_DEBUG( dbgs() @@ -8748,7 +8752,7 @@ VPValue *BlockMask = nullptr; if (OrigLoop->getHeader() == BB) { - if (!CM.blockNeedsPredication(BB)) + if (!CM.blockNeedsPredicationForAnyReason(BB)) return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one. // Create the block in mask as the first non-phi instruction in the block.