diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1543,7 +1543,7 @@ // through scalar predication or masked load/store or masked gather/scatter. // Superset of instructions that return true for isScalarWithPredication. bool isPredicatedInst(Instruction *I) { - if (!blockNeedsPredication(I->getParent())) + if (!Legal->blockNeedsPredication(I->getParent()) && !foldTailByMasking()) return false; // Loads and stores that need some form of masked operation are predicated // instructions. @@ -1597,10 +1597,6 @@ /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { return FoldTailByMasking; } - bool blockNeedsPredication(BasicBlock *BB) const { - return foldTailByMasking() || Legal->blockNeedsPredication(BB); - } - /// A SmallMapVector to store the InLoop reduction op chains, mapping phi /// nodes to the chain of instructions representing the reductions. Uses a /// MapVector to ensure deterministic iteration order. @@ -5256,7 +5252,7 @@ } bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) const { - if (!blockNeedsPredication(I->getParent())) + if (!Legal->blockNeedsPredication(I->getParent()) && !foldTailByMasking()) return false; switch(I->getOpcode()) { default: @@ -5303,7 +5299,8 @@ // (either a gap at the end of a load-access that may result in a speculative // load, or any gaps in a store-access). bool PredicatedAccessRequiresMasking = - blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I); + (Legal->blockNeedsPredication(I->getParent()) || foldTailByMasking()) && + Legal->isMaskRequired(I); bool LoadAccessWithGapsRequiresEpilogMasking = isa<LoadInst>(I) && Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); @@ -6827,7 +6824,7 @@ // determine if it would be better to not if-convert the blocks they are in. 
// If so, we also record the instructions to scalarize. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockNeedsPredication(BB)) + if (!Legal->blockNeedsPredication(BB) && !foldTailByMasking()) continue; for (Instruction &I : *BB) if (isScalarWithPredication(&I)) { @@ -8120,7 +8117,8 @@ return None; // Invalidate interleave groups if all blocks of loop will be predicated. - if (CM.blockNeedsPredication(OrigLoop->getHeader()) && + if ((Legal->blockNeedsPredication(OrigLoop->getHeader()) || + CM.foldTailByMasking()) && !useMaskedInterleavedAccesses(*TTI)) { LLVM_DEBUG( dbgs() @@ -8727,7 +8725,7 @@ VPValue *BlockMask = nullptr; if (OrigLoop->getHeader() == BB) { - if (!CM.blockNeedsPredication(BB)) + if (!Legal->blockNeedsPredication(BB) && !CM.foldTailByMasking()) return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one. // Create the block in mask as the first non-phi instruction in the block.