diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -5280,6 +5280,15 @@
   return cast<StoreInst>(I)->getPointerAddressSpace();
 }
 
+/// A helper function that returns the type of loaded or stored value.
+inline Type *getLoadStoreInstValueType(Value *I) {
+  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+         "Expected Load or Store instruction");
+  if (auto *LI = dyn_cast<LoadInst>(I))
+    return LI->getType();
+  return cast<StoreInst>(I)->getValueOperand()->getType();
+}
+
 //===----------------------------------------------------------------------===//
 //                              FreezeInst Class
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -255,6 +255,12 @@
   /// If false, good old LV code.
   bool canVectorize(bool UseVPlanNativePath);
 
+  /// \return true if the instruction can be vectorized. If \p
+  /// VectorizeWithPredication is true, then the strategy requires predication
+  /// to vectorize the loop.
+  bool canVectorizeWithPredication(Instruction *I,
+                                   bool VectorizeWithPredication);
+
   /// Return true if we can vectorize this loop while folding its tail by
   /// masking, and mark all respective loads/stores for masking.
   /// This object's state is only modified iff this function returns true.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -833,6 +833,58 @@
   return true;
 }
 
+/// A helper function for checking whether an integer division-related
+/// instruction may divide by zero (in which case it must be predicated if
+/// executed conditionally in the scalar code).
+/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
+/// Non-zero divisors that are non compile-time constants will not be
+/// converted into multiplication, so we will still end up scalarizing
+/// the division, but can do so w/o predication.
+static bool mayDivideByZero(Instruction &I) {
+  assert((I.getOpcode() == Instruction::UDiv ||
+          I.getOpcode() == Instruction::SDiv ||
+          I.getOpcode() == Instruction::URem ||
+          I.getOpcode() == Instruction::SRem) &&
+         "Unexpected instruction");
+  Value *Divisor = I.getOperand(1);
+  auto *CInt = dyn_cast<ConstantInt>(Divisor);
+  return !CInt || CInt->isZero();
+}
+
+bool LoopVectorizationLegality::canVectorizeWithPredication(
+    Instruction *I, bool VectorizeWithPredication) {
+  switch (I->getOpcode()) {
+  default:
+    break;
+  case Instruction::Load:
+  case Instruction::Store: {
+    // Some instructions can be speculated, even when predication is used
+    // for the block.
+    if (!isMaskRequired(I))
+      return true;
+
+    auto *Ptr = getLoadStorePointerOperand(I);
+    auto *Ty = getLoadStoreInstValueType(I);
+    const Align Alignment = getLoadStoreAlignment(I);
+    return isa<LoadInst>(I) ? ((isConsecutivePtr(Ptr) &&
+                                TTI->isLegalMaskedLoad(Ty, Alignment)) ||
+                               TTI->isLegalMaskedGather(Ty, Alignment))
+                            : ((isConsecutivePtr(Ptr) &&
+                                TTI->isLegalMaskedStore(Ty, Alignment)) ||
+                               TTI->isLegalMaskedScatter(Ty, Alignment));
+  }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    // If predication is used for this block and the operation would otherwise
+    // be guarded, then this requires scalarizing.
+    if (blockNeedsPredication(I->getParent()) || VectorizeWithPredication)
+      return !mayDivideByZero(*I);
+  }
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorizeMemory() {
   LAI = &(*GetLAA)(*TheLoop);
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -377,11 +377,7 @@
 
 /// A helper function that returns the type of loaded or stored value.
 static Type *getMemInstValueType(Value *I) {
-  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
-         "Expected Load or Store instruction");
-  if (auto *LI = dyn_cast<LoadInst>(I))
-    return LI->getType();
-  return cast<StoreInst>(I)->getValueOperand()->getType();
+  return getLoadStoreInstValueType(I);
 }
 
 /// A helper function that returns true if the given type is irregular. The
@@ -1515,23 +1511,20 @@
   /// Returns true if \p I is an instruction that will be scalarized with
   /// predication. Such instructions include conditional stores and
   /// instructions that may divide by zero.
-  /// If a non-zero VF has been calculated, we check if I will be scalarized
-  /// predication for that VF.
   bool
-  isScalarWithPredication(Instruction *I,
-                          ElementCount VF = ElementCount::getFixed(1)) const;
+  isScalarWithPredication(Instruction *I) const;
 
   // Returns true if \p I is an instruction that will be predicated either
   // through scalar predication or masked load/store or masked gather/scatter.
   // Superset of instructions that return true for isScalarWithPredication.
   bool isPredicatedInst(Instruction *I, ElementCount VF) {
-    if (!blockNeedsPredication(I->getParent()))
+    if (!Legal->blockNeedsPredication(I->getParent()) && !foldTailByMasking())
       return false;
     // Loads and stores that need some form of masked operation are predicated
     // instructions.
     if (isa<LoadInst>(I) || isa<StoreInst>(I))
       return Legal->isMaskRequired(I);
-    return isScalarWithPredication(I, VF);
+    return isScalarWithPredication(I);
   }
 
   /// Returns true if \p I is a memory instruction with consecutive memory
@@ -4917,24 +4910,6 @@
   }
 }
 
-/// A helper function for checking whether an integer division-related
-/// instruction may divide by zero (in which case it must be predicated if
-/// executed conditionally in the scalar code).
-/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
-/// Non-zero divisors that are non compile-time constants will not be
-/// converted into multiplication, so we will still end up scalarizing
-/// the division, but can do so w/o predication.
-static bool mayDivideByZero(Instruction &I) {
-  assert((I.getOpcode() == Instruction::UDiv ||
-          I.getOpcode() == Instruction::SDiv ||
-          I.getOpcode() == Instruction::URem ||
-          I.getOpcode() == Instruction::SRem) &&
-         "Unexpected instruction");
-  Value *Divisor = I.getOperand(1);
-  auto *CInt = dyn_cast<ConstantInt>(Divisor);
-  return !CInt || CInt->isZero();
-}
-
 void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
                                            VPUser &User,
                                            VPTransformState &State) {
@@ -5326,39 +5301,8 @@
 }
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
-    Instruction *I, ElementCount VF) const {
-  if (!blockNeedsPredication(I->getParent()))
-    return false;
-  switch(I->getOpcode()) {
-  default:
-    break;
-  case Instruction::Load:
-  case Instruction::Store: {
-    if (!Legal->isMaskRequired(I))
-      return false;
-    auto *Ptr = getLoadStorePointerOperand(I);
-    auto *Ty = getMemInstValueType(I);
-    // We have already decided how to vectorize this instruction, get that
-    // result.
-    if (VF.isVector()) {
-      InstWidening WideningDecision = getWideningDecision(I, VF);
-      assert(WideningDecision != CM_Unknown &&
-             "Widening decision should be ready at this moment");
-      return WideningDecision == CM_Scalarize;
-    }
-    const Align Alignment = getLoadStoreAlignment(I);
-    return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
-                                isLegalMaskedGather(Ty, Alignment))
-                            : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
-                                isLegalMaskedScatter(Ty, Alignment));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    return mayDivideByZero(*I);
-  }
-  return false;
+    Instruction *I) const {
+  return !Legal->canVectorizeWithPredication(I, foldTailByMasking());
 }
 
 bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
@@ -5463,7 +5407,7 @@
                         << *I << "\n");
       return;
     }
-    if (isScalarWithPredication(I, VF)) {
+    if (isScalarWithPredication(I)) {
       LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
                         << *I << "\n");
       return;
@@ -6707,7 +6651,7 @@
   // determine if it would be better to not if-convert the blocks they are in.
   // If so, we also record the instructions to scalarize.
   for (BasicBlock *BB : TheLoop->blocks()) {
-    if (!blockNeedsPredication(BB))
+    if (!Legal->blockNeedsPredication(BB) && !foldTailByMasking())
       continue;
     for (Instruction &I : *BB)
       if (isScalarWithPredication(&I)) {
@@ -7944,7 +7888,8 @@
     return None;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
-  if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+  if ((Legal->blockNeedsPredication(OrigLoop->getHeader()) ||
+       CM.foldTailByMasking()) &&
       !useMaskedInterleavedAccesses(*TTI)) {
     LLVM_DEBUG(
         dbgs()
@@ -8544,7 +8489,7 @@
   VPValue *BlockMask = nullptr;
 
   if (OrigLoop->getHeader() == BB) {
-    if (!CM.blockNeedsPredication(BB))
+    if (!Legal->blockNeedsPredication(BB) && !CM.foldTailByMasking())
       return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
 
     // Create the block in mask as the first non-phi instruction in the block.
@@ -8717,7 +8662,7 @@
 
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
       [this, CI](ElementCount VF) {
-        return CM.isScalarWithPredication(CI, VF);
+        return CM.isScalarWithPredication(CI);
       },
       Range);
 
@@ -8761,7 +8706,7 @@
   auto WillScalarize = [this, I](ElementCount VF) -> bool {
     return CM.isScalarAfterVectorization(I, VF) ||
            CM.isProfitableToScalarize(I, VF) ||
-           CM.isScalarWithPredication(I, VF);
+           CM.isScalarWithPredication(I);
   };
   return !LoopVectorizationPlanner::getDecisionAndClampRange(WillScalarize,
                                                              Range);