Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -501,6 +501,11 @@
   bool isLegalMaskedScatter(Type *DataType) const;
   bool isLegalMaskedGather(Type *DataType) const;
 
+  /// If non-zero, returns the (artificially high) cost for emulated masked
+  /// memrefs.
+  int getEmulatedMaskMemRefCost(Instruction *Inst, int NumPredStores,
+                                int Threshold) const;
+
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
@@ -1074,6 +1079,8 @@
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
+  virtual int getEmulatedMaskMemRefCost(Instruction *Inst, int NumPredStores,
+                                        int Threshold) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
@@ -1321,6 +1328,10 @@
   bool isLegalMaskedGather(Type *DataType) override {
     return Impl.isLegalMaskedGather(DataType);
   }
+  int getEmulatedMaskMemRefCost(Instruction *Inst, int NumPredStores,
+                                int Threshold) override {
+    return Impl.getEmulatedMaskMemRefCost(Inst, NumPredStores, Threshold);
+  }
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -263,6 +263,13 @@
   bool isLegalMaskedGather(Type *DataType) { return false; }
 
+  int getEmulatedMaskMemRefCost(Instruction *Inst, int NumPredStores,
+                                int Threshold) {
+    bool UseHackedCost = isa<LoadInst>(Inst) ||
+                         (isa<StoreInst>(Inst) && NumPredStores > Threshold);
+    return UseHackedCost ? 3000000 : 0;
+  }
+
   bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
 
   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
 
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -182,6 +182,12 @@
   return TTIImpl->isLegalMaskedScatter(DataType);
 }
 
+int TargetTransformInfo::getEmulatedMaskMemRefCost(Instruction *Inst,
+                                                   int NumPredStores,
+                                                   int Threshold) const {
+  return TTIImpl->getEmulatedMaskMemRefCost(Inst, NumPredStores, Threshold);
+}
+
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1213,10 +1213,6 @@
   /// as a vector operation.
   bool isConsecutiveLoadOrStore(Instruction *I);
 
-  /// Returns true if an artificially high cost for emulated masked memrefs
-  /// should be used.
-  bool useEmulatedMaskMemRefHack(Instruction *I);
-
   /// Create an analysis remark that explains why vectorization failed
   /// \p RemarkName is the identifier for the remark.
   /// \return the remark object
@@ -5227,21 +5223,6 @@
   return RUs;
 }
 
-bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
-  // TODO: Cost model for emulated masked load/store is completely
-  // broken. This hack guides the cost model to use an artificially
-  // high enough value to practically disable vectorization with such
-  // operations, except where previously deployed legality hack allowed
-  // using very low cost values. This is to avoid regressions coming simply
-  // from moving "masked load/store" check from legality to cost model.
-  // Masked Load/Gather emulation was previously never allowed.
-  // Limited number of Masked Store/Scatter emulation was allowed.
-  assert(isPredicatedInst(I) && "Expecting a scalar emulated instruction");
-  return isa<LoadInst>(I) ||
-         (isa<StoreInst>(I) &&
-          NumPredStores > NumberOfStoresToPredicate);
-}
-
 void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
   // If we aren't vectorizing the loop, or if we've already collected the
   // instructions to scalarize, there's nothing to do. Collection may already
@@ -5266,7 +5247,8 @@
       ScalarCostsTy ScalarCosts;
       // Do not apply discount logic if hacked cost is needed
       // for emulated masked memrefs.
-      if (!useEmulatedMaskMemRefHack(&I) &&
+      if (TTI.getEmulatedMaskMemRefCost(&I, NumPredStores,
+                                        NumberOfStoresToPredicate) == 0 &&
           computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
         ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
       // Remember that BB will remain after vectorization.
@@ -5503,10 +5485,11 @@
     if (isPredicatedInst(I)) {
       Cost /= getReciprocalPredBlockProb();
 
-      if (useEmulatedMaskMemRefHack(I))
+      if (int HackedCost = TTI.getEmulatedMaskMemRefCost(
+              I, NumPredStores, NumberOfStoresToPredicate))
         // Artificially setting to a high enough value to practically disable
         // vectorization with such operations.
-        Cost = 3000000;
+        Cost = HackedCost;
     }
 
     return Cost;
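
With the hack routed through TTI, a target's TTIImpl can shadow the
TargetTransformInfoImplBase default and replace the generic 3000000
sentinel with a tuned value, the same way targets already specialize
hooks like isLegalMaskedGather. The sketch below shows one way that
could look; it is not part of the patch above, and the X86TTIImpl
override, the declaration it presumes in X86TargetTransformInfo.h, and
the 1000000 store cost are illustrative assumptions only.

// Hypothetical sketch of a target override of the new hook. The
// method, its presumed declaration in X86TargetTransformInfo.h, and
// the cost values are assumptions, not part of the patch above.
#include "X86TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

int X86TTIImpl::getEmulatedMaskMemRefCost(Instruction *Inst,
                                          int NumPredStores,
                                          int Threshold) {
  // Keep emulated masked loads practically disabled, matching the
  // TargetTransformInfoImplBase default.
  if (isa<LoadInst>(Inst))
    return 3000000;
  // Past the store threshold, charge a tuned (still prohibitive) cost
  // instead of the generic 3000000 sentinel.
  if (isa<StoreInst>(Inst) && NumPredStores > Threshold)
    return 1000000; // illustrative value only
  // Zero means "no hacked cost": LoopVectorize falls through to the
  // normal cost model and the predication-discount logic.
  return 0;
}

Returning 0 preserves the contract the LoopVectorize.cpp changes rely
on: only a non-zero result suppresses the scalarization discount and
overrides Cost.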