diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,6 +21,7 @@
 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
 
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
@@ -1092,10 +1093,14 @@
 
   /// \returns The expected cost of compare and select instructions. If there
   /// is an existing instruction that holds Opcode, it may be passed in the
-  /// 'I' parameter.
-  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
-                         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-                         const Instruction *I = nullptr) const;
+  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
+  /// is using a compare with the specified predicate as condition. When vector
+  /// types are passed, \p VecPred must be used for all lanes.
+  int getCmpSelInstrCost(
+      unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+      CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
   /// Use -1 to indicate that there is no information on the index value.
@@ -1534,6 +1539,7 @@
   virtual int getCFInstrCost(unsigned Opcode,
                              TTI::TargetCostKind CostKind) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                 CmpInst::Predicate VecPred,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) = 0;
   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -1975,9 +1981,10 @@
     return Impl.getCFInstrCost(Opcode, CostKind);
   }
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I) override {
-    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
   }
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
     return Impl.getVectorInstrCost(Opcode, Val, Index);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -478,6 +478,7 @@
   }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                              CmpInst::Predicate VecPred,
                               TTI::TargetCostKind CostKind,
                               const Instruction *I) const {
     return 1;
@@ -947,12 +948,14 @@
     case Instruction::Select: {
       Type *CondTy = U->getOperand(0)->getType();
       return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
+                                           CmpInst::BAD_ICMP_PREDICATE,
                                            CostKind, I);
     }
     case Instruction::ICmp:
     case Instruction::FCmp: {
       Type *ValTy = U->getOperand(0)->getType();
       return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
+                                           cast<CmpInst>(U)->getPredicate(),
                                            CostKind, I);
     }
     case Instruction::InsertElement: {
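The new parameter sits between CondTy and CostKind and defaults to CmpInst::BAD_ICMP_PREDICATE in the public entry point, which is why every call site that passed CostKind or I positionally, and every target override, has to be updated in this patch. A minimal sketch of the two typical calling patterns against the updated interface; the helper and the surrounding names are illustrative, not part of the patch:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    // Illustrative only: query compare/select costs through the updated API.
    static int exampleCmpSelCosts(const TargetTransformInfo &TTI, Type *ValTy,
                                  Type *CondTy, const Instruction *Sel) {
      // Predicate known up front (e.g. taken from an existing compare via
      // cast<CmpInst>(U)->getPredicate(), as getUserCost above now does).
      int CmpCost = TTI.getCmpSelInstrCost(
          Instruction::ICmp, ValTy, CmpInst::makeCmpResultType(ValTy),
          CmpInst::ICMP_EQ, TargetTransformInfo::TCK_RecipThroughput);

      // Predicate unknown: pass the BAD_ICMP_PREDICATE sentinel (the default)
      // plus the context instruction, which targets may inspect instead.
      int SelCost = TTI.getCmpSelInstrCost(
          Instruction::Select, ValTy, CondTy, CmpInst::BAD_ICMP_PREDICATE,
          TargetTransformInfo::TCK_RecipThroughput, Sel);
      return CmpCost + SelCost;
    }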
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -881,6 +881,7 @@
   }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                              CmpInst::Predicate VecPred,
                               TTI::TargetCostKind CostKind,
                               const Instruction *I = nullptr) {
     const TargetLoweringBase *TLI = getTLI();
@@ -889,7 +890,8 @@
 
     // TODO: Handle other cost kinds.
     if (CostKind != TTI::TCK_RecipThroughput)
-      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+                                       I);
 
     // Selects on vectors are actually vector selects.
     if (ISD == ISD::SELECT) {
@@ -914,7 +916,7 @@
       if (CondTy)
         CondTy = CondTy->getScalarType();
       unsigned Cost = thisT()->getCmpSelInstrCost(
-          Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
+          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
 
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
@@ -1241,10 +1243,12 @@
       // For non-rotates (X != Y) we must add shift-by-zero handling costs.
       if (X != Y) {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
-        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-                                            CostKind);
-        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                            CondTy, CostKind);
+        Cost +=
+            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                        CmpInst::BAD_ICMP_PREDICATE, CostKind);
+        Cost +=
+            thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+                                        CmpInst::BAD_ICMP_PREDICATE, CostKind);
       }
       return Cost;
     }
@@ -1483,10 +1487,12 @@
       Type *CondTy = RetTy->getWithNewBitWidth(1);
       unsigned Cost = 0;
       // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-                                          CostKind);
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
-                                          CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       // TODO: Should we add an OperandValueProperties::OP_Zero property?
       if (IID == Intrinsic::abs)
         Cost += thisT()->getArithmeticInstrCost(
@@ -1508,10 +1514,12 @@
       IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                     ScalarizationCostPassed);
       Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-                                          CostKind);
-      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                              CondTy, CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
+      Cost += 2 * thisT()->getCmpSelInstrCost(
+                      BinaryOperator::Select, RetTy, CondTy,
+                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       return Cost;
     }
     case Intrinsic::uadd_sat:
@@ -1527,8 +1535,9 @@
       IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                     ScalarizationCostPassed);
       Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
-                                          CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       return Cost;
     }
     case Intrinsic::smul_fix:
@@ -1573,10 +1582,12 @@
       //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
       unsigned Cost = 0;
       Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-      Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                              OverflowTy, CostKind);
-      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy,
-                                              OverflowTy, CostKind);
+      Cost += 3 * thisT()->getCmpSelInstrCost(
+                      BinaryOperator::ICmp, SumTy, OverflowTy,
+                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
+      Cost += 2 * thisT()->getCmpSelInstrCost(
+                      BinaryOperator::ICmp, OverflowTy, OverflowTy,
+                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
                                               CostKind);
       return Cost;
@@ -1591,8 +1602,9 @@
 
       unsigned Cost = 0;
       Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                          OverflowTy, CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       return Cost;
     }
     case Intrinsic::smul_with_overflow:
@@ -1621,8 +1633,9 @@
                                             CostKind, TTI::OK_AnyValue,
                                             TTI::OK_UniformConstantValue);
-      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
-                                          OverflowTy, CostKind);
+      Cost +=
+          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       return Cost;
     }
     case Intrinsic::ctpop:
@@ -1864,9 +1877,10 @@
           (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                                      Ty, NumVecElts, SubTy);
       MinMaxCost +=
-          thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
+          thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind) +
           thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
-                                      CostKind);
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
@@ -1888,9 +1902,10 @@
         thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
     MinMaxCost +=
         NumReduxLevels *
-        (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+        (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
+                                     CmpInst::BAD_ICMP_PREDICATE, CostKind) +
          thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
-                                     CostKind));
+                                     CmpInst::BAD_ICMP_PREDICATE, CostKind));
     // The last min/max should be in vector registers and we counted it above.
     // So just need a single extractelement.
     return ShuffleCost + MinMaxCost +
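All of the compares and selects that BasicTTIImplBase synthesizes while costing intrinsic expansions (saturating add/sub, the overflow intrinsics, min/max reductions) have no corresponding IR instruction, so the code above passes CmpInst::BAD_ICMP_PREDICATE for them. A hedged sketch of that pattern for a min/max-style expansion, written against the public TTI interface with illustrative names, not code taken from the patch:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    // Illustrative only: cost a min/max-style expansion as "icmp + select".
    // No IR compare exists yet, so the predicate is reported as unknown.
    static int exampleMinMaxExpansionCost(const TargetTransformInfo &TTI,
                                          Type *RetTy) {
      Type *CondTy = RetTy->getWithNewBitWidth(1);
      int Cost = 0;
      Cost += TTI.getCmpSelInstrCost(Instruction::ICmp, RetTy, CondTy,
                                     CmpInst::BAD_ICMP_PREDICATE,
                                     TargetTransformInfo::TCK_RecipThroughput);
      Cost += TTI.getCmpSelInstrCost(Instruction::Select, RetTy, CondTy,
                                     CmpInst::BAD_ICMP_PREDICATE,
                                     TargetTransformInfo::TCK_RecipThroughput);
      return Cost;
    }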
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -807,11 +807,13 @@
 
 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                             Type *CondTy,
+                                            CmpInst::Predicate VecPred,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+  int Cost =
+      TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -141,6 +141,7 @@
   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
 
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64ExpandImm.h"
 #include "AArch64TargetTransformInfo.h"
+#include "AArch64ExpandImm.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -16,9 +16,11 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include <algorithm>
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "aarch64tti"
 
@@ -675,12 +677,13 @@
 }
 
 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy,
+                                       Type *CondTy, CmpInst::Predicate VecPred,
                                        TTI::TargetCostKind CostKind,
                                        const Instruction *I) {
   // TODO: Handle other cost kinds.
   if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+                                     I);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
@@ -688,6 +691,26 @@
   if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
     // We would need this many instructions to hide the scalarization happening.
     const int AmortizationCost = 20;
+
+    // If VecPred is not set, check if we can get a predicate from the context
+    // instruction, if its type matches the requested ValTy.
+    if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
+      CmpInst::Predicate CurrentPred;
+      if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
+                            m_Value())))
+        VecPred = CurrentPred;
+    }
+    // Check if we have a compare/select chain that can be lowered using CMxx &
+    // BFI pair.
+    if (CmpInst::isIntPredicate(VecPred)) {
+      static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
+                                          MVT::v8i16, MVT::v2i32, MVT::v4i32,
+                                          MVT::v2i64};
+      auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
+      if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+        return LT.first;
+    }
+
     static const TypeConversionCostTblEntry
     VectorSelectTbl[] = {
       { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
@@ -707,7 +730,7 @@
         return Entry->Cost;
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
 }
 
 AArch64TTIImpl::TTI::MemCmpExpansionOptions
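The AArch64 hook is the first real consumer of the new argument: when an integer predicate is known, either passed in by the caller or recovered from a select(cmp(...)) context instruction, compare/select chains on the listed vector types are costed as a single CMxx + BFI pair (the legalization cost) rather than the scalarized estimate. A hedged sketch of a client-side query; the function and variable names are illustrative, not part of the patch:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    // Illustrative only: cost of a <16 x i8> select whose condition is known
    // to come from an integer compare.  With this change the AArch64 hook
    // returns the legalization cost (1 for v16i8), matching the updated
    // expectations in vector-select.ll below.
    static int exampleVectorSelectCost(const TargetTransformInfo &TTI,
                                       LLVMContext &Ctx) {
      auto *ValTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 16);
      auto *CondTy = FixedVectorType::get(Type::getInt1Ty(Ctx), 16);
      return TTI.getCmpSelInstrCost(Instruction::Select, ValTy, CondTy,
                                    CmpInst::ICMP_SGT,
                                    TargetTransformInfo::TCK_RecipThroughput);
    }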
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -213,6 +213,7 @@
                     const Instruction *I = nullptr);
 
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -810,6 +810,7 @@
 }
 
 int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   CmpInst::Predicate VecPred,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -839,7 +840,8 @@
   }
 
   if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+                                     I);
 
   // On NEON a vector select gets lowered to vbsl.
   if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
@@ -866,8 +868,8 @@
 
   int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() ?
                    ST->getMVEVectorCostFactor() : 1;
-  return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
-                                              I);
+  return BaseCost *
+         BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
 }
 
 int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -134,6 +134,8 @@
       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+
+                              CmpInst::Predicate VecPred,
                               TTI::TargetCostKind CostKind,
                               const Instruction *I = nullptr);
   unsigned getArithmeticInstrCost(
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -242,13 +242,16 @@
 }
 
 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-      Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
+                                            Type *CondTy,
+                                            CmpInst::Predicate VecPred,
+                                            TTI::TargetCostKind CostKind,
+                                            const Instruction *I) {
   if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
     std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
     if (Opcode == Instruction::FCmp)
       return LT.first + FloatFactor * getTypeNumElements(ValTy);
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
 }
 
 unsigned HexagonTTIImpl::getArithmeticInstrCost(
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -113,6 +113,7 @@
                    const Instruction *I = nullptr);
   int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -978,9 +978,11 @@
 }
 
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   CmpInst::Predicate VecPred,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
-  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+  int Cost =
+      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
   // TODO: Handle other cost kinds.
   if (CostKind != TTI::TCK_RecipThroughput)
     return Cost;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -97,6 +97,7 @@
                        TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -847,11 +847,11 @@
 }
 
 int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy,
+                                       Type *CondTy, CmpInst::Predicate VecPred,
                                        TTI::TargetCostKind CostKind,
                                        const Instruction *I) {
   if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
 
   if (!ValTy->isVectorTy()) {
     switch (Opcode) {
@@ -927,7 +927,7 @@
     }
   }
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
 }
 
 int SystemZTTIImpl::
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -133,6 +133,7 @@
                    TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                    const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2084,11 +2084,13 @@
 }
 
 int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   CmpInst::Predicate VecPred,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   // TODO: Handle other cost kinds.
   if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+                                     I);
 
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -2272,7 +2274,7 @@
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
     return LT.first * (ExtraCost + Entry->Cost);
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
 }
 
 unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
@@ -3223,7 +3225,7 @@
         getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
         Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
-        CostKind);
+        CmpInst::BAD_ICMP_PREDICATE, CostKind);
     int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
     int ValueSplitCost =
@@ -3644,8 +3646,10 @@
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // Otherwise fall back to cmp+select.
-  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
-         getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
+  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE,
+                            CostKind) +
+         getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+                            CmpInst::BAD_ICMP_PREDICATE, CostKind);
 }
 
 int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
@@ -4123,9 +4127,9 @@
         FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
     MaskUnpackCost =
         getScalarizationOverhead(MaskTy, DemandedElts, false, true);
-    int ScalarCompareCost =
-        getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
-                           nullptr, CostKind);
+    int ScalarCompareCost = getCmpSelInstrCost(
+        Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
+        CmpInst::BAD_ICMP_PREDICATE, CostKind);
     int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
   }
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2236,9 +2236,9 @@
                               unsigned MinIdx, unsigned MaxIdx) {
     Operations.emplace_back(Opcode, MinIdx, MaxIdx);
     Type *OpType = S->getOperand(0)->getType();
-    return NumRequired *
-           TTI.getCmpSelInstrCost(Opcode, OpType,
-                                  CmpInst::makeCmpResultType(OpType), CostKind);
+    return NumRequired * TTI.getCmpSelInstrCost(
+                             Opcode, OpType, CmpInst::makeCmpResultType(OpType),
+                             CmpInst::BAD_ICMP_PREDICATE, CostKind);
   };
 
   switch (S->getSCEVType()) {
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2026,7 +2026,7 @@
 
     BudgetRemaining -=
         TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
-                               CostKind);
+                               CmpInst::BAD_ICMP_PREDICATE, CostKind);
 
     // Don't convert to selects if we could remove undefined behavior instead.
     if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6613,7 +6613,7 @@
              TTI.getCmpSelInstrCost(
                  Instruction::Select, ToVectorTy(Phi->getType(), VF),
                  ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF),
-                 CostKind);
+                 CmpInst::BAD_ICMP_PREDICATE, CostKind);
 
     return TTI.getCFInstrCost(Instruction::PHI, CostKind);
   }
@@ -6702,7 +6702,7 @@
       CondTy = VectorType::get(CondTy, VF);
     }
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
-                                  CostKind, I);
+                                  CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
@@ -6711,8 +6711,8 @@
     if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
       ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind,
-                                  I);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr,
+                                  CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
   }
   case Instruction::Store:
   case Instruction::Load: {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3539,16 +3539,17 @@
     case Instruction::ICmp:
     case Instruction::Select: {
       // Calculate the cost of this instruction.
-      int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
-                                                  Builder.getInt1Ty(),
-                                                  CostKind, VL0);
+      int ScalarEltCost =
+          TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+                                  CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
       int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
-                                            CostKind, VL0);
+      int VecCost =
+          TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+                                  CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
       // Check if it is possible and profitable to use min/max for selects in
       // VL.
       //
@@ -3560,8 +3561,9 @@
         // If the selects are the only uses of the compares, they will be dead
         // and we can adjust the cost by removing their cost.
         if (IntrinsicAndUse.second)
-          IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
-                                                   MaskTy, CostKind);
+          IntrinsicCost -=
+              TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
+                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
         VecCost = std::min(VecCost, IntrinsicCost);
       }
       return ReuseShuffleCost + VecCost - ScalarCost;
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -3,7 +3,7 @@
 ; COST-LABEL: v8i8_select_eq
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp eq <8 x i8> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c
 
 ; CODE-LABEL: v8i8_select_eq
 ; CODE:       bb.0
@@ -19,7 +19,7 @@
 ; COST-LABEL: v16i8_select_sgt
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sgt <16 x i8> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c
 
 ; CODE-LABEL: v16i8_select_sgt
 ; CODE:       bb.0
@@ -35,7 +35,7 @@
 ; COST-LABEL: v4i16_select_ne
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ne <4 x i16> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c
 
 ; CODE-LABEL: v4i16_select_ne
 ; CODE:       bb.0
@@ -51,7 +51,7 @@
 ; COST-LABEL: v8i16_select_ugt
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ugt <8 x i16> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c
 
 ; CODE-LABEL: v8i16_select_ugt
 ; CODE:       bb.0
@@ -67,7 +67,7 @@
 ; COST-LABEL: v2i32_select_ule
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ule <2 x i32> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c
 
 ; CODE-LABEL: v2i32_select_ule
 ; CODE:       bb.0
@@ -83,7 +83,7 @@
 ; COST-LABEL: v4i32_select_ult
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ult <4 x i32> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c
 
 ; CODE-LABEL: v4i32_select_ult
 ; CODE:       bb.0
@@ -99,7 +99,7 @@
 ; COST-LABEL: v2i64_select_sle
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sle <2 x i64> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c
 
 ; CODE-LABEL: v2i64_select_sle
 ; CODE:       bb.0
@@ -115,7 +115,7 @@
 ; COST-LABEL: v3i64_select_sle
 ; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cmp.1 = icmp sle <3 x i64> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
 
 ; CODE-LABEL: v3i64_select_sle
 ; CODE:       bb.0
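For the cost-model test above, the predicate is not passed in explicitly: the generic getUserCost path still uses BAD_ICMP_PREDICATE for selects, so the lower select costs come from the context-instruction fallback in the AArch64 hook, which pattern-matches the select's compare. A hedged sketch of that recovery step in isolation, mirroring the PatternMatch code added above (the helper name is illustrative):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Illustrative only: recover the predicate from a select(cmp(...)) context
    // instruction, as AArch64TTIImpl::getCmpSelInstrCost now does when it is
    // handed BAD_ICMP_PREDICATE.
    static CmpInst::Predicate recoverSelectPredicate(const Instruction *I) {
      CmpInst::Predicate CurrentPred;
      if (I && match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()),
                                 m_Value(), m_Value())))
        return CurrentPred;
      return CmpInst::BAD_ICMP_PREDICATE;
    }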