Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h @@ -114,6 +114,40 @@ /// \name Generic Target Information /// @{ + /// \brief The kind of cost model. + /// + /// There are several different cost models that can be customized by the + /// target. The normalization of each cost model may be target specific. + enum TargetCostKind { + TCK_RecipThroughput, ///< Reciprocal throughput. + TCK_Latency, ///< The latency of instruction. + TCK_CodeSize ///< Instruction code size. + }; + + /// \brief Query the cost of a specified instruction. + /// + /// Clients should use this interface to query the cost of an existing + /// instruction. The instruction must have a valid parent (basic block). + /// + /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { + switch (kind){ + case TCK_RecipThroughput: + return getInstructionThroughput(I); + + case TCK_Latency: + return getInstructionLatency(I); + + case TCK_CodeSize: + return getUserCost(I); + + default: + llvm_unreachable("Unknown instruction cost kind"); + return 0; + } + } + /// \brief Underlying constants for 'cost' values in this interface. /// /// Many APIs in this interface return a cost. This enum defines the @@ -868,6 +902,14 @@ /// @} private: + /// \brief Estimate the latency of specified instruction. + /// Returns 1 as the default value. + int getInstructionLatency(const Instruction *I) const; + + /// \brief Returns the expected throughput cost of the instruction. + /// Returns -1 if the cost is unknown. + int getInstructionThroughput(const Instruction *I) const; + /// \brief The abstract base class used to type erase specific TTI /// implementations. class Concept; @@ -1044,6 +1086,7 @@ virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; + virtual int getInstructionLatency(const Instruction *I) = 0; }; template @@ -1406,6 +1449,9 @@ bool shouldExpandReduction(const IntrinsicInst *II) const override { return Impl.shouldExpandReduction(II); } + int getInstructionLatency(const Instruction *I) override { + return Impl.getInstructionLatency(I); + } }; template Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -771,6 +771,25 @@ Operator::getOpcode(U), U->getType(), U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); } + + int getInstructionLatency(const Instruction *I) { + if (isa(I)) + return 0; + + if (isa(I)) + return 40; + + if (isa(I)) + return 4; + + Type *dstTy = I->getType(); + if (VectorType *VectorTy = dyn_cast(dstTy)) + dstTy = VectorTy->getElementType(); + if (dstTy->isFloatingPointTy()) + return 3; + + return 1; + } }; } Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h @@ -350,6 +350,13 @@ UP.BEInsns = 2; } + int getInstructionLatency(const Instruction *I) { + if (isa(I)) + return getST()->getSchedModel().DefaultLoadLatency; + + return BaseT::getInstructionLatency(I); + } + /// @} /// \name Vector TTI Implementations Index: llvm/trunk/lib/Analysis/CostModel.cpp =================================================================== --- llvm/trunk/lib/Analysis/CostModel.cpp +++ llvm/trunk/lib/Analysis/CostModel.cpp @@ -20,26 +20,27 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -using namespace PatternMatch; + +static cl::opt CostKind( + "cost-kind", cl::desc("Target cost kind"), + cl::init(TargetTransformInfo::TCK_RecipThroughput), + cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, + "throughput", "Reciprocal throughput"), + clEnumValN(TargetTransformInfo::TCK_Latency, + "latency", "Instruction latency"), + clEnumValN(TargetTransformInfo::TCK_CodeSize, + "code-size", "Code size"))); #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME -static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), - cl::Hidden, - cl::desc("Recognize reduction patterns.")); - namespace { class CostModelAnalysis : public FunctionPass { @@ -54,7 +55,9 @@ /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - unsigned getInstructionCost(const Instruction *I) const; + unsigned getInstructionCost(const Instruction *I) const { + return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput); + } private: void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -92,564 +95,13 @@ return false; } -static bool isReverseVectorMask(ArrayRef Mask) { - for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) - if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) - return false; - return true; -} - -static bool isSingleSourceVectorMask(ArrayRef Mask) { - bool Vec0 = false; - bool Vec1 = false; - for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { - if (Mask[i] >= 0) { - if ((unsigned)Mask[i] >= NumVecElts) - Vec1 = true; - else - Vec0 = true; - } - } - return !(Vec0 && Vec1); -} - -static bool isZeroEltBroadcastVectorMask(ArrayRef Mask) { - for (unsigned i = 0; i < Mask.size(); ++i) - if (Mask[i] > 0) - return false; - return true; -} - -static bool isAlternateVectorMask(ArrayRef Mask) { - bool isAlternate = true; - unsigned MaskSize = Mask.size(); - - // Example: shufflevector A, B, <0,5,2,7> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); - } - - if (isAlternate) - return true; - - isAlternate = true; - // Example: shufflevector A, B, <4,1,6,3> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); - } - - return isAlternate; -} - -static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { - TargetTransformInfo::OperandValueKind OpInfo = - TargetTransformInfo::OK_AnyValue; - - // Check for a splat of a constant or for a non uniform vector of constants. - if (isa(V) || isa(V)) { - OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast(V)->getSplatValue() != nullptr) - OpInfo = TargetTransformInfo::OK_UniformConstantValue; - } - - // Check for a splat of a uniform value. This is not loop aware, so return - // true only for the obviously uniform cases (argument, globalvalue) - const Value *Splat = getSplatValue(V); - if (Splat && (isa(Splat) || isa(Splat))) - OpInfo = TargetTransformInfo::OK_UniformValue; - - return OpInfo; -} - -static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, - unsigned Level) { - // We don't need a shuffle if we just want to have element 0 in position 0 of - // the vector. - if (!SI && Level == 0 && IsLeft) - return true; - else if (!SI) - return false; - - SmallVector Mask(SI->getType()->getVectorNumElements(), -1); - - // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether - // we look at the left or right side. - for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) - Mask[i] = val; - - SmallVector ActualMask = SI->getShuffleMask(); - return Mask == ActualMask; -} - -namespace { -/// Kind of the reduction data. -enum ReductionKind { - RK_None, /// Not a reduction. - RK_Arithmetic, /// Binary reduction data. - RK_MinMax, /// Min/max reduction data. - RK_UnsignedMinMax, /// Unsigned min/max reduction data. -}; -/// Contains opcode + LHS/RHS parts of the reduction operations. -struct ReductionData { - ReductionData() = delete; - ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) - : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { - assert(Kind != RK_None && "expected binary or min/max reduction only."); - } - unsigned Opcode = 0; - Value *LHS = nullptr; - Value *RHS = nullptr; - ReductionKind Kind = RK_None; - bool hasSameData(ReductionData &RD) const { - return Kind == RD.Kind && Opcode == RD.Opcode; - } -}; -} // namespace - -static Optional getReductionData(Instruction *I) { - Value *L, *R; - if (m_BinOp(m_Value(L), m_Value(R)).match(I)) - return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); - if (auto *SI = dyn_cast(I)) { - if (m_SMin(m_Value(L), m_Value(R)).match(SI) || - m_SMax(m_Value(L), m_Value(R)).match(SI) || - m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || - m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || - m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || - m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { - auto *CI = cast(SI->getCondition()); - return ReductionData(RK_MinMax, CI->getOpcode(), L, R); - } - if (m_UMin(m_Value(L), m_Value(R)).match(SI) || - m_UMax(m_Value(L), m_Value(R)).match(SI)) { - auto *CI = cast(SI->getCondition()); - return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); - } - } - return llvm::None; -} - -static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, - unsigned Level, - unsigned NumLevels) { - // Match one level of pairwise operations. - // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> - // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> - // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - if (!I) - return RK_None; - - assert(I->getType()->isVectorTy() && "Expecting a vector type"); - - Optional RD = getReductionData(I); - if (!RD) - return RK_None; - - ShuffleVectorInst *LS = dyn_cast(RD->LHS); - if (!LS && Level) - return RK_None; - ShuffleVectorInst *RS = dyn_cast(RD->RHS); - if (!RS && Level) - return RK_None; - - // On level 0 we can omit one shufflevector instruction. - if (!Level && !RS && !LS) - return RK_None; - - // Shuffle inputs must match. - Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; - Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; - Value *NextLevelOp = nullptr; - if (NextLevelOpR && NextLevelOpL) { - // If we have two shuffles their operands must match. - if (NextLevelOpL != NextLevelOpR) - return RK_None; - - NextLevelOp = NextLevelOpL; - } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { - // On the first level we can omit the shufflevector <0, undef,...>. So the - // input to the other shufflevector <1, undef> must match with one of the - // inputs to the current binary operation. - // Example: - // %NextLevelOpL = shufflevector %R, <1, undef ...> - // %BinOp = fadd %NextLevelOpL, %R - if (NextLevelOpL && NextLevelOpL != RD->RHS) - return RK_None; - else if (NextLevelOpR && NextLevelOpR != RD->LHS) - return RK_None; - - NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; - } else { - return RK_None; - } - - // Check that the next levels binary operation exists and matches with the - // current one. - if (Level + 1 != NumLevels) { - Optional NextLevelRD = - getReductionData(cast(NextLevelOp)); - if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) - return RK_None; - } - - // Shuffle mask for pairwise operation must match. - if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { - if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) - return RK_None; - } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { - if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) - return RK_None; - } else { - return RK_None; - } - - if (++Level == NumLevels) - return RD->Kind; - - // Match next level. - return matchPairwiseReductionAtLevel(cast(NextLevelOp), Level, - NumLevels); -} - -static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { - if (!EnableReduxCost) - return RK_None; - - // Need to extract the first element. - ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); - unsigned Idx = ~0u; - if (CI) - Idx = CI->getZExtValue(); - if (Idx != 0) - return RK_None; - - auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); - if (!RdxStart) - return RK_None; - Optional RD = getReductionData(RdxStart); - if (!RD) - return RK_None; - - Type *VecTy = RdxStart->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); - if (!isPowerOf2_32(NumVecElems)) - return RK_None; - - // We look for a sequence of shuffle,shuffle,add triples like the following - // that builds a pairwise reduction tree. - // - // (X0, X1, X2, X3) - // (X0 + X1, X2 + X3, undef, undef) - // ((X0 + X1) + (X2 + X3), undef, undef, undef) - // - // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> - // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> - // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, - // <4 x i32> - // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, - // <4 x i32> - // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 - // %r = extractelement <4 x float> %bin.rdx8, i32 0 - if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == - RK_None) - return RK_None; - - Opcode = RD->Opcode; - Ty = VecTy; - - return RD->Kind; -} - -static std::pair -getShuffleAndOtherOprd(Value *L, Value *R) { - ShuffleVectorInst *S = nullptr; - - if ((S = dyn_cast(L))) - return std::make_pair(R, S); - - S = dyn_cast(R); - return std::make_pair(L, S); -} - -static ReductionKind -matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { - if (!EnableReduxCost) - return RK_None; - - // Need to extract the first element. - ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); - unsigned Idx = ~0u; - if (CI) - Idx = CI->getZExtValue(); - if (Idx != 0) - return RK_None; - - auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); - if (!RdxStart) - return RK_None; - Optional RD = getReductionData(RdxStart); - if (!RD) - return RK_None; - - Type *VecTy = ReduxRoot->getOperand(0)->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); - if (!isPowerOf2_32(NumVecElems)) - return RK_None; - - // We look for a sequence of shuffles and adds like the following matching one - // fadd, shuffle vector pair at a time. - // - // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> - // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf - // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, - // <4 x i32> - // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 - // %r = extractelement <4 x float> %bin.rdx8, i32 0 - - unsigned MaskStart = 1; - Instruction *RdxOp = RdxStart; - SmallVector ShuffleMask(NumVecElems, 0); - unsigned NumVecElemsRemain = NumVecElems; - while (NumVecElemsRemain - 1) { - // Check for the right reduction operation. - if (!RdxOp) - return RK_None; - Optional RDLevel = getReductionData(RdxOp); - if (!RDLevel || !RDLevel->hasSameData(*RD)) - return RK_None; - - Value *NextRdxOp; - ShuffleVectorInst *Shuffle; - std::tie(NextRdxOp, Shuffle) = - getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); - - // Check the current reduction operation and the shuffle use the same value. - if (Shuffle == nullptr) - return RK_None; - if (Shuffle->getOperand(0) != NextRdxOp) - return RK_None; - - // Check that shuffle masks matches. - for (unsigned j = 0; j != MaskStart; ++j) - ShuffleMask[j] = MaskStart + j; - // Fill the rest of the mask with -1 for undef. - std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); - - SmallVector Mask = Shuffle->getShuffleMask(); - if (ShuffleMask != Mask) - return RK_None; - - RdxOp = dyn_cast(NextRdxOp); - NumVecElemsRemain /= 2; - MaskStart *= 2; - } - - Opcode = RD->Opcode; - Ty = VecTy; - return RD->Kind; -} - -unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { - if (!TTI) - return -1; - - switch (I->getOpcode()) { - case Instruction::GetElementPtr: - return TTI->getUserCost(I); - - case Instruction::Ret: - case Instruction::PHI: - case Instruction::Br: { - return TTI->getCFInstrCost(I->getOpcode()); - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - TargetTransformInfo::OperandValueKind Op1VK = - getOperandInfo(I->getOperand(0)); - TargetTransformInfo::OperandValueKind Op2VK = - getOperandInfo(I->getOperand(1)); - SmallVector Operands(I->operand_values()); - return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK, TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None, - Operands); - } - case Instruction::Select: { - const SelectInst *SI = cast(I); - Type *CondTy = SI->getCondition()->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); - } - case Instruction::ICmp: - case Instruction::FCmp: { - Type *ValTy = I->getOperand(0)->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); - } - case Instruction::Store: { - const StoreInst *SI = cast(I); - Type *ValTy = SI->getValueOperand()->getType(); - return TTI->getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace(), I); - } - case Instruction::Load: { - const LoadInst *LI = cast(I); - return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace(), I); - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: { - Type *SrcTy = I->getOperand(0)->getType(); - return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); - } - case Instruction::ExtractElement: { - const ExtractElementInst * EEI = cast(I); - ConstantInt *CI = dyn_cast(I->getOperand(1)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - - // Try to match a reduction sequence (series of shufflevector and vector - // adds followed by a extractelement). - unsigned ReduxOpCode; - Type *ReduxType; - - switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/false); - case RK_MinMax: - return TTI->getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return TTI->getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); - case RK_None: - break; - } - - switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/true); - case RK_MinMax: - return TTI->getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return TTI->getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); - case RK_None: - break; - } - - return TTI->getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); - } - case Instruction::InsertElement: { - const InsertElementInst * IE = cast(I); - ConstantInt *CI = dyn_cast(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return TTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); - } - case Instruction::ShuffleVector: { - const ShuffleVectorInst *Shuffle = cast(I); - Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); - unsigned NumVecElems = VecTypOp0->getVectorNumElements(); - SmallVector Mask = Shuffle->getShuffleMask(); - - if (NumVecElems == Mask.size()) { - if (isReverseVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, - 0, nullptr); - if (isAlternateVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, - VecTypOp0, 0, nullptr); - - if (isZeroEltBroadcastVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, - VecTypOp0, 0, nullptr); - - if (isSingleSourceVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, - VecTypOp0, 0, nullptr); - - return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - VecTypOp0, 0, nullptr); - } - - return -1; - } - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast(I)) { - SmallVector Args(II->arg_operands()); - - FastMathFlags FMF; - if (auto *FPMO = dyn_cast(II)) - FMF = FPMO->getFastMathFlags(); - - return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), - Args, FMF); - } - return -1; - default: - // We don't have any information on this instruction. - return -1; - } -} - void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { if (!F) return; for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - unsigned Cost = getInstructionCost(&Inst); + unsigned Cost = TTI->getInstructionCost(&Inst, CostKind); if (Cost != (unsigned)-1) OS << "Cost Model: Found an estimated cost of " << Cost; else Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp +++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp @@ -16,11 +16,13 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "tti" @@ -29,6 +31,10 @@ cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), cl::Hidden); +static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); + namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. @@ -583,6 +589,557 @@ return TTIImpl->shouldExpandReduction(II); } +int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { + return TTIImpl->getInstructionLatency(I); +} + +static bool isReverseVectorMask(ArrayRef Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static bool isSingleSourceVectorMask(ArrayRef Mask) { + bool Vec0 = false; + bool Vec1 = false; + for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { + if (Mask[i] >= 0) { + if ((unsigned)Mask[i] >= NumVecElts) + Vec1 = true; + else + Vec0 = true; + } + } + return !(Vec0 && Vec1); +} + +static bool isZeroEltBroadcastVectorMask(ArrayRef Mask) { + for (unsigned i = 0; i < Mask.size(); ++i) + if (Mask[i] > 0) + return false; + return true; +} + +static bool isAlternateVectorMask(ArrayRef Mask) { + bool isAlternate = true; + unsigned MaskSize = Mask.size(); + + // Example: shufflevector A, B, <0,5,2,7> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); + } + + if (isAlternate) + return true; + + isAlternate = true; + // Example: shufflevector A, B, <4,1,6,3> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + } + + return isAlternate; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant or for a non uniform vector of constants. + if (isa(V) || isa(V)) { + OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; + if (cast(V)->getSplatValue() != nullptr) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + } + + // Check for a splat of a uniform value. This is not loop aware, so return + // true only for the obviously uniform cases (argument, globalvalue) + const Value *Splat = getSplatValue(V); + if (Splat && (isa(Splat) || isa(Splat))) + OpInfo = TargetTransformInfo::OK_UniformValue; + + return OpInfo; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector ActualMask = SI->getShuffleMask(); + return Mask == ActualMask; +} + +namespace { +/// Kind of the reduction data. +enum ReductionKind { + RK_None, /// Not a reduction. + RK_Arithmetic, /// Binary reduction data. + RK_MinMax, /// Min/max reduction data. + RK_UnsignedMinMax, /// Unsigned min/max reduction data. +}; +/// Contains opcode + LHS/RHS parts of the reduction operations. +struct ReductionData { + ReductionData() = delete; + ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) + : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { + assert(Kind != RK_None && "expected binary or min/max reduction only."); + } + unsigned Opcode = 0; + Value *LHS = nullptr; + Value *RHS = nullptr; + ReductionKind Kind = RK_None; + bool hasSameData(ReductionData &RD) const { + return Kind == RD.Kind && Opcode == RD.Opcode; + } +}; +} // namespace + +static Optional getReductionData(Instruction *I) { + Value *L, *R; + if (m_BinOp(m_Value(L), m_Value(R)).match(I)) + return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + if (auto *SI = dyn_cast(I)) { + if (m_SMin(m_Value(L), m_Value(R)).match(SI) || + m_SMax(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast(SI->getCondition()); + return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + } + if (m_UMin(m_Value(L), m_Value(R)).match(SI) || + m_UMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast(SI->getCondition()); + return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); + } + } + return llvm::None; +} + +static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, + unsigned Level, + unsigned NumLevels) { + // Match one level of pairwise operations. + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (!I) + return RK_None; + + assert(I->getType()->isVectorTy() && "Expecting a vector type"); + + Optional RD = getReductionData(I); + if (!RD) + return RK_None; + + ShuffleVectorInst *LS = dyn_cast(RD->LHS); + if (!LS && Level) + return RK_None; + ShuffleVectorInst *RS = dyn_cast(RD->RHS); + if (!RS && Level) + return RK_None; + + // On level 0 we can omit one shufflevector instruction. + if (!Level && !RS && !LS) + return RK_None; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return RK_None; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. + // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != RD->RHS) + return RK_None; + else if (NextLevelOpR && NextLevelOpR != RD->LHS) + return RK_None; + + NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; + } else + return RK_None; + + // Check that the next levels binary operation exists and matches with the + // current one. + if (Level + 1 != NumLevels) { + Optional NextLevelRD = + getReductionData(cast(NextLevelOp)); + if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) + return RK_None; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) + return RK_None; + } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) + return RK_None; + } else { + return RK_None; + } + + if (++Level == NumLevels) + return RD->Kind; + + // Match next level. + return matchPairwiseReductionAtLevel(cast(NextLevelOp), Level, + NumLevels); +} + +static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = RdxStart->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree. + // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == + RK_None) + return RK_None; + + Opcode = RD->Opcode; + Ty = VecTy; + + return RD->Kind; +} + +static std::pair +getShuffleAndOtherOprd(Value *L, Value *R) { + ShuffleVectorInst *S = nullptr; + + if ((S = dyn_cast(L))) + return std::make_pair(R, S); + + S = dyn_cast(R); + return std::make_pair(L, S); +} + +static ReductionKind +matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffles and adds like the following matching one + // fadd, shuffle vector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Instruction *RdxOp = RdxStart; + SmallVector ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + if (!RdxOp) + return RK_None; + Optional RDLevel = getReductionData(RdxOp); + if (!RDLevel || !RDLevel->hasSameData(*RD)) + return RK_None; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + std::tie(NextRdxOp, Shuffle) = + getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); + + // Check the current reduction operation and the shuffle use the same value. + if (Shuffle == nullptr) + return RK_None; + if (Shuffle->getOperand(0) != NextRdxOp) + return RK_None; + + // Check that shuffle masks matches. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef. + std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector Mask = Shuffle->getShuffleMask(); + if (ShuffleMask != Mask) + return RK_None; + + RdxOp = dyn_cast(NextRdxOp); + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RD->Opcode; + Ty = VecTy; + return RD->Kind; +} + +int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + return getUserCost(I); + + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + SmallVector Operands(I->operand_values()); + return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, + Operands); + } + case Instruction::Select: { + const SelectInst *SI = cast(I); + Type *CondTy = SI->getCondition()->getType(); + return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); + } + case Instruction::Store: { + const StoreInst *SI = cast(I); + Type *ValTy = SI->getValueOperand()->getType(); + return getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace(), I); + } + case Instruction::Load: { + const LoadInst *LI = cast(I); + return getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace(), I); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); + } + case Instruction::ExtractElement: { + const ExtractElementInst * EEI = cast(I); + ConstantInt *CI = dyn_cast(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by a extractelement). + unsigned ReduxOpCode; + Type *ReduxType; + + switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/false); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); + case RK_None: + break; + } + + switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/true); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); + case RK_None: + break; + } + + return getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case Instruction::InsertElement: { + const InsertElementInst * IE = cast(I); + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size()) { + if (isReverseVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, + 0, nullptr); + if (isAlternateVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Alternate, + VecTypOp0, 0, nullptr); + + if (isZeroEltBroadcastVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Broadcast, + VecTypOp0, 0, nullptr); + + if (isSingleSourceVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + VecTypOp0, 0, nullptr); + + return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + VecTypOp0, 0, nullptr); + } + + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast(I)) { + SmallVector Args(II->arg_operands()); + + FastMathFlags FMF; + if (auto *FPMO = dyn_cast(II)) + FMF = FPMO->getFastMathFlags(); + + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Args, FMF); + } + return -1; + default: + // We don't have any information on this instruction. + return -1; + } +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} Index: llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll +++ llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -cost-model -cost-kind=latency -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=LATENCY +; RUN: opt < %s -cost-model -cost-kind=code-size -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=CODESIZE + +; Tests if the interface TargetTransformInfo::getInstructionCost() works correctly. + +define i64 @foo(i64 %arg) { + + ; LATENCY: cost of 1 {{.*}} %I64 = add + ; CODESIZE: cost of 1 {{.*}} %I64 = add + %I64 = add i64 undef, undef + + ; LATENCY: cost of 4 {{.*}} load + ; CODESIZE: cost of 1 {{.*}} load + load i64, i64* undef, align 4 + + ; LATENCY: cost of 1 {{.*}} ret + ; CODESIZE: cost of 1 {{.*}} ret + ret i64 undef +}