Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -38,6 +38,7 @@ class BlockFrequencyInfo; class DominatorTree; class BranchInst; +class ExtractElementInst; class Function; class GlobalValue; class IntrinsicInst; @@ -48,6 +49,7 @@ class ProfileSummaryInfo; class SCEV; class ScalarEvolution; +class ShuffleVectorInst; class StoreInst; class SwitchInst; class TargetLibraryInfo; @@ -160,28 +162,6 @@ TCK_SizeAndLatency ///< The weighted sum of size and latency. }; - /// Query the cost of a specified instruction. - /// - /// Clients should use this interface to query the cost of an existing - /// instruction. The instruction must have a valid parent (basic block). - /// - /// Note, this method does not cache the cost calculation and it - /// can be expensive in some cases. - int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { - switch (kind) { - case TCK_RecipThroughput: - return getInstructionThroughput(I); - - case TCK_Latency: - return getInstructionLatency(I); - - case TCK_CodeSize: - case TCK_SizeAndLatency: - return getUserCost(I, kind); - } - llvm_unreachable("Unknown instruction cost kind"); - } - /// Underlying constants for 'cost' values in this interface. /// /// Many APIs in this interface return a cost. This enum defines the @@ -362,6 +342,39 @@ unsigned ScaleCost; }; + /// Kind of the reduction data. + enum ReductionKind { + RK_None, /// Not a reduction. + RK_Arithmetic, /// Binary reduction data. + RK_MinMax, /// Min/max reduction data. + RK_UnsignedMinMax, /// Unsigned min/max reduction data. + }; + + /// Contains opcode + LHS/RHS parts of the reduction operations. 
+ struct ReductionData { + ReductionData() = delete; + ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) + : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { + assert(Kind != RK_None && "expected binary or min/max reduction only."); + } + unsigned Opcode = 0; + Value *LHS = nullptr; + Value *RHS = nullptr; + ReductionKind Kind = RK_None; + bool hasSameData(ReductionData &RD) const { + return Kind == RD.Kind && Opcode == RD.Opcode; + } + }; + + static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, + bool IsLeft, unsigned Level); + + static ReductionKind matchPairwiseReduction( + const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty); + + static ReductionKind matchVectorSplittingReduction( + const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty); + /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the @@ -1163,10 +1176,6 @@ /// Returns 1 as the default value. int getInstructionLatency(const Instruction *I) const; - /// Returns the expected throughput cost of the instruction. - /// Returns -1 if the cost is unknown. - int getInstructionThroughput(const Instruction *I) const; - /// The abstract base class used to type erase specific TTI /// implementations. 
class Concept; Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -19,6 +19,8 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -817,10 +819,27 @@ CostKind); } - unsigned getUserCost(const User *U, ArrayRef Operands, + int getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind) { + if (CostKind == TTI::TCK_Latency) + return getInstructionLatency(cast(U)); + auto *TargetTTI = static_cast(this); + if (isa(U) && CostKind == TTI::TCK_RecipThroughput) { + if (const IntrinsicInst *II = dyn_cast(U)) { + SmallVector Args(II->arg_operands()); + + FastMathFlags FMF; + if (auto *FPMO = dyn_cast(II)) + FMF = FPMO->getFastMathFlags(); + + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, + FMF, 1, CostKind, II); + } + return -1; + } + // FIXME: Unlikely to be true for anything but CodeSize. if (const auto *CB = dyn_cast(U)) { const Function *F = CB->getCalledFunction(); @@ -845,10 +864,16 @@ U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr; unsigned Opcode = Operator::getOpcode(U); auto *I = dyn_cast(U); + int Cost = CostKind == TTI::TCK_RecipThroughput ? 
-1 : 0; + switch (Opcode) { default: + Cost = TTI::TCC_Basic; break; + case Instruction::Br: + case Instruction::Ret: case Instruction::PHI: + return TargetTTI->getCFInstrCost(Opcode, CostKind); case Instruction::ExtractValue: case Instruction::Freeze: return TTI::TCC_Free; @@ -862,34 +887,201 @@ GEP->getPointerOperand(), Operands.drop_front()); } - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::SDiv: - case Instruction::SRem: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: case Instruction::URem: - // FIXME: Unlikely to be true for CodeSize. - return TTI::TCC_Expensive; + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + TTI::OperandValueKind Op1VK, Op2VK; + TTI::OperandValueProperties Op1VP, Op2VP; + Op1VK = TTI::getOperandInfo(U->getOperand(0), Op1VP); + Op2VK = TTI::getOperandInfo(U->getOperand(1), Op2VP); + SmallVector Operands(U->operand_values()); + Cost = TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, + Op1VK, Op2VK, + Op1VP, Op2VP, Operands, I); + break; + } + case Instruction::FNeg: { + TTI::OperandValueKind Op1VK, Op2VK; + TTI::OperandValueProperties Op1VP, Op2VP; + Op1VK = TTI::getOperandInfo(I->getOperand(0), Op1VP); + Op2VK = TTI::OK_AnyValue; + Op2VP = TTI::OP_None; + SmallVector Operands(I->operand_values()); + Cost = TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, + Op1VK, Op2VK, + Op1VP, Op2VP, Operands, I); + break; + } + case Instruction::Select: + Cost = TargetTTI->getCmpSelInstrCost(Opcode, Ty, U->getOperand(0)->getType(), + CostKind, I); + break; + case Instruction::ICmp: + case Instruction::FCmp: + Cost = TargetTTI->getCmpSelInstrCost(Opcode, U->getOperand(0)->getType(), + 
Ty, CostKind, I); + break; + case Instruction::Store: { + const StoreInst *SI = cast(I); + Type *ValTy = SI->getValueOperand()->getType(); + Cost = TargetTTI->getMemoryOpCost(I->getOpcode(), ValTy, + MaybeAlign(SI->getAlignment()), + SI->getPointerAddressSpace(), CostKind, I); + break; + } + case Instruction::Load: { + const LoadInst *LI = cast(I); + Cost = TargetTTI->getMemoryOpCost(Opcode, Ty, + MaybeAlign(LI->getAlignment()), + LI->getPointerAddressSpace(), CostKind, I); + break; + } + case Instruction::FPToUI: + case Instruction::FPToSI: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::Trunc: + case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::FPExt: case Instruction::SExt: case Instruction::ZExt: - if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == - TTI::TCC_Free) - return TTI::TCC_Free; + case Instruction::AddrSpaceCast: + Cost = TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I); + break; + case Instruction::ExtractElement: { + auto IsVectorSplittingReduction = [&](const ExtractElementInst *EEI) { + unsigned ReduxOpCode; + VectorType *ReduxType; + switch (TTI::matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { + case TTI::RK_Arithmetic: + Cost = TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/false, + CostKind); + return true; + case TTI::RK_MinMax: + Cost = TargetTTI->getMinMaxReductionCost( + ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind); + return true; + case TTI::RK_UnsignedMinMax: + Cost = TargetTTI->getMinMaxReductionCost( + ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/true, CostKind); + return true; + case TTI::RK_None: + break; + } + return false; + }; + + auto IsPairWiseReduction = [&](const ExtractElementInst *EEI) { + unsigned ReduxOpCode; + VectorType *ReduxType; + switch (TTI::matchPairwiseReduction(EEI, 
ReduxOpCode, ReduxType)) { + case TTI::RK_Arithmetic: + Cost = TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/true, CostKind); + return true; + case TTI::RK_MinMax: + Cost = TargetTTI->getMinMaxReductionCost( + ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/false, CostKind); + return true; + case TTI::RK_UnsignedMinMax: + Cost = TargetTTI->getMinMaxReductionCost( + ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/true, CostKind); + return true; + case TTI::RK_None: + break; + } + return false; + }; + + const ExtractElementInst *EEI = cast(I); + ConstantInt *CI = dyn_cast(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by an extractelement). + if (IsVectorSplittingReduction(EEI)) + break; + else if (IsPairWiseReduction(EEI)) + break; + + Cost = TargetTTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); break; } - // By default, just classify everything as 'basic'. - return TTI::TCC_Basic; + case Instruction::InsertElement: { + const InsertElementInst *IE = cast(I); + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + Cost = TargetTTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); + break; + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast(I); + auto *Ty = cast(Shuffle->getType()); + auto *SrcTy = cast(Shuffle->getOperand(0)->getType()); + + // TODO: Identify and add costs for insert subvector, etc. + int SubIndex; + if (Shuffle->isExtractSubvectorMask(SubIndex)) + return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty); + + if (Shuffle->changesLength()) + return CostKind == TTI::TCK_RecipThroughput ? 
-1 : 1; + + if (Shuffle->isIdentity()) + return 0; + + if (Shuffle->isReverse()) + Cost = TargetTTI->getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr); + else if (Shuffle->isSelect()) + Cost = TargetTTI->getShuffleCost(TTI::SK_Select, Ty, 0, nullptr); + else if (Shuffle->isTranspose()) + Cost = TargetTTI->getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr); + else if (Shuffle->isZeroEltSplat()) + Cost = TargetTTI->getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr); + else if (Shuffle->isSingleSource()) + Cost = TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr); + else + Cost = TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr); + } + } + + if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) { + // By default, just classify everything as 'basic'. + return Cost == 0 ? Cost : TTI::TCC_Basic; + } + return Cost; } int getInstructionLatency(const Instruction *I) { SmallVector Operands(I->value_op_begin(), I->value_op_end()); - if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) + if (getUserCost(I, Operands, TTI::TCK_SizeAndLatency) == TTI::TCC_Free) return 0; if (isa(I)) Index: llvm/lib/Analysis/CostModel.cpp =================================================================== --- llvm/lib/Analysis/CostModel.cpp +++ llvm/lib/Analysis/CostModel.cpp @@ -54,8 +54,8 @@ /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. 
- unsigned getInstructionCost(const Instruction *I) const { - return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput); + unsigned getUserCost(const Instruction *I) const { + return TTI->getUserCost(I, TargetTransformInfo::TCK_RecipThroughput); } private: @@ -100,7 +100,7 @@ for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - unsigned Cost = TTI->getInstructionCost(&Inst, CostKind); + unsigned Cost = TTI->getUserCost(&Inst, CostKind); if (Cost != (unsigned)-1) OS << "Cost Model: Found an estimated cost of " << Cost; else Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -164,7 +164,8 @@ const User *U, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); + assert((Cost >= 0 || CostKind == TTI::TCK_RecipThroughput) && + "TTI should not produce negative costs!"); return Cost; } @@ -178,7 +179,8 @@ ArrayRef Operands, enum TargetCostKind CostKind) const { int Cost = TTIImpl->getUserCost(U, Operands, CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); + assert((Cost >= 0 || CostKind == TTI::TCK_RecipThroughput) && + "TTI should not produce negative costs!"); return Cost; } @@ -873,8 +875,9 @@ return TTIImpl->getInstructionLatency(I); } -static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, - unsigned Level) { +bool +TargetTransformInfo::matchPairwiseShuffleMask(ShuffleVectorInst *SI, + bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of // the vector. if (!SI && Level == 0 && IsLeft) @@ -893,35 +896,10 @@ return Mask == ActualMask; } -namespace { -/// Kind of the reduction data. -enum ReductionKind { - RK_None, /// Not a reduction. 
- RK_Arithmetic, /// Binary reduction data. - RK_MinMax, /// Min/max reduction data. - RK_UnsignedMinMax, /// Unsigned min/max reduction data. -}; -/// Contains opcode + LHS/RHS parts of the reduction operations. -struct ReductionData { - ReductionData() = delete; - ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) - : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { - assert(Kind != RK_None && "expected binary or min/max reduction only."); - } - unsigned Opcode = 0; - Value *LHS = nullptr; - Value *RHS = nullptr; - ReductionKind Kind = RK_None; - bool hasSameData(ReductionData &RD) const { - return Kind == RD.Kind && Opcode == RD.Opcode; - } -}; -} // namespace - -static Optional getReductionData(Instruction *I) { +static Optional getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) - return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || @@ -930,20 +908,20 @@ m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); - return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R); } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); - return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R); } } return llvm::None; } -static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, - unsigned Level, - unsigned NumLevels) { +static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I, + unsigned Level, + unsigned NumLevels) { // Match one level of pairwise operations. 
// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> @@ -951,24 +929,24 @@ // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (!I) - return RK_None; + return TTI::RK_None; assert(I->getType()->isVectorTy() && "Expecting a vector type"); - Optional RD = getReductionData(I); + Optional RD = getReductionData(I); if (!RD) - return RK_None; + return TTI::RK_None; ShuffleVectorInst *LS = dyn_cast(RD->LHS); if (!LS && Level) - return RK_None; + return TTI::RK_None; ShuffleVectorInst *RS = dyn_cast(RD->RHS); if (!RS && Level) - return RK_None; + return TTI::RK_None; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) - return RK_None; + return TTI::RK_None; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; @@ -977,7 +955,7 @@ if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) - return RK_None; + return TTI::RK_None; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { @@ -988,32 +966,32 @@ // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != RD->RHS) - return RK_None; + return TTI::RK_None; else if (NextLevelOpR && NextLevelOpR != RD->LHS) - return RK_None; + return TTI::RK_None; NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; } else - return RK_None; + return TTI::RK_None; // Check that the next levels binary operation exists and matches with the // current one. if (Level + 1 != NumLevels) { - Optional NextLevelRD = + Optional NextLevelRD = getReductionData(cast(NextLevelOp)); if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) - return RK_None; + return TTI::RK_None; } // Shuffle mask for pairwise operation must match. 
- if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { - if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) - return RK_None; - } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { - if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) - return RK_None; + if (TTI::matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { + if (!TTI::matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) + return TTI::RK_None; + } else if (TTI::matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { + if (!TTI::matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) + return TTI::RK_None; } else { - return RK_None; + return TTI::RK_None; } if (++Level == NumLevels) @@ -1024,11 +1002,12 @@ NumLevels); } -static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, +TTI::ReductionKind +TargetTransformInfo::matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) - return RK_None; + return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); @@ -1036,19 +1015,19 @@ if (CI) Idx = CI->getZExtValue(); if (Idx != 0) - return RK_None; + return TTI::RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) - return RK_None; - Optional RD = getReductionData(RdxStart); + return TTI::RK_None; + Optional RD = getReductionData(RdxStart); if (!RD) - return RK_None; + return TTI::RK_None; auto *VecTy = cast(RdxStart->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) - return RK_None; + return TTI::RK_None; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. 
@@ -1069,8 +1048,8 @@ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == - RK_None) - return RK_None; + TTI::RK_None) + return TTI::RK_None; Opcode = RD->Opcode; Ty = VecTy; @@ -1089,11 +1068,11 @@ return std::make_pair(L, S); } -static ReductionKind -matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, VectorType *&Ty) { +TTI::ReductionKind +TargetTransformInfo::matchVectorSplittingReduction( + const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) - return RK_None; + return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); @@ -1101,19 +1080,19 @@ if (CI) Idx = CI->getZExtValue(); if (Idx != 0) - return RK_None; + return TTI::RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) - return RK_None; - Optional RD = getReductionData(RdxStart); + return TTI::RK_None; + Optional RD = getReductionData(RdxStart); if (!RD) - return RK_None; + return TTI::RK_None; auto *VecTy = cast(ReduxRoot->getOperand(0)->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) - return RK_None; + return TTI::RK_None; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. @@ -1133,10 +1112,10 @@ while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) - return RK_None; - Optional RDLevel = getReductionData(RdxOp); + return TTI::RK_None; + Optional RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) - return RK_None; + return TTI::RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; @@ -1145,9 +1124,9 @@ // Check the current reduction operation and the shuffle use the same value. 
if (Shuffle == nullptr) - return RK_None; + return TTI::RK_None; if (Shuffle->getOperand(0) != NextRdxOp) - return RK_None; + return TTI::RK_None; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) @@ -1157,7 +1136,7 @@ ArrayRef Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) - return RK_None; + return TTI::RK_None; RdxOp = dyn_cast(NextRdxOp); NumVecElemsRemain /= 2; @@ -1169,206 +1148,6 @@ return RD->Kind; } -int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - - switch (I->getOpcode()) { - case Instruction::GetElementPtr: - return getUserCost(I, CostKind); - - case Instruction::Ret: - case Instruction::PHI: - case Instruction::Br: { - return getCFInstrCost(I->getOpcode(), CostKind); - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - TargetTransformInfo::OperandValueKind Op1VK, Op2VK; - TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; - Op1VK = getOperandInfo(I->getOperand(0), Op1VP); - Op2VK = getOperandInfo(I->getOperand(1), Op2VP); - SmallVector Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind, - Op1VK, Op2VK, - Op1VP, Op2VP, Operands, I); - } - case Instruction::FNeg: { - TargetTransformInfo::OperandValueKind Op1VK, Op2VK; - TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; - Op1VK = getOperandInfo(I->getOperand(0), Op1VP); - Op2VK = OK_AnyValue; - Op2VP = OP_None; - SmallVector Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), 
I->getType(), CostKind, - Op1VK, Op2VK, - Op1VP, Op2VP, Operands, I); - } - case Instruction::Select: { - const SelectInst *SI = cast(I); - Type *CondTy = SI->getCondition()->getType(); - return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, - CostKind, I); - } - case Instruction::ICmp: - case Instruction::FCmp: { - Type *ValTy = I->getOperand(0)->getType(); - return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), - CostKind, I); - } - case Instruction::Store: { - const StoreInst *SI = cast(I); - Type *ValTy = SI->getValueOperand()->getType(); - return getMemoryOpCost(I->getOpcode(), ValTy, - MaybeAlign(SI->getAlignment()), - SI->getPointerAddressSpace(), CostKind, I); - } - case Instruction::Load: { - const LoadInst *LI = cast(I); - return getMemoryOpCost(I->getOpcode(), I->getType(), - MaybeAlign(LI->getAlignment()), - LI->getPointerAddressSpace(), CostKind, I); - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: { - Type *SrcTy = I->getOperand(0)->getType(); - return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I); - } - case Instruction::ExtractElement: { - const ExtractElementInst *EEI = cast(I); - ConstantInt *CI = dyn_cast(I->getOperand(1)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - - // Try to match a reduction sequence (series of shufflevector and vector - // adds followed by a extractelement). 
- unsigned ReduxOpCode; - VectorType *ReduxType; - - switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/false, - CostKind); - case RK_MinMax: - return getMinMaxReductionCost( - ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return getMinMaxReductionCost( - ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); - case RK_None: - break; - } - - switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/true, CostKind); - case RK_MinMax: - return getMinMaxReductionCost( - ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return getMinMaxReductionCost( - ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); - case RK_None: - break; - } - - return getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), - Idx); - } - case Instruction::InsertElement: { - const InsertElementInst *IE = cast(I); - ConstantInt *CI = dyn_cast(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); - } - case Instruction::ExtractValue: - return 0; // Model all ExtractValue nodes as free. - case Instruction::ShuffleVector: { - const ShuffleVectorInst *Shuffle = cast(I); - auto *Ty = cast(Shuffle->getType()); - auto *SrcTy = cast(Shuffle->getOperand(0)->getType()); - - // TODO: Identify and add costs for insert subvector, etc. 
- int SubIndex; - if (Shuffle->isExtractSubvectorMask(SubIndex)) - return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty); - - if (Shuffle->changesLength()) - return -1; - - if (Shuffle->isIdentity()) - return 0; - - if (Shuffle->isReverse()) - return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr); - - if (Shuffle->isSelect()) - return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr); - - if (Shuffle->isTranspose()) - return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr); - - if (Shuffle->isZeroEltSplat()) - return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr); - - if (Shuffle->isSingleSource()) - return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr); - - return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr); - } - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast(I)) { - SmallVector Args(II->arg_operands()); - - FastMathFlags FMF; - if (auto *FPMO = dyn_cast(II)) - FMF = FPMO->getFastMathFlags(); - - return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, - FMF, 1, CostKind, II); - } - return -1; - default: - // We don't have any information on this instruction. 
- return -1; - } -} - TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} Index: llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp =================================================================== --- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -1160,7 +1160,7 @@ for (auto &I : Is) { // Compute the old cost InstructionCost += - TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency); + TTI.getUserCost(I, TargetTransformInfo::TCK_Latency); // The final SVIs are allowed not to be dead, all uses will be replaced if (SVIs.find(I) != SVIs.end()) Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -262,7 +262,7 @@ if (isa(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, - TTI::TCK_RecipThroughput); + TTI::TCK_SizeAndLatency); } } @@ -998,7 +998,7 @@ // Comparison between memory and immediate. 
if (UserI->getOpcode() == Instruction::ICmp) if (ConstantInt *CI = dyn_cast(UserI->getOperand(1))) - if (isUInt<16>(CI->getZExtValue())) + if (CI->getBitWidth() <= 64 && isUInt<16>(CI->getZExtValue())) return true; return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64); break; Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp =================================================================== --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -230,7 +230,7 @@ for (Instruction &I : BB->instructionsWithoutDebug()) if (&I != BB->getTerminator()) Benefit += - TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); return Benefit; } Index: llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -211,7 +211,7 @@ unsigned Cost = 0; for (auto &InstBeforeCall : llvm::make_range(CallSiteBB->begin(), CB.getIterator())) { - Cost += TTI.getInstructionCost(&InstBeforeCall, + Cost += TTI.getUserCost(&InstBeforeCall, TargetTransformInfo::TCK_CodeSize); if (Cost >= DuplicationThreshold) return false; Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3610,17 +3610,17 @@ if (NeedToShuffleReuses) { for (unsigned Idx : E->ReuseShuffleIndices) { Instruction *I = cast(VL[Idx]); - ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind); + ReuseShuffleCost -= TTI->getUserCost(I, CostKind); } for (Value *V : VL) { Instruction *I = cast(V); - ReuseShuffleCost += TTI->getInstructionCost(I, CostKind); + ReuseShuffleCost += TTI->getUserCost(I, CostKind); } } for (Value *V : VL) { Instruction *I = cast(V); assert(E->isOpcodeOrAlt(I) 
&& "Unexpected main/alternate opcode"); - ScalarCost += TTI->getInstructionCost(I, CostKind); + ScalarCost += TTI->getUserCost(I, CostKind); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle.