diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1253,12 +1253,20 @@ unsigned Index) const; /// \return The expected cost of control-flow related instructions such as - /// Phi, Ret, Br, Switch. + /// Ret, Br, Switch. InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, const Instruction *I = nullptr) const; + /// \return The expected cost of a phi node. \p Ty is the type of the phi node + /// and its incoming values, and \p OpInfos is any extra information about + /// the incoming values. + InstructionCost + getPHICost(Type *Ty, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + ArrayRef OpInfos = {}, + const Instruction *I = nullptr) const; + /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. 
The \p VecPred parameter can be used to indicate the select @@ -1884,6 +1892,9 @@ virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; + virtual InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr) = 0; virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, @@ -2471,6 +2482,11 @@ const Instruction *I = nullptr) override { return Impl.getCFInstrCost(Opcode, CostKind, I); } + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr) override { + return Impl.getPHICost(Ty, CostKind, OpInfos, I); + } InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -585,11 +585,15 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const { + return 1; + } + + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) const { // A phi would be free, unless we're costing the throughput because it // will require a register. - if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) - return 0; - return 1; + return CostKind != TTI::TCK_RecipThroughput ? 
0 : 1; } InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, @@ -1108,9 +1112,14 @@ } case Instruction::Br: case Instruction::Ret: - case Instruction::PHI: case Instruction::Switch: return TargetTTI->getCFInstrCost(Opcode, CostKind, I); + case Instruction::PHI: { + SmallVector OpInfos; + transform(I->operand_values(), std::back_inserter(OpInfos), + TTI::getOperandInfo); + return TargetTTI->getPHICost(I->getType(), CostKind, OpInfos, I); + } case Instruction::ExtractValue: case Instruction::Freeze: return TTI::TCC_Free; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -252,7 +252,7 @@ VT->getNumElements()), CostKind, -1, nullptr, nullptr) + getCFInstrCost(Instruction::Br, CostKind) + - getCFInstrCost(Instruction::PHI, CostKind)); + getPHICost(VT->getElementType(), CostKind)); } return LoadCost + PackingCost + ConditionalCost; @@ -1164,6 +1164,12 @@ return BaseT::getCFInstrCost(Opcode, CostKind, I); } + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos = {}, + const Instruction *I = nullptr) { + return BaseT::getPHICost(Ty, CostKind, OpInfos, I); + } + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -909,11 +909,25 @@ unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); + assert((Opcode == Instruction::Br || Opcode == Instruction::Ret || + Opcode == Instruction::Switch) && + "Opcode should be one of Br, Ret or Switch"); InstructionCost Cost = 
TTIImpl->getCFInstrCost(Opcode, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } +InstructionCost +TargetTransformInfo::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) const { + assert((I == nullptr || I->getOpcode() == Instruction::PHI) && + "Opcode should reflect passed instruction."); + InstructionCost Cost = TTIImpl->getPHICost(Ty, CostKind, OpInfos, I); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getCmpSelInstrCost( unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -170,6 +170,9 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr); InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2267,12 +2267,20 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return Opcode == Instruction::PHI ? 0 : 1; + return 1; assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind"); // Branches are assumed to be predicted. 
return 0; } +InstructionCost +AArch64TTIImpl::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) { + // PHIs are costed as free for every cost kind. + return 0; +} + InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I, Type *Val, unsigned Index, diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp @@ -95,7 +95,7 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) - return Opcode == Instruction::PHI ? 0 : 1; + return 1; // XXX - For some reason this isn't called for switch. switch (Opcode) { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -229,6 +229,10 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr); + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -469,6 +469,20 @@ return BaseT::getCFInstrCost(Opcode, CostKind, I); } +InstructionCost ARMTTIImpl::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) { + if (CostKind == TTI::TCK_RecipThroughput && + (ST->hasNEON() || ST->hasMVEIntegerOps())) { + // FIXME: The vectorizer is highly sensitive to the cost of these + // 
instructions, which suggests that it may be using the costs incorrectly. + // But, for now, just make them free to avoid performance regressions for + // vector targets. + return 0; + } + return BaseT::getPHICost(Ty, CostKind, OpInfos, I); +} + InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -121,6 +121,9 @@ const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr); InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -631,7 +631,14 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return Opcode == Instruction::PHI ? 0 : 1; + return 1; + // Branches are assumed to be predicted. + return 0; +} + +InstructionCost PPCTTIImpl::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) { // Branches are assumed to be predicted. 
return 0; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -230,6 +230,9 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + InstructionCost getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I = nullptr); InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5616,7 +5616,14 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return Opcode == Instruction::PHI ? 0 : 1; + return 1; + // Branches are assumed to be predicted. + return 0; +} + +InstructionCost X86TTIImpl::getPHICost(Type *Ty, TTI::TargetCostKind CostKind, + ArrayRef OpInfos, + const Instruction *I) { // Branches are assumed to be predicted. return 0; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4558,8 +4558,8 @@ // that we will create. This cost is likely to be zero. The phi node // cost, if any, should be scaled by the block probability because it // models a copy at the end of each predicated block. - ScalarizationCost += VF.getKnownMinValue() * - TTI.getCFInstrCost(Instruction::PHI, CostKind); + ScalarizationCost += + VF.getKnownMinValue() * TTI.getPHICost(I->getType(), CostKind); // The cost of the non-predicated instruction. 
ScalarizationCost += VF.getKnownMinValue() * @@ -6364,8 +6364,7 @@ cast(ToVectorTy(I->getType(), VF)), APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true, /*Extract*/ false, CostKind); - ScalarCost += - VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind); + ScalarCost += VF.getFixedValue() * TTI.getPHICost(I->getType(), CostKind); } // Compute the scalarization overhead of needed extractelement @@ -7214,17 +7213,22 @@ VF.getKnownMinValue() - 1); } + Type *VecTy = ToVectorTy(Phi->getType(), VF); + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are // converted into select instructions. We require N - 1 selects per phi // node, where N is the number of incoming values. if (VF.isVector() && Phi->getParent() != TheLoop->getHeader()) return (Phi->getNumIncomingValues() - 1) * TTI.getCmpSelInstrCost( - Instruction::Select, ToVectorTy(Phi->getType(), VF), + Instruction::Select, VecTy, ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF), CmpInst::BAD_ICMP_PREDICATE, CostKind); - return TTI.getCFInstrCost(Instruction::PHI, CostKind); + SmallVector OpInfos; + transform(I->operand_values(), std::back_inserter(OpInfos), + TTI::getOperandInfo); + return TTI.getPHICost(VecTy, CostKind, OpInfos); } case Instruction::UDiv: case Instruction::SDiv: