diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -153,7 +153,8 @@
   enum TargetCostKind {
     TCK_RecipThroughput, ///< Reciprocal throughput.
     TCK_Latency,         ///< The latency of instruction.
-    TCK_CodeSize         ///< Instruction code size.
+    TCK_CodeSize,        ///< Instruction code size.
+    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
   };
 
   /// Query the cost of a specified instruction.
@@ -172,7 +173,8 @@
       return getInstructionLatency(I);
 
     case TCK_CodeSize:
-      return getUserCost(I);
+    case TCK_SizeAndLatency:
+      return getUserCost(I, kind);
     }
     llvm_unreachable("Unknown instruction cost kind");
   }
@@ -263,14 +265,15 @@
   ///
   /// The returned cost is defined in terms of \c TargetCostConstants, see its
   /// comments for a detailed explanation of the cost values.
-  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                  TargetCostKind CostKind) const;
 
   /// This is a helper function which calls the two-argument getUserCost
   /// with \p Operands which are the current operands U has.
-  int getUserCost(const User *U) const {
+  int getUserCost(const User *U, TargetCostKind CostKind) const {
     SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                            U->value_op_end());
-    return getUserCost(U, Operands);
+    return getUserCost(U, Operands, CostKind);
   }
 
   /// Return true if branch divergence exists.
@@ -1170,7 +1173,8 @@
   getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                    ProfileSummaryInfo *PSI,
                                    BlockFrequencyInfo *BFI) = 0;
-  virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
+  virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                          TargetCostKind CostKind) = 0;
   virtual bool hasBranchDivergence() = 0;
   virtual bool useGPUDivergenceAnalysis() = 0;
   virtual bool isSourceOfDivergence(const Value *V) = 0;
@@ -1422,8 +1426,9 @@
   int getMemcpyCost(const Instruction *I) override {
     return Impl.getMemcpyCost(I);
   }
-  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
-    return Impl.getUserCost(U, Operands);
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                  TargetCostKind CostKind) override {
+    return Impl.getUserCost(U, Operands, CostKind);
   }
   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
   bool useGPUDivergenceAnalysis() override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -792,9 +792,11 @@
     return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
   }
 
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                       enum TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast<T *>(this);
 
+    // FIXME: Unlikely to be true for anything but CodeSize.
     if (const auto *CB = dyn_cast<CallBase>(U)) {
       const Function *F = CB->getCalledFunction();
       if (F) {
@@ -841,6 +843,7 @@
     case Instruction::SRem:
     case Instruction::UDiv:
     case Instruction::URem:
+      // FIXME: Unlikely to be true for CodeSize.
       return TTI::TCC_Expensive;
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
@@ -867,7 +870,7 @@
   int getInstructionLatency(const Instruction *I) {
     SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                            I->value_op_end());
-    if (getUserCost(I, Operands) == TTI::TCC_Free)
+    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
       return 0;
 
     if (isa<LoadInst>(I))
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -172,7 +172,7 @@
       if (InvI->cannotDuplicate())
         notDuplicatable = true;
 
-    NumInsts += TTI.getUserCost(&I);
+    NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
   }
 
   if (isa<ReturnInst>(BB->getTerminator()))
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -803,7 +803,8 @@
       Operands.push_back(SimpleOp);
     else
       Operands.push_back(*I);
-  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
+  return TargetTransformInfo::TCC_Free ==
+         TTI.getUserCost(&GEP, Operands, TargetTransformInfo::TCK_SizeAndLatency);
 }
 
 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
@@ -1051,7 +1052,8 @@
   if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
     SROAArgValues[&I] = SROAArg;
 
-  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+  return TargetTransformInfo::TCC_Free ==
+         TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
 }
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
@@ -1075,7 +1077,8 @@
   if (auto *SROAArg = getSROAArgForValueOrNull(Op))
     SROAArgValues[&I] = SROAArg;
 
-  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+  return TargetTransformInfo::TCC_Free ==
+         TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
 }
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
@@ -1105,7 +1108,8 @@
     break;
   }
 
-  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+  return TargetTransformInfo::TCC_Free ==
+         TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
 }
 
 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
@@ -1807,7 +1811,8 @@
 bool CallAnalyzer::visitInstruction(Instruction &I) {
   // Some instructions are free. All of the free intrinsics can also be
   // handled by SROA, etc.
-  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
+  if (TargetTransformInfo::TCC_Free ==
+      TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
     return true;
 
   // We found something we don't understand or can't handle. Mark any SROA-able
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -178,8 +178,9 @@
 }
 
 int TargetTransformInfo::getUserCost(const User *U,
-                                     ArrayRef<const Value *> Operands) const {
-  int Cost = TTIImpl->getUserCost(U, Operands);
+                                     ArrayRef<const Value *> Operands,
+                                     enum TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getUserCost(U, Operands, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1152,7 +1153,7 @@
 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
-    return getUserCost(I);
+    return getUserCost(I, TCK_RecipThroughput);
 
   case Instruction::Ret:
   case Instruction::PHI:
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6103,7 +6103,8 @@
   // If it's safe to speculatively execute, then it should not have side
   // effects; therefore, it's safe to sink and possibly *not* execute.
   return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
-         TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
+         TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
+         TargetTransformInfo::TCC_Expensive;
 }
 
 /// Returns true if a SelectInst should be turned into an explicit branch.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -242,7 +242,8 @@
   int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                              bool IsPairwiseForm, bool IsUnsigned);
 
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                       TTI::TargetCostKind CostKind);
 };
 
 class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -949,11 +949,12 @@
   CommonTTI.getUnrollingPreferences(L, SE, UP);
 }
 
-unsigned GCNTTIImpl::getUserCost(const User *U,
-                                 ArrayRef<const Value *> Operands) {
+unsigned
+GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                        TTI::TargetCostKind CostKind) {
   const Instruction *I = dyn_cast<Instruction>(U);
   if (!I)
-    return BaseT::getUserCost(U, Operands);
+    return BaseT::getUserCost(U, Operands, CostKind);
 
   // Estimate different operations to be optimized out
   switch (I->getOpcode()) {
@@ -980,7 +981,7 @@
       return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
                                    FMF, 1, II);
     } else {
-      return BaseT::getUserCost(U, Operands);
+      return BaseT::getUserCost(U, Operands, CostKind);
     }
   }
   case Instruction::ShuffleVector: {
@@ -994,7 +995,7 @@
       return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
 
     if (Shuffle->changesLength())
-      return BaseT::getUserCost(U, Operands);
+      return BaseT::getUserCost(U, Operands, CostKind);
 
     if (Shuffle->isIdentity())
       return 0;
@@ -1059,7 +1060,7 @@
     break;
   }
 
-  return BaseT::getUserCost(U, Operands);
+  return BaseT::getUserCost(U, Operands, CostKind);
 }
 
 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1362,7 +1362,7 @@
 
       SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                             I.value_op_end());
-      Cost += getUserCost(&I, Operands);
+      Cost += getUserCost(&I, Operands, TargetTransformInfo::TCK_CodeSize);
     }
   }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -148,7 +148,8 @@
 
   /// @}
 
-  int getUserCost(const User *U, ArrayRef<const Value *> Operands);
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                  TTI::TargetCostKind CostKind);
 
   // Hexagon specific decision to generate a lookup table.
   bool shouldBuildLookupTables() const;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -298,8 +298,10 @@
   return ST.getL1CacheLineSize();
 }
 
-int HexagonTTIImpl::getUserCost(const User *U,
-                                ArrayRef<const Value *> Operands) {
+int
+HexagonTTIImpl::getUserCost(const User *U,
+                            ArrayRef<const Value *> Operands,
+                            TTI::TargetCostKind CostKind) {
   auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
     if (!CI->isIntegerCast())
       return false;
@@ -321,7 +323,7 @@
   if (const CastInst *CI = dyn_cast<const CastInst>(U))
     if (isCastFoldedIntoLoad(CI))
       return TargetTransformInfo::TCC_Free;
-  return BaseT::getUserCost(U, Operands);
+  return BaseT::getUserCost(U, Operands, CostKind);
 }
 
 bool HexagonTTIImpl::shouldBuildLookupTables() const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -52,7 +52,8 @@
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                           Type *Ty);
 
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                       TTI::TargetCostKind CostKind);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -206,15 +206,16 @@
   return PPCTTIImpl::getIntImmCost(Imm, Ty);
 }
 
-unsigned PPCTTIImpl::getUserCost(const User *U,
-                                 ArrayRef<const Value *> Operands) {
+unsigned
+PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                        TTI::TargetCostKind CostKind) {
   if (U->getType()->isVectorTy()) {
     // Instructions that need to be split should cost more.
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
-    return LT.first * BaseT::getUserCost(U, Operands);
+    return LT.first * BaseT::getUserCost(U, Operands, CostKind);
   }
 
-  return BaseT::getUserCost(U, Operands);
+  return BaseT::getUserCost(U, Operands, CostKind);
 }
 
 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -181,7 +181,8 @@
 
   int getIntImmCost(const APInt &Imm, Type *Ty);
 
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                       TTI::TargetCostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3641,8 +3641,9 @@
   return X86TTIImpl::getIntImmCost(Imm, Ty);
 }
 
-unsigned X86TTIImpl::getUserCost(const User *U,
-                                 ArrayRef<const Value *> Operands) {
+unsigned
+X86TTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                        TTI::TargetCostKind CostKind) {
   if (isa<StoreInst>(U)) {
     Value *Ptr = U->getOperand(1);
     // Store instruction with index and scale costs 2 Uops.
@@ -3653,7 +3654,7 @@
     }
     return TTI::TCC_Basic;
   }
-  return BaseT::getUserCost(U, Operands);
+  return BaseT::getUserCost(U, Operands, CostKind);
 }
 
 // Return an average cost of Gather / Scatter instruction, maybe improved later
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1261,7 +1261,8 @@
                          const TargetTransformInfo *TTI) {
   if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
-    if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
+    if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
+        TargetTransformInfo::TCC_Free)
       return false;
     // For a GEP, we cannot simply use getUserCost because currently it
     // optimistically assume that a GEP will fold into addressing mode
@@ -1276,7 +1277,8 @@
     }
     return true;
   } else
-    return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
+    return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
+           TargetTransformInfo::TCC_Free;
 }
 
 /// Return true if the only users of this instruction are outside of
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -446,7 +446,7 @@
 
         // First accumulate the cost of this instruction.
         if (!Cost.IsFree) {
-          UnrolledCost += TTI.getUserCost(I);
+          UnrolledCost += TTI.getUserCost(I, TargetTransformInfo::TCK_CodeSize);
           LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
                             << Iteration << "): ");
           LLVM_DEBUG(I->dump());
@@ -539,7 +539,7 @@
 
       // Track this instruction's expected baseline cost when executing the
      // rolled loop form.
-      RolledDynamicCost += TTI.getUserCost(&I);
+      RolledDynamicCost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
 
       // Visit the instruction to analyze its loop cost after unrolling,
       // and if the visitor returns true, mark the instruction as free after
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -2660,7 +2660,7 @@
       if (CB->isConvergent() || CB->cannotDuplicate())
         return false;
 
-      Cost += TTI.getUserCost(&I);
+      Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
     }
     assert(Cost >= 0 && "Must not have negative costs!");
     LoopCost += Cost;
diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
--- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -465,7 +465,7 @@
       if (CostMapIt != SpecCostMap.end())
         Cost += CostMapIt->second;
     }
-    Cost += TTI.getUserCost(I);
+    Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
     bool Inserted = SpecCostMap.insert({I, Cost}).second;
     (void)Inserted;
     assert(Inserted && "Must not re-insert a cost during the DFS!");
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -244,7 +244,7 @@
     case Instruction::FNeg:
     case Instruction::ICmp:
     case Instruction::FCmp:
-      return TTI.getUserCost(I);
+      return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
 
     default:
       return UINT_MAX; // Disallow anything not whitelisted.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -330,7 +330,7 @@
                                 const TargetTransformInfo &TTI) {
   assert(isSafeToSpeculativelyExecute(I) &&
          "Instruction is not safe to speculatively execute!");
-  return TTI.getUserCost(I);
+  return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
 }
 
 /// If we have a merge point of an "if condition" as accepted above,
@@ -3045,7 +3045,7 @@
       return false; // Not in white-list - not worthwhile folding.
     // And finally, if this is a non-free instruction that we are okay
     // speculating, ensure that we consider the speculation budget.
-    BudgetRemaining -= TTI.getUserCost(&I);
+    BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
     if (BudgetRemaining < 0)
       return false; // Eagerly refuse to fold as soon as we're out of budget.
   }