Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -247,8 +247,6 @@ Cost = getUserCost(I, kind); break; } - if (Cost == -1) - Cost.setInvalid(); return Cost; } @@ -326,12 +324,12 @@ /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) const; + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) const; /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. - int getUserCost(const User *U, TargetCostKind CostKind) const { + InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { SmallVector Operands(U->value_op_begin(), U->value_op_end()); return getUserCost(U, Operands, CostKind); @@ -1349,11 +1347,11 @@ private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. - int getInstructionLatency(const Instruction *I) const; + InstructionCost getInstructionLatency(const Instruction *I) const; /// Returns the expected throughput cost of the instruction. /// Returns -1 if the cost is unknown. - int getInstructionThroughput(const Instruction *I) const; + InstructionCost getInstructionThroughput(const Instruction *I) const; /// The abstract base class used to type erase specific TTI /// implementations. @@ -1380,8 +1378,9 @@ getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getUserCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -1629,7 +1628,7 @@ virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool hasActiveVectorLength() const = 0; - virtual int getInstructionLatency(const Instruction *I) = 0; + virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; }; template @@ -1658,8 +1657,8 @@ int getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) override { + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) override { return Impl.getUserCost(U, Operands, CostKind); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } @@ -2158,7 +2157,7 @@ return Impl.hasActiveVectorLength(); } - int getInstructionLatency(const Instruction *I) override { + InstructionCost getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } }; Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -844,8 +844,8 @@ return TTI::TCC_Basic; } - int getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast(this); // Handle non-intrinsic calls, invokes, and callbr. // FIXME: Unlikely to be true for anything but CodeSize. @@ -986,7 +986,9 @@ return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, SubIndex, VecTy); else if (Shuffle->changesLength()) - return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; + return CostKind == TTI::TCK_RecipThroughput + ? InstructionCost::getInvalid() + : 1; else if (Shuffle->isIdentity()) return 0; else if (Shuffle->isReverse()) @@ -1042,7 +1044,7 @@ return TTI::TCC_Basic; } - int getInstructionLatency(const Instruction *I) { + InstructionCost getInstructionLatency(const Instruction *I) { SmallVector Operands(I->value_op_begin(), I->value_op_end()); if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -512,7 +512,7 @@ SimplifyAndSetOp); } - int getInstructionLatency(const Instruction *I) { + InstructionCost getInstructionLatency(const Instruction *I) { if (isa(I)) return getST()->getSchedModel().DefaultLoadLatency; Index: llvm/include/llvm/Support/InstructionCost.h =================================================================== --- llvm/include/llvm/Support/InstructionCost.h +++ llvm/include/llvm/Support/InstructionCost.h @@ -207,6 +207,16 @@ void print(raw_ostream &OS) const; }; +inline bool operator==(const InstructionCost::CostType LHS, + const InstructionCost &RHS) { + return RHS == LHS; +} + +inline bool operator!=(const InstructionCost::CostType LHS, + const InstructionCost &RHS) { + return RHS != LHS; +} + inline InstructionCost operator+(const InstructionCost &LHS, const InstructionCost &RHS) { InstructionCost LHS2(LHS); Index: llvm/lib/Analysis/CodeMetrics.cpp =================================================================== --- llvm/lib/Analysis/CodeMetrics.cpp +++ llvm/lib/Analysis/CodeMetrics.cpp @@ -170,7 +170,10 @@ if (InvI->cannotDuplicate()) notDuplicatable = true; - NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); + InstructionCost Cost = + TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); + assert(Cost.isValid() && "Unexpected invalid cost"); + NumInsts += *Cost.getValue(); } if (isa(BB->getTerminator())) Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -263,12 +263,13 @@ return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } -int TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands, - enum TargetCostKind CostKind) const { - int Cost = TTIImpl->getUserCost(U, Operands, CostKind); - assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && - "TTI should not produce negative costs!"); +InstructionCost +TargetTransformInfo::getUserCost(const User *U, + ArrayRef Operands, + enum TargetCostKind CostKind) const { + InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind); + assert((CostKind == TTI::TCK_RecipThroughput || Cost.isValid()) && + "TTI should not produce invalid costs!"); return Cost; } @@ -1051,7 +1052,8 @@ return TTIImpl->getGISelRematGlobalCost(); } -int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { +InstructionCost +TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } @@ -1340,7 +1342,8 @@ return matchPairwiseReduction(Root, Opcode, Ty); } -int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { +InstructionCost +TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (I->getOpcode()) { @@ -1393,7 +1396,7 @@ return getUserCost(I, CostKind); default: // We don't have any information on this instruction. - return -1; + return InstructionCost::getInvalid(); } } Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1984,8 +1984,10 @@ SmallVector Operands(I.value_op_begin(), I.value_op_end()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost UserCost = + getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + assert(UserCost.isValid() && "Found invalid user cost"); + Cost += *UserCost.getValue(); } } Index: llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -161,8 +161,8 @@ /// @} - int getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind); // Hexagon specific decision to generate a lookup table. bool shouldBuildLookupTables() const; Index: llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -316,10 +316,9 @@ return ST.getL1CacheLineSize(); } -int -HexagonTTIImpl::getUserCost(const User *U, - ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost HexagonTTIImpl::getUserCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -57,8 +57,8 @@ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - unsigned getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -318,9 +318,9 @@ return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } -unsigned -PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost PPCTTIImpl::getUserCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa(U) || isa(U) || isa(U)) Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -280,7 +280,7 @@ // a significant amount of code here which can't be optimised out that it's // not profitable (as these instructions would get executed for each // iteration of the inner loop). - unsigned RepeatedInstrCost = 0; + InstructionCost RepeatedInstrCost = 0; for (auto *B : FI.OuterLoop->getBlocks()) { if (FI.InnerLoop->contains(B)) continue; @@ -310,7 +310,8 @@ if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI), m_Specific(FI.InnerLimit)))) continue; - int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost Cost = + TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); RepeatedInstrCost += Cost; } @@ -320,7 +321,8 @@ << RepeatedInstrCost << "\n"); // Bail out if flattening the loops would cause instructions in the outer // loop but not in the inner loop to be executed extra times. - if (RepeatedInstrCost > RepeatedInstructionThreshold) { + if (!RepeatedInstrCost.isValid() || + RepeatedInstrCost > RepeatedInstructionThreshold) { LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n"); return false; } Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -359,7 +359,7 @@ // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. - unsigned UnrolledCost = 0; + InstructionCost UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling @@ -527,7 +527,12 @@ // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I, CostKind); + InstructionCost InsnCost = TTI.getUserCost(&I, CostKind); + if (!InsnCost.isValid()) { + LLVM_DEBUG(dbgs() << " Encountered invalid baseline cost.\n"); + return None; + } + RolledDynamicCost += *InsnCost.getValue(); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after @@ -558,7 +563,7 @@ AddCostRecursively(I, Iteration); // If unrolled body turns out to be too big, bail out. - if (UnrolledCost > MaxUnrolledLoopSize) { + if (!UnrolledCost.isValid() || UnrolledCost > MaxUnrolledLoopSize) { LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize @@ -640,7 +645,7 @@ LLVM_DEBUG(dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n"); - return {{UnrolledCost, RolledDynamicCost}}; + return {{unsigned(*UnrolledCost.getValue()), RolledDynamicCost}}; } /// ApproximateLoopSize - Approximate the size of the loop. Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2712,7 +2712,7 @@ : TargetTransformInfo::TCK_SizeAndLatency; int LoopCost = 0; for (auto *BB : L.blocks()) { - int Cost = 0; + InstructionCost Cost = 0; for (auto &I : *BB) { if (EphValues.count(&I)) continue; @@ -2725,10 +2725,10 @@ Cost += TTI.getUserCost(&I, CostKind); } - assert(Cost >= 0 && "Must not have negative costs!"); - LoopCost += Cost; + assert(Cost.isValid() && "Must have valid costs!"); + LoopCost += *Cost.getValue(); assert(LoopCost >= 0 && "Must not have negative loop costs!"); - BBCostMap[BB] = Cost; + BBCostMap[BB] = *Cost.getValue(); } LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n"); Index: llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -447,7 +447,7 @@ // Now do a DFS across the operand graph of the users, computing cost as we // go and when all costs for a given PHI are known, checking that PHI for // profitability. - SmallDenseMap SpecCostMap; + SmallDenseMap SpecCostMap; visitPHIUsersAndDepsInPostOrder( PNs, /*IsVisited*/ @@ -462,7 +462,7 @@ [&](Instruction *I) { // We've fully visited the operands, so sum their cost with this node // and update the cost map. - int Cost = TTI.TCC_Free; + InstructionCost Cost = TTI.TCC_Free; for (Value *OpV : I->operand_values()) if (auto *OpI = dyn_cast(OpV)) { auto CostMapIt = SpecCostMap.find(OpI); @@ -494,7 +494,7 @@ // cost will be completely shared. SmallVector SpecWorklist; for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { - int SpecCost = TTI.TCC_Free; + InstructionCost SpecCost = TTI.TCC_Free; for (Use &U : PN->uses()) SpecCost += SpecCostMap.find(cast(U.getUser()))->second; @@ -503,7 +503,7 @@ // profitability. If profitable, we should mark it for speculation // and zero out the cost of everything it depends on. int CostSavings = CostSavingsMap.find(PN)->second; - if (SpecCost > CostSavings) { + if (!SpecCost.isValid() || SpecCost > CostSavings) { LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN << "\n" " Cost savings: " Index: llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -210,8 +210,8 @@ return false; } -static unsigned ComputeSpeculationCost(const Instruction *I, - const TargetTransformInfo &TTI) { +static InstructionCost ComputeSpeculationCost(const Instruction *I, + const TargetTransformInfo &TTI) { switch (Operator::getOpcode(I)) { case Instruction::GetElementPtr: case Instruction::Add: @@ -255,7 +255,8 @@ return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: - return UINT_MAX; // Disallow anything not explicitly listed. + return InstructionCost::getInvalid(); // Disallow anything not explicitly + // listed. } } @@ -288,11 +289,11 @@ return true; }; - unsigned TotalSpeculationCost = 0; + InstructionCost TotalSpeculationCost = 0; unsigned NotHoistedInstCount = 0; for (const auto &I : FromBlock) { - const unsigned Cost = ComputeSpeculationCost(&I, *TTI); - if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && + const InstructionCost Cost = ComputeSpeculationCost(&I, *TTI); + if (Cost.isValid() && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; if (TotalSpeculationCost > SpecExecMaxSpeculationCost) Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -360,7 +360,10 @@ const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); - return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost Cost = + TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + assert(Cost.isValid() && "Unexpected invalid user cost"); + return *Cost.getValue(); } /// If we have a merge point of an "if condition" as accepted above, @@ -3192,7 +3195,10 @@ return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost UserCost = + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + assert(UserCost.isValid() && "Unexpected invalid user cost"); + BudgetRemaining -= *UserCost.getValue(); if (BudgetRemaining < 0) return false; // Eagerly refuse to fold as soon as we're out of budget. }