Index: llvm/include/llvm/Analysis/CodeMetrics.h =================================================================== --- llvm/include/llvm/Analysis/CodeMetrics.h +++ llvm/include/llvm/Analysis/CodeMetrics.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_CODEMETRICS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/InstructionCost.h" namespace llvm { class AssumptionCache; @@ -48,13 +49,13 @@ bool usesDynamicAlloca = false; /// Number of instructions in the analyzed blocks. - unsigned NumInsts = false; + InstructionCost NumInsts = 0; /// Number of analyzed blocks. unsigned NumBlocks = false; /// Keeps track of basic block code size estimates. - DenseMap NumBBInsts; + DenseMap NumBBInsts; /// Keep track of the number of calls to 'big' functions. unsigned NumCalls = false; Index: llvm/include/llvm/Analysis/InstructionCost.h =================================================================== --- llvm/include/llvm/Analysis/InstructionCost.h +++ llvm/include/llvm/Analysis/InstructionCost.h @@ -209,6 +209,16 @@ void print(raw_ostream &OS) const; }; +inline bool operator==(const InstructionCost::CostType LHS, + const InstructionCost &RHS) { + return RHS == LHS; +} + +inline bool operator!=(const InstructionCost::CostType LHS, + const InstructionCost &RHS) { + return RHS != LHS; +} + inline InstructionCost operator+(const InstructionCost &LHS, const InstructionCost &RHS) { InstructionCost LHS2(LHS); Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -254,8 +254,6 @@ default: llvm_unreachable("Unknown instruction cost kind"); } - if (Cost == -1) - Cost.setInvalid(); return Cost; } @@ -333,12 +331,12 @@ /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) const; + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) const; /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. - int getUserCost(const User *U, TargetCostKind CostKind) const { + InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { SmallVector Operands(U->value_op_begin(), U->value_op_end()); return getUserCost(U, Operands, CostKind); @@ -1356,11 +1354,11 @@ private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. - int getInstructionLatency(const Instruction *I) const; + InstructionCost getInstructionLatency(const Instruction *I) const; /// Returns the expected throughput cost of the instruction. /// Returns -1 if the cost is unknown. - int getInstructionThroughput(const Instruction *I) const; + InstructionCost getInstructionThroughput(const Instruction *I) const; /// The abstract base class used to type erase specific TTI /// implementations. @@ -1387,8 +1385,9 @@ getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getUserCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -1636,7 +1635,7 @@ virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool hasActiveVectorLength() const = 0; - virtual int getInstructionLatency(const Instruction *I) = 0; + virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; }; template @@ -1665,9 +1664,12 @@ int getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - int getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) override { - return Impl.getUserCost(U, Operands, CostKind); + InstructionCost getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) override { + InstructionCost Cost = Impl.getUserCost(U, Operands, CostKind); + if (Cost == -1) + Cost.setInvalid(); + return Cost; } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool useGPUDivergenceAnalysis() override { @@ -2165,7 +2167,7 @@ return Impl.hasActiveVectorLength(); } - int getInstructionLatency(const Instruction *I) override { + InstructionCost getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } }; Index: llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -33,7 +33,7 @@ bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, - bool RotationOnly, unsigned Threshold, bool IsUtilMode); + bool RotationOnly, int Threshold, bool IsUtilMode); } // namespace llvm Index: llvm/lib/Analysis/CodeMetrics.cpp =================================================================== --- llvm/lib/Analysis/CodeMetrics.cpp +++ llvm/lib/Analysis/CodeMetrics.cpp @@ -116,7 +116,7 @@ const TargetTransformInfo &TTI, const SmallPtrSetImpl &EphValues) { ++NumBlocks; - unsigned NumInstsBeforeThisBB = NumInsts; + InstructionCost NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { // Skip ephemeral values. if (EphValues.count(&I)) @@ -176,6 +176,8 @@ if (isa(BB->getTerminator())) ++NumRets; + notDuplicatable |= !NumInsts.isValid(); + // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this indirect Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -265,12 +265,13 @@ return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } -int TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands, - enum TargetCostKind CostKind) const { - int Cost = TTIImpl->getUserCost(U, Operands, CostKind); - assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && - "TTI should not produce negative costs!"); +InstructionCost +TargetTransformInfo::getUserCost(const User *U, + ArrayRef Operands, + enum TargetCostKind CostKind) const { + InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind); + assert((CostKind == TTI::TCK_RecipThroughput || Cost.isValid()) && + "TTI should not produce invalid costs!"); return Cost; } @@ -1053,7 +1054,8 @@ return TTIImpl->getGISelRematGlobalCost(); } -int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { +InstructionCost +TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } @@ -1342,7 +1344,8 @@ return matchPairwiseReduction(Root, Opcode, Ty); } -int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { +InstructionCost +TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (I->getOpcode()) { @@ -1395,7 +1398,7 @@ return getUserCost(I, CostKind); default: // We don't have any information on this instruction. - return -1; + return InstructionCost::getInvalid(); } } Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -700,7 +700,8 @@ for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, *this, EphValues); // 6 is an approximate latency for the mtctr instruction. - if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) + if (!Metrics.NumInsts.isValid() || + Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) return false; } Index: llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -297,7 +297,10 @@ } Metrics.analyzeBasicBlock(BB, *TTI, EphValues); } - unsigned LoopSize = Metrics.NumInsts; + if (!Metrics.NumInsts.isValid()) + return MadeChange; + + unsigned LoopSize = *(Metrics.NumInsts.getValue()); if (!LoopSize) LoopSize = 1; Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -276,7 +276,7 @@ // a significant amount of code here which can't be optimised out that it's // not profitable (as these instructions would get executed for each // iteration of the inner loop). - unsigned RepeatedInstrCost = 0; + InstructionCost RepeatedInstrCost = 0; for (auto *B : FI.OuterLoop->getBlocks()) { if (FI.InnerLoop->contains(B)) continue; @@ -306,7 +306,8 @@ if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI), m_Specific(FI.InnerLimit)))) continue; - int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost Cost = + TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); RepeatedInstrCost += Cost; } Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -359,14 +359,14 @@ // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. - unsigned UnrolledCost = 0; + InstructionCost UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. - unsigned RolledDynamicCost = 0; + InstructionCost RolledDynamicCost = 0; // We track the simplification of each instruction in each iteration. We use // this to recursively merge costs into the unrolled cost on-demand so that @@ -527,7 +527,12 @@ // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I, CostKind); + InstructionCost InsnCost = TTI.getUserCost(&I, CostKind); + if (!InsnCost.isValid()) { + LLVM_DEBUG(dbgs() << " Encountered invalid baseline cost.\n"); + return None; + } + RolledDynamicCost += InsnCost; // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after @@ -640,7 +645,8 @@ LLVM_DEBUG(dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n"); - return {{UnrolledCost, RolledDynamicCost}}; + return {{unsigned(*(UnrolledCost.getValue())), + unsigned(*(RolledDynamicCost.getValue()))}}; } /// ApproximateLoopSize - Approximate the size of the loop. @@ -655,7 +661,14 @@ NotDuplicatable = Metrics.notDuplicatable; Convergent = Metrics.convergent; - unsigned LoopSize = Metrics.NumInsts; + if (!Metrics.NumInsts.isValid()) { + NotDuplicatable = true; + // Similar to the code below, we shouldn't allow an estimate of zero even + // though we're marking the loop as not duplicatable. + return BEInsns + 1; + } + + unsigned LoopSize = *(Metrics.NumInsts.getValue()); // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -303,7 +303,7 @@ ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); - Props.SizeEstimation = Metrics.NumInsts; + Props.SizeEstimation = *(Metrics.NumInsts.getValue()); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); Props.WasUnswitchedCount = 0; MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2712,7 +2712,7 @@ : TargetTransformInfo::TCK_SizeAndLatency; int LoopCost = 0; for (auto *BB : L.blocks()) { - int Cost = 0; + InstructionCost Cost = 0; for (auto &I : *BB) { if (EphValues.count(&I)) continue; @@ -2725,10 +2725,10 @@ Cost += TTI.getUserCost(&I, CostKind); } - assert(Cost >= 0 && "Must not have negative costs!"); - LoopCost += Cost; + assert(Cost.isValid() && "Must have valid costs!"); + LoopCost += *(Cost.getValue()); assert(LoopCost >= 0 && "Must not have negative loop costs!"); - BBCostMap[BB] = Cost; + BBCostMap[BB] = *(Cost.getValue()); } LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n"); Index: llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -447,7 +447,7 @@ // Now do a DFS across the operand graph of the users, computing cost as we // go and when all costs for a given PHI are known, checking that PHI for // profitability. - SmallDenseMap SpecCostMap; + SmallDenseMap SpecCostMap; visitPHIUsersAndDepsInPostOrder( PNs, /*IsVisited*/ @@ -462,7 +462,7 @@ [&](Instruction *I) { // We've fully visited the operands, so sum their cost with this node // and update the cost map. - int Cost = TTI.TCC_Free; + InstructionCost Cost = TTI.TCC_Free; for (Value *OpV : I->operand_values()) if (auto *OpI = dyn_cast(OpV)) { auto CostMapIt = SpecCostMap.find(OpI); @@ -494,7 +494,7 @@ // cost will be completely shared. SmallVector SpecWorklist; for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { - int SpecCost = TTI.TCC_Free; + InstructionCost SpecCost = TTI.TCC_Free; for (Use &U : PN->uses()) SpecCost += SpecCostMap.find(cast(U.getUser()))->second; Index: llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -210,8 +210,8 @@ return false; } -static unsigned ComputeSpeculationCost(const Instruction *I, - const TargetTransformInfo &TTI) { +static InstructionCost ComputeSpeculationCost(const Instruction *I, + const TargetTransformInfo &TTI) { switch (Operator::getOpcode(I)) { case Instruction::GetElementPtr: case Instruction::Add: @@ -255,7 +255,8 @@ return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: - return UINT_MAX; // Disallow anything not explicitly listed. + return InstructionCost::getInvalid(); // Disallow anything not explicitly + // listed. } } @@ -288,11 +289,11 @@ return true; }; - unsigned TotalSpeculationCost = 0; + InstructionCost TotalSpeculationCost = 0; unsigned NotHoistedInstCount = 0; for (const auto &I : FromBlock) { - const unsigned Cost = ComputeSpeculationCost(&I, *TTI); - if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && + const InstructionCost Cost = ComputeSpeculationCost(&I, *TTI); + if (Cost.isValid() && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; if (TotalSpeculationCost > SpecExecMaxSpeculationCost) Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -56,7 +56,7 @@ namespace { /// A simple loop rotation transformation. class LoopRotate { - const unsigned MaxHeaderSize; + const int MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; AssumptionCache *AC; @@ -68,10 +68,10 @@ bool IsUtilMode; public: - LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, - const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode) + LoopRotate(int MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, + AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, + MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, + bool RotationOnly, bool IsUtilMode) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {} @@ -744,8 +744,7 @@ AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly = true, - unsigned Threshold = unsigned(-1), - bool IsUtilMode = true) { + int Threshold = INT_MAX, bool IsUtilMode = true) { LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, IsUtilMode); return LR.processLoop(L); Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -352,8 +352,8 @@ /// which is assumed to be safe to speculate. TCC_Free means cheap, /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. -static unsigned ComputeSpeculationCost(const User *I, - const TargetTransformInfo &TTI) { +static InstructionCost ComputeSpeculationCost(const User *I, + const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); @@ -421,7 +421,13 @@ if (!isSafeToSpeculativelyExecute(I)) return false; - BudgetRemaining -= ComputeSpeculationCost(I, TTI); + InstructionCost SpecCost = ComputeSpeculationCost(I, TTI); + // If we cannot determine the cost then we cannot determine if we have + // exceeded the budget or not, so let's be conservative here. + if (!SpecCost.isValid()) + BudgetRemaining = -1; + else + BudgetRemaining -= *(SpecCost.getValue()); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -2044,8 +2050,8 @@ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; + InstructionCost OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + InstructionCost ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; unsigned MaxCost = 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) @@ -3182,7 +3188,12 @@ return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost UserCost = + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + if (UserCost.isValid()) + BudgetRemaining -= *(UserCost.getValue()); + else + BudgetRemaining = -1; if (BudgetRemaining < 0) return false; // Eagerly refuse to fold as soon as we're out of budget. } Index: llvm/unittests/Transforms/Utils/LoopRotationUtilsTest.cpp =================================================================== --- llvm/unittests/Transforms/Utils/LoopRotationUtilsTest.cpp +++ llvm/unittests/Transforms/Utils/LoopRotationUtilsTest.cpp @@ -85,10 +85,8 @@ Loop *L = *LI.begin(); - bool ret = LoopRotation(L, &LI, &TTI, - &AC, &DT, - &SE, nullptr, - SQ, true, -1, false); + bool ret = LoopRotation(L, &LI, &TTI, &AC, &DT, &SE, nullptr, SQ, true, + INT_MAX, false); EXPECT_TRUE(ret); } @@ -156,10 +154,8 @@ Loop *L = *LI.begin(); - bool ret = LoopRotation(L, &LI, &TTI, - &AC, &DT, - &SE, nullptr, - SQ, true, -1, false); + bool ret = LoopRotation(L, &LI, &TTI, &AC, &DT, &SE, nullptr, SQ, true, + INT_MAX, false); /// LoopRotation should properly report "true" as we still perform the first rotation /// so we do change the IR. EXPECT_TRUE(ret);