diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2399,10 +2399,10 @@ /// The recursive computation is memozied into the provided DT-indexed cost map /// to allow querying it for most nodes in the domtree without it becoming /// quadratic. -static int -computeDomSubtreeCost(DomTreeNode &N, - const SmallDenseMap &BBCostMap, - SmallDenseMap &DTCostMap) { +static InstructionCost computeDomSubtreeCost( + DomTreeNode &N, + const SmallDenseMap &BBCostMap, + SmallDenseMap &DTCostMap) { // Don't accumulate cost (or recurse through) blocks not in our block cost // map and thus not part of the duplication cost being considered. auto BBCostIt = BBCostMap.find(N.getBlock()); @@ -2416,8 +2416,9 @@ // If not, we have to compute it. We can't use insert above and update // because computing the cost may insert more things into the map. - int Cost = std::accumulate( - N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) { + InstructionCost Cost = std::accumulate( + N.begin(), N.end(), BBCostIt->second, + [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost { return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap); }); bool Inserted = DTCostMap.insert({&N, Cost}).second; @@ -2699,7 +2700,7 @@ // subsets of the loop for duplication during unswitching. SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(&L, &AC, EphValues); - SmallDenseMap BBCostMap; + SmallDenseMap BBCostMap; // Compute the cost of each block, as well as the total loop cost. Also, bail // out if we see instructions which are incompatible with loop unswitching @@ -2710,9 +2711,9 @@ L.getHeader()->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; - int LoopCost = 0; + InstructionCost LoopCost = 0; for (auto *BB : L.blocks()) { - int Cost = 0; + InstructionCost Cost = 0; for (auto &I : *BB) { if (EphValues.count(&I)) continue; @@ -2746,14 +2747,15 @@ // This requires memoizing each dominator subtree to avoid redundant work. // // FIXME: Need to actually do the number of candidates part above. - SmallDenseMap DTCostMap; + SmallDenseMap DTCostMap; // Given a terminator which might be unswitched, computes the non-duplicated // cost for that terminator. - auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) { + auto ComputeUnswitchedCost = [&](Instruction &TI, + bool FullUnswitch) -> InstructionCost { BasicBlock &BB = *TI.getParent(); SmallPtrSet Visited; - int Cost = LoopCost; + InstructionCost Cost = 0; for (BasicBlock *SuccBB : successors(&BB)) { // Don't count successors more than once. if (!Visited.insert(SuccBB).second) @@ -2787,8 +2789,8 @@ llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { return PredBB == &BB || DT.dominates(SuccBB, PredBB); })) { - Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap); - assert(Cost >= 0 && + Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap); + assert(Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"); } } @@ -2801,16 +2803,16 @@ int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size(); assert(SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"); - return Cost * (SuccessorsCount - 1); + return (LoopCost - Cost) * (SuccessorsCount - 1); }; Instruction *BestUnswitchTI = nullptr; - int BestUnswitchCost = 0; + InstructionCost BestUnswitchCost = 0; ArrayRef BestUnswitchInvariants; for (auto &TerminatorAndInvariants : UnswitchCandidates) { Instruction &TI = *TerminatorAndInvariants.first; ArrayRef Invariants = TerminatorAndInvariants.second; BranchInst *BI = dyn_cast(&TI); - int CandidateCost = ComputeUnswitchedCost( + InstructionCost CandidateCost = ComputeUnswitchedCost( TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 && Invariants[0] == BI->getCondition())); // Calculate cost multiplier which is a tool to limit potentially