Index: lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -71,7 +71,13 @@ static cl::opt UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop.")); - +static cl::opt UnswitchSiblingsToplevelDiv( + "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, + cl::desc("toplevel siblings divisor for cost multiplier")); +static cl::opt UnswitchCandidatesBottomCap( + "unswitch-candidates-bottom-cap", cl::init(8), cl::Hidden, + cl::desc("amount of unswitch candidates that are ignored when calculating " + "cost multiplier")); static cl::opt UnswitchGuards( "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " @@ -2473,8 +2479,47 @@ int CandidateCost = ComputeUnswitchedCost( TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 && Invariants[0] == BI->getCondition())); + // Now we try to limit potentially exponential behavior of loop-unswitch + // by multiplying the cost in proportion of 2^number of unswitch candidates + // available. Also accounting for the number of "sibling" loops with the + // idea to account for previous unswitches that already happened on this + // cluster of loops. Formula is kept intentionally simple since it a cap for + // worst cases and not an attempt to make exact size predictions. + int CostMultiplier = 1; + if (!isGuard(&TI)) { + auto *ParentL = L.getParentLoop(); + int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size() + : std::distance(LI.begin(), LI.end())); + // Applying a "bottom cap" to allow a certain amount of unswitch + // candidates before going into prohibitive power-of-two. + unsigned CandidatesPower = std::max( + int(UnswitchCandidates.size() - (int)UnswitchCandidatesBottomCap), 0); + + // Allowing top-level loops to spread a bit more than nested ones. + int SiblingsMultiplier = + std::max((ParentL ? SiblingsCount + : SiblingsCount / (int)UnswitchSiblingsToplevelDiv), + 1); + // Handle possible overflow. UnswitchThreshold is a good upper boundary + // since multiplying by it will definitely move cost over the threshold. + if (CandidatesPower > Log2_32(UnswitchThreshold) || + SiblingsMultiplier > UnswitchThreshold) + CostMultiplier = UnswitchThreshold; + else + CostMultiplier = std::min(SiblingsMultiplier * (1 << CandidatesPower), + (int)UnswitchThreshold); + + LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier + << "(siblings " << SiblingsMultiplier + << " * candidates " << (1 << CandidatesPower) << ")" + << " for unswitch candidate: " << TI << "\n"); + } + + CandidateCost *= std::max(CostMultiplier, 1); LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost + << "(multiplier: " << CostMultiplier << ")" << " for unswitch candidate: " << TI << "\n"); + if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) { BestUnswitchTI = &TI; BestUnswitchCost = CandidateCost;