Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -273,6 +273,11 @@ /// applies even if full unrolling is selected. This allows a target to fall /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. unsigned FullUnrollMaxCount; + /// Number of instructions that are optimized away when the "back edge" + /// becomes a "fall through" in the unrolled loop. + /// For now we count a conditional branch on a backedge and a comparison + /// feeding it. + unsigned BEInsns; /// Allow partial unrolling (unrolling of loops to expand the size of the /// loop body, not only to eliminate small constant-trip-count loops). bool Partial; Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -124,6 +124,7 @@ UP.Count = 0; UP.MaxCount = UINT_MAX; UP.FullUnrollMaxCount = UINT_MAX; + UP.BEInsns = 2; UP.Partial = false; UP.Runtime = false; UP.AllowRemainder = true; @@ -531,7 +532,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, - AssumptionCache *AC) { + AssumptionCache *AC, unsigned BEInsns) { SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); @@ -550,7 +551,7 @@ // that each loop has at least three instructions (likely a conditional // branch, a comparison feeding that branch, and some kind of loop increment // feeding that comparison instruction). - LoopSize = std::max(LoopSize, 3u); + LoopSize = std::max(LoopSize, BEInsns + 1); return LoopSize; } @@ -689,6 +690,14 @@ return false; } +// Returns the estimated size of the loop after unrolling it UP.Count times. 
+static uint64_t UnrolledLoopSize( + unsigned LoopSize, + const TargetTransformInfo::UnrollingPreferences &UP) { + assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; +} + // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, @@ -698,11 +707,6 @@ unsigned TripCount, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP) { - // BEInsns represents number of instructions optimized when "back edge" - // becomes "fall through" in unrolled loop. - // For now we count a conditional branch on a backedge and a comparison - // feeding it. - unsigned BEInsns = 2; // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -710,8 +714,7 @@ UP.Count = UnrollCount; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold) + if (UP.AllowRemainder && UnrolledLoopSize(LoopSize, UP) < UP.Threshold) return true; } @@ -723,13 +726,13 @@ UP.AllowExpensiveTripCount = true; UP.Force = true; if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + UnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return true; } bool PragmaFullUnroll = HasUnrollFullPragma(L); if (PragmaFullUnroll && TripCount != 0) { UP.Count = TripCount; - if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + if (UnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return false; } @@ -754,10 +757,10 @@ if (TripCount && TripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. 
- UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns; + UP.Count = TripCount; if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, - UnrolledSize, UnrolledSize)) { - UP.Count = TripCount; + UnrolledLoopSize(LoopSize, UP), + UnrolledLoopSize(LoopSize, UP))) { return ExplicitUnroll; } else { // The loop isn't that small, but we still can fully unroll it if that @@ -770,7 +773,6 @@ UP.PercentDynamicCostSavedThreshold, UP.DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) { - UP.Count = TripCount; return ExplicitUnroll; } } @@ -790,10 +792,10 @@ } if (UP.PartialThreshold != NoThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns; - if (UnrolledSize > UP.PartialThreshold) - UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) / - (LoopSize - BEInsns); + if (UnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) + UP.Count = + (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; while (UP.Count != 0 && TripCount % UP.Count != 0) @@ -804,11 +806,9 @@ // As we'll create fixup loop, do the type of unrolling only if // remainder loop is allowed. UP.Count = DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && + UnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } } if (UP.Count < 2) { if (PragmaEnableUnroll) @@ -853,14 +853,10 @@ } if (UP.Count == 0) UP.Count = DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. 
- while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && UnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } #ifndef NDEBUG unsigned OrigCount = UP.Count; @@ -912,8 +908,11 @@ unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime); unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" @@ -944,10 +943,6 @@ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, - ProvidedRuntime); - // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return false;