diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -275,6 +275,11 @@ /// applies even if full unrolling is selected. This allows a target to fall /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. unsigned FullUnrollMaxCount; + /// Represents the number of instructions optimized when a "back edge" + /// becomes a "fall through" in the unrolled loop. + /// For now we count a conditional branch on a backedge and a comparison + /// feeding it. + unsigned BEInsns; /// Allow partial unrolling (unrolling of loops to expand the size of the /// loop body, not only to eliminate small constant-trip-count loops). bool Partial; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -285,6 +285,10 @@ // Avoid unrolling when optimizing for size. UP.OptSizeThreshold = 0; UP.PartialOptSizeThreshold = 0; + + // Set the number of instructions optimized when a "back edge" + // becomes a "fall through" to its default value of 2. 
+ UP.BEInsns = 2; } /// @} diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -126,6 +126,7 @@ UP.DefaultUnrollRuntimeCount = 8; UP.MaxCount = UINT_MAX; UP.FullUnrollMaxCount = UINT_MAX; + UP.BEInsns = 2; UP.Partial = false; UP.Runtime = false; UP.AllowRemainder = true; @@ -541,7 +542,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, - AssumptionCache *AC) { + AssumptionCache *AC, unsigned BEInsns) { SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); @@ -560,7 +561,7 @@ // that each loop has at least three instructions (likely a conditional // branch, a comparison feeding that branch, and some kind of loop increment // feeding that comparison instruction). - LoopSize = std::max(LoopSize, 3u); + LoopSize = std::max(LoopSize, BEInsns + 1); return LoopSize; } @@ -699,6 +700,14 @@ return false; } +// Estimates the unrolled size: body replicated UP.Count times, BEInsns once. +static uint64_t getUnrolledLoopSize( + unsigned LoopSize, + const TargetTransformInfo::UnrollingPreferences &UP) { + assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; +} + // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. static bool computeUnrollCount( @@ -706,11 +715,6 @@ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { - // BEInsns represents number of instructions optimized when "back edge" - // becomes "fall through" in unrolled loop. - // For now we count a conditional branch on a backedge and a comparison - // feeding it. 
- unsigned BEInsns = 2; // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -718,8 +722,7 @@ UP.Count = UnrollCount; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold) + if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) return true; } @@ -731,13 +734,13 @@ UP.AllowExpensiveTripCount = true; UP.Force = true; if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return true; } bool PragmaFullUnroll = HasUnrollFullPragma(L); if (PragmaFullUnroll && TripCount != 0) { UP.Count = TripCount; - if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return false; } @@ -745,8 +748,6 @@ bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || PragmaEnableUnroll || UserUnrollCount; - uint64_t UnrolledSize; - if (ExplicitUnroll && TripCount != 0) { // If the loop has an unrolling pragma, we want to be more aggressive with // unrolling limits. Set thresholds to at least the PragmaThreshold value @@ -768,17 +769,16 @@ assert((ExactTripCount == 0 || MaxTripCount == 0) && "ExtractTripCound and MaxTripCount cannot both be non zero."); unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount; + UP.Count = FullUnrollTripCount; if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. 
- UnrolledSize = - (uint64_t)(LoopSize - BEInsns) * FullUnrollTripCount + BEInsns; + const uint64_t UnrolledSize = getUnrolledLoopSize(LoopSize, UP); if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { UseUpperBound = (MaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; - UP.Count = TripCount; return ExplicitUnroll; } else { // The loop isn't that small, but we still can fully unroll it if that @@ -794,7 +794,6 @@ UseUpperBound = (MaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; - UP.Count = TripCount; return ExplicitUnroll; } } @@ -814,10 +813,10 @@ UP.Count = TripCount; if (UP.PartialThreshold != NoThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns; - if (UnrolledSize > UP.PartialThreshold) - UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) / - (LoopSize - BEInsns); + if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) + UP.Count = + (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; while (UP.Count != 0 && TripCount % UP.Count != 0) @@ -828,11 +827,9 @@ // As we'll create fixup loop, do the type of unrolling only if // remainder loop is allowed. 
UP.Count = UP.DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && + getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } } if (UP.Count < 2) { if (PragmaEnableUnroll) @@ -881,14 +878,12 @@ } if (UP.Count == 0) UP.Count = UP.DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. - while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && + getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } #ifndef NDEBUG unsigned OrigCount = UP.Count; @@ -944,8 +939,11 @@ unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime, ProvidedUpperBound); unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" @@ -977,10 +975,6 @@ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound); - // Exit early if unrolling is disabled. if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return false;