Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -273,6 +273,11 @@ /// applies even if full unrolling is selected. This allows a target to fall /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. unsigned FullUnrollMaxCount; + // Represents number of instructions optimized when "back edge" + // becomes "fall through" in unrolled loop. + // For now we count a conditional branch on a backedge and a comparison + // feeding it. + unsigned BEInsns; /// Allow partial unrolling (unrolling of loops to expand the size of the /// loop body, not only to eliminate small constant-trip-count loops). bool Partial; Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -122,6 +122,7 @@ UP.Count = 0; UP.MaxCount = UINT_MAX; UP.FullUnrollMaxCount = UINT_MAX; + UP.BEInsns = 2; UP.Partial = false; UP.Runtime = false; UP.AllowRemainder = true; @@ -529,7 +530,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, - AssumptionCache *AC) { + AssumptionCache *AC, unsigned BEInsns) { SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); @@ -548,7 +549,7 @@ // that each loop has at least three instructions (likely a conditional // branch, a comparison feeding that branch, and some kind of loop increment // feeding that comparison instruction). - LoopSize = std::max(LoopSize, 3u); + LoopSize = std::max(LoopSize, BEInsns + 1); return LoopSize; } @@ -687,6 +688,27 @@ return false; } +enum ThresholdType { + Default = 0, + Partial, + Pragma +}; + +// Returns true if LoopSize satisfy Unroll Preferences: Count, Threshold +// and BEInsns. +static bool isLoopSizeMeetUP(unsigned LoopSize, + TargetTransformInfo::UnrollingPreferences &UP, + ThresholdType TT = Default) { + assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); + if (TT == Pragma) + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + + UP.BEInsns <= PragmaUnrollThreshold; + else if (TT == Partial) + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + + UP.BEInsns <= UP.PartialThreshold; + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + + UP.BEInsns <= UP.Threshold; +} // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, @@ -694,11 +716,6 @@ ScalarEvolution *SE, unsigned TripCount, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP) { - // BEInsns represents number of instructions optimized when "back edge" - // becomes "fall through" in unrolled loop. - // For now we count a conditional branch on a backedge and a comparison - // feeding it. - unsigned BEInsns = 2; // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -706,8 +723,7 @@ UP.Count = UnrollCount; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold) + if (UP.AllowRemainder && isLoopSizeMeetUP(LoopSize, UP)) return true; } @@ -718,14 +734,13 @@ UP.Runtime = true; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && - (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + if (UP.AllowRemainder && isLoopSizeMeetUP(LoopSize, UP, Pragma)) return true; } bool PragmaFullUnroll = HasUnrollFullPragma(L); if (PragmaFullUnroll && TripCount != 0) { UP.Count = TripCount; - if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold) + if (isLoopSizeMeetUP(LoopSize, UP, Pragma)) return false; } @@ -753,7 +768,7 @@ if (TripCount && TripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. - UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns; + UnrolledSize = (uint64_t)(LoopSize - UP.BEInsns) * TripCount + UP.BEInsns; if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { UP.Count = TripCount; @@ -789,10 +804,10 @@ } if (UP.PartialThreshold != NoThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns; - if (UnrolledSize > UP.PartialThreshold) - UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) / - (LoopSize - BEInsns); + if (!isLoopSizeMeetUP(LoopSize, UP, Partial)) + UP.Count = + (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; while (UP.Count != 0 && TripCount % UP.Count != 0) @@ -803,11 +818,8 @@ // As we'll create fixup loop, do the type of unrolling only if // remainder loop is allowed. UP.Count = DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && !isLoopSizeMeetUP(LoopSize, UP, Partial)) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } } if (UP.Count < 2) { if (PragmaEnableUnroll) @@ -852,14 +864,10 @@ } if (UP.Count == 0) UP.Count = DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. - while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + while (UP.Count != 0 && !isLoopSizeMeetUP(LoopSize, UP, Partial)) UP.Count >>= 1; - UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns; - } #ifndef NDEBUG unsigned OrigCount = UP.Count; @@ -910,8 +918,11 @@ unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime); unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" @@ -942,10 +953,6 @@ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, - ProvidedRuntime); - // If the loop contains a convergent operation, the prelude we'd add // to do the first few instructions before we hit the unrolled loop // is unsafe -- it adds a control-flow dependency to the convergent