Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Show First 20 Lines • Show All 800 Lines • ▼ Show 20 Lines | shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, | ||||
return None; | return None; | ||||
} | } | ||||
static Optional<unsigned> shouldFullUnroll( | static Optional<unsigned> shouldFullUnroll( | ||||
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, | Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, | ||||
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, | ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, | ||||
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, | const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, | ||||
const TargetTransformInfo::UnrollingPreferences &UP) { | const TargetTransformInfo::UnrollingPreferences &UP) { | ||||
assert(FullUnrollTripCount && "should be non-zero!"); | |||||
if (!FullUnrollTripCount || FullUnrollTripCount >= UP.FullUnrollMaxCount) | if (FullUnrollTripCount >= UP.FullUnrollMaxCount) | ||||
return None; | return None; | ||||
// When computing the unrolled size, note that BEInsns are not replicated | // When computing the unrolled size, note that BEInsns are not replicated | ||||
// like the rest of the loop body. | // like the rest of the loop body. | ||||
if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) | if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) | ||||
return FullUnrollTripCount; | return FullUnrollTripCount; | ||||
// The loop isn't that small, but we still can fully unroll it if that | // The loop isn't that small, but we still can fully unroll it if that | ||||
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines | if (ExplicitUnroll && TripCount != 0) { | ||||
// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold | // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold | ||||
// value which is larger than the default limits. | // value which is larger than the default limits. | ||||
UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); | UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); | ||||
UP.PartialThreshold = | UP.PartialThreshold = | ||||
std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); | std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); | ||||
} | } | ||||
} | } | ||||
// 3rd priority is full unroll count. | // 3rd priority is exact full unrolling. This will eliminate all copies | ||||
// Full unroll makes sense only when TripCount or its upper bound could be | // of some exit test. | ||||
nikic: All copies of some exit test, not necessarily the latch exit test. | |||||
// statically calculated. | UP.Count = 0; | ||||
// Also we need to check if we exceed FullUnrollMaxCount. | if (TripCount) { | ||||
UP.Count = TripCount; | |||||
UnrollFactor = | |||||
shouldFullUnroll(L, TTI, DT, SE, EphValues, TripCount, UCE, UP); | |||||
if (UnrollFactor) { | |||||
fhahn (not done): nit: it might also be a good opportunity to limit the scope of UnrollFactor to this block, e.g. by writing `if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, TripCount, UCE, UP)) { ... }`. | |||||
UP.Count = *UnrollFactor; | |||||
UseUpperBound = false; | |||||
return ExplicitUnroll; | |||||
} | |||||
} | |||||
// 4th priority is bounded unrolling. | |||||
// We can unroll by the upper bound amount if it's generally allowed or if | // We can unroll by the upper bound amount if it's generally allowed or if | ||||
// we know that the loop is executed either the upper bound or zero times. | // we know that the loop is executed either the upper bound or zero times. | ||||
// (MaxOrZero unrolling keeps only the first loop test, so the number of | // (MaxOrZero unrolling keeps only the first loop test, so the number of | ||||
// loop tests remains the same compared to the non-unrolled version, whereas | // loop tests remains the same compared to the non-unrolled version, whereas | ||||
// the generic upper bound unrolling keeps all but the last loop test so the | // the generic upper bound unrolling keeps all but the last loop test so the | ||||
// number of loop tests goes up which may end up being worse on targets with | // number of loop tests goes up which may end up being worse on targets with | ||||
// constrained branch predictor resources so is controlled by an option.) | // constrained branch predictor resources so is controlled by an option.) | ||||
// In addition we only unroll small upper bounds. | // In addition we only unroll small upper bounds. | ||||
unsigned FullUnrollMaxTripCount = MaxTripCount; | // Note that the cost of bounded unrolling is always strictly greater than | ||||
if (!(UP.UpperBound || MaxOrZero) || | // the cost of exact full unrolling. As such, if we have an exact count and | ||||
FullUnrollMaxTripCount > UnrollMaxUpperBound) | // found it unprofitable, we'll never choose to bounded unroll. | ||||
FullUnrollMaxTripCount = 0; | if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) && | ||||
MaxTripCount <= UnrollMaxUpperBound) { | |||||
// UnrollByMaxCount and ExactTripCount cannot both be non zero since we only | UP.Count = MaxTripCount; | ||||
// compute the former when the latter is zero. | |||||
unsigned ExactTripCount = TripCount; | |||||
assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && | |||||
"ExtractTripCount and UnrollByMaxCount cannot both be non zero."); | |||||
unsigned FullUnrollTripCount = | |||||
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; | |||||
UP.Count = FullUnrollTripCount; | |||||
UnrollFactor = | UnrollFactor = | ||||
shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); | shouldFullUnroll(L, TTI, DT, SE, EphValues, MaxTripCount, UCE, UP); | ||||
// If shouldFullUnroll can do the unrolling, some side parameters should be | |||||
// set. | |||||
if (UnrollFactor) { | if (UnrollFactor) { | ||||
UP.Count = *UnrollFactor; | UP.Count = *UnrollFactor; | ||||
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); | UseUpperBound = true; | ||||
return ExplicitUnroll; | return ExplicitUnroll; | ||||
} else { | } | ||||
UP.Count = FullUnrollTripCount; | |||||
} | } | ||||
// 4th priority is loop peeling. | // 5th priority is loop peeling. | ||||
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); | computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); | ||||
if (PP.PeelCount) { | if (PP.PeelCount) { | ||||
UP.Runtime = false; | UP.Runtime = false; | ||||
UP.Count = 1; | UP.Count = 1; | ||||
return ExplicitUnroll; | return ExplicitUnroll; | ||||
} | } | ||||
// Before starting partial unrolling, set UP.Partial to true | // Before starting partial unrolling, set UP.Partial to true | ||||
// if the user explicitly asked for unrolling. | // if the user explicitly asked for unrolling. | ||||
if (TripCount) | if (TripCount) | ||||
UP.Partial |= ExplicitUnroll; | UP.Partial |= ExplicitUnroll; | ||||
// 5th priority is partial unrolling. | // 6th priority is partial unrolling. | ||||
// Try partial unroll only when TripCount could be statically calculated. | // Try partial unroll only when TripCount could be statically calculated. | ||||
UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); | UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); | ||||
if (UnrollFactor) { | if (UnrollFactor) { | ||||
UP.Count = *UnrollFactor; | UP.Count = *UnrollFactor; | ||||
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && | if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && | ||||
UP.Count != TripCount) | UP.Count != TripCount) | ||||
Show All 28 Lines | ORE->emit([&]() { | ||||
return OptimizationRemarkMissed( | return OptimizationRemarkMissed( | ||||
DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", | DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", | ||||
L->getStartLoc(), L->getHeader()) | L->getStartLoc(), L->getHeader()) | ||||
<< "Unable to fully unroll loop as directed by unroll(full) " | << "Unable to fully unroll loop as directed by unroll(full) " | ||||
"pragma " | "pragma " | ||||
"because loop has a runtime trip count."; | "because loop has a runtime trip count."; | ||||
}); | }); | ||||
// 6th priority is runtime unrolling. | // 7th priority is runtime unrolling. | ||||
// Don't unroll a runtime trip count loop when it is disabled. | // Don't unroll a runtime trip count loop when it is disabled. | ||||
if (hasRuntimeUnrollDisablePragma(L)) { | if (hasRuntimeUnrollDisablePragma(L)) { | ||||
UP.Count = 0; | UP.Count = 0; | ||||
return false; | return false; | ||||
} | } | ||||
// Don't unroll a small upper bound loop unless user or TTI asked to do so. | // Don't unroll a small upper bound loop unless user or TTI asked to do so. | ||||
if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { | if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { | ||||
▲ Show 20 Lines • Show All 581 Lines • Show Last 20 Lines |
All copies of some exit test, not necessarily the latch exit test.