Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 2,123 Lines • ▼ Show 20 Lines | |||||
// representation for pragma 'omp simd' is introduced. | // representation for pragma 'omp simd' is introduced. | ||||
static bool isExplicitVecOuterLoop(Loop *OuterLp, | static bool isExplicitVecOuterLoop(Loop *OuterLp, | ||||
OptimizationRemarkEmitter *ORE) { | OptimizationRemarkEmitter *ORE) { | ||||
assert(!OuterLp->isInnermost() && "This is not an outer loop"); | assert(!OuterLp->isInnermost() && "This is not an outer loop"); | ||||
LoopVectorizeHints Hints(OuterLp, true /*DisableInterleaving*/, *ORE); | LoopVectorizeHints Hints(OuterLp, true /*DisableInterleaving*/, *ORE); | ||||
// Only outer loops with an explicit vectorization hint are supported. | // Only outer loops with an explicit vectorization hint are supported. | ||||
// Unannotated outer loops are ignored. | // Unannotated outer loops are ignored. | ||||
if (Hints.getForce() == LoopVectorizeHints::FK_Undefined) | if (Hints.getVectorizationForce() == LoopVectorizeHints::FK_Undefined) | ||||
return false; | return false; | ||||
Function *Fn = OuterLp->getHeader()->getParent(); | Function *Fn = OuterLp->getHeader()->getParent(); | ||||
if (!Hints.allowVectorization(Fn, OuterLp, | if (!Hints.allowVectorization(Fn, OuterLp, | ||||
true /*VectorizeOnlyWhenForced*/)) { | true /*VectorizeOnlyWhenForced*/)) { | ||||
LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); | LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
if (Hints.getInterleave() > 1) { | if (Hints.getInterleaveForce()) { | ||||
fhahn: Will `getInterleaveForce()` return `ENABLED` if an interleave count > 1 is set through metadata? | |||||
// TODO: Interleave support is future work. | // TODO: Interleave support is future work. | ||||
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " | LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " | ||||
"outer loops.\n"); | "outer loops.\n"); | ||||
Hints.emitRemarkWithHints(); | Hints.emitRemarkWithHints(); | ||||
return false; | return false; | ||||
} | } | ||||
return true; | return true; | ||||
▲ Show 20 Lines • Show All 854 Lines • ▼ Show 20 Lines | |||||
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | ||||
BasicBlock *const SCEVCheckBlock = | BasicBlock *const SCEVCheckBlock = | ||||
RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock); | RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock); | ||||
if (!SCEVCheckBlock) | if (!SCEVCheckBlock) | ||||
return nullptr; | return nullptr; | ||||
assert(!(SCEVCheckBlock->getParent()->hasOptSize() || | assert(!(SCEVCheckBlock->getParent()->hasOptSize() || | ||||
(OptForSizeBasedOnProfile && | (OptForSizeBasedOnProfile && Cost->Hints->getVectorizationForce() != | ||||
fhahn (unsubmitted, not done): Should this check whether interleaving or vectorization is forced? | |||||
Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) && | LoopVectorizeHints::FK_Enabled)) && | ||||
"Cannot SCEV check stride or overflow when optimizing for size"); | "Cannot SCEV check stride or overflow when optimizing for size"); | ||||
// Update dominator only if this is first RT check. | // Update dominator only if this is first RT check. | ||||
if (LoopBypassBlocks.empty()) { | if (LoopBypassBlocks.empty()) { | ||||
DT->changeImmediateDominator(Bypass, SCEVCheckBlock); | DT->changeImmediateDominator(Bypass, SCEVCheckBlock); | ||||
if (!Cost->requiresScalarEpilogue(VF)) | if (!Cost->requiresScalarEpilogue(VF)) | ||||
// If there is an epilogue which must run, there's no edge from the | // If there is an epilogue which must run, there's no edge from the | ||||
// middle block to exit blocks and thus no need to update the immediate | // middle block to exit blocks and thus no need to update the immediate | ||||
// dominator of the exit blocks. | // dominator of the exit blocks. | ||||
DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock); | DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock); | ||||
Show All 14 Lines | BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { | ||||
// Check if we generated code that checks in runtime if arrays overlap. We put | // Check if we generated code that checks in runtime if arrays overlap. We put | ||||
// the checks into a separate block to make the more common case of few | // the checks into a separate block to make the more common case of few | ||||
// elements faster. | // elements faster. | ||||
if (!MemCheckBlock) | if (!MemCheckBlock) | ||||
return nullptr; | return nullptr; | ||||
if (MemCheckBlock->getParent()->hasOptSize() || OptForSizeBasedOnProfile) { | if (MemCheckBlock->getParent()->hasOptSize() || OptForSizeBasedOnProfile) { | ||||
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled && | assert(Cost->Hints->getVectorizationForce() == | ||||
LoopVectorizeHints::FK_Enabled && | |||||
"Cannot emit memory checks when optimizing for size, unless forced " | "Cannot emit memory checks when optimizing for size, unless forced " | ||||
"to vectorize."); | "to vectorize."); | ||||
ORE->emit([&]() { | ORE->emit([&]() { | ||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize", | return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize", | ||||
OrigLoop->getStartLoc(), | OrigLoop->getStartLoc(), | ||||
OrigLoop->getHeader()) | OrigLoop->getHeader()) | ||||
<< "Code-size may be reduced by not forcing " | << "Code-size may be reduced by not forcing " | ||||
"vectorization, or by source-code modifications " | "vectorization, or by source-code modifications " | ||||
▲ Show 20 Lines • Show All 2,265 Lines • ▼ Show 20 Lines | VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( | ||||
assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); | assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); | ||||
assert(VFCandidates.count(ElementCount::getFixed(1)) && | assert(VFCandidates.count(ElementCount::getFixed(1)) && | ||||
"Expected Scalar VF to be a candidate"); | "Expected Scalar VF to be a candidate"); | ||||
const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost, | const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost, | ||||
ExpectedCost); | ExpectedCost); | ||||
VectorizationFactor ChosenFactor = ScalarCost; | VectorizationFactor ChosenFactor = ScalarCost; | ||||
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; | bool ForceVectorization = | ||||
Hints->getVectorizationForce() == LoopVectorizeHints::FK_Enabled; | |||||
if (ForceVectorization && VFCandidates.size() > 1) { | if (ForceVectorization && VFCandidates.size() > 1) { | ||||
// Ignore scalar width, because the user explicitly wants vectorization. | // Ignore scalar width, because the user explicitly wants vectorization. | ||||
// Initialize cost to max so that VF = 2 is, at least, chosen during cost | // Initialize cost to max so that VF = 2 is, at least, chosen during cost | ||||
// evaluation. | // evaluation. | ||||
ChosenFactor.Cost = InstructionCost::getMax(); | ChosenFactor.Cost = InstructionCost::getMax(); | ||||
} | } | ||||
SmallVector<InstructionVFPair> InvalidCosts; | SmallVector<InstructionVFPair> InvalidCosts; | ||||
▲ Show 20 Lines • Show All 4,483 Lines • ▼ Show 20 Lines | static ScalarEpilogueLowering getScalarEpilogueLowering( | ||||
// 1) OptSize takes precedence over all other options, i.e. if this is set, | // 1) OptSize takes precedence over all other options, i.e. if this is set, | ||||
// don't look at hints or options, and don't request a scalar epilogue. | // don't look at hints or options, and don't request a scalar epilogue. | ||||
// (For PGSO, as shouldOptimizeForSize isn't currently accessible from | // (For PGSO, as shouldOptimizeForSize isn't currently accessible from | ||||
// LoopAccessInfo (due to code dependency and not being able to reliably get | // LoopAccessInfo (due to code dependency and not being able to reliably get | ||||
// PSI/BFI from a loop analysis under NPM), we cannot suppress the collection | // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection | ||||
// of strides in LoopAccessInfo::analyzeLoop() and vectorize without | // of strides in LoopAccessInfo::analyzeLoop() and vectorize without | ||||
// versioning when the vectorization is forced, unlike hasOptSize. So revert | // versioning when the vectorization is forced, unlike hasOptSize. So revert | ||||
// back to the old way and vectorize with versioning when forced. See D81345.) | // back to the old way and vectorize with versioning when forced. See D81345.) | ||||
if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, | if (F->hasOptSize() || | ||||
(llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, | |||||
PGSOQueryType::IRPass) && | PGSOQueryType::IRPass) && | ||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled)) | Hints.getInterleaveForce() != LoopVectorizeHints::FK_Enabled)) | ||||
return CM_ScalarEpilogueNotAllowedOptSize; | return CM_ScalarEpilogueNotAllowedOptSize; | ||||
// 2) If set, obey the directives | // 2) If set, obey the directives | ||||
if (PreferPredicateOverEpilogue.getNumOccurrences()) { | if (PreferPredicateOverEpilogue.getNumOccurrences()) { | ||||
switch (PreferPredicateOverEpilogue) { | switch (PreferPredicateOverEpilogue) { | ||||
case PreferPredicateTy::ScalarEpilogue: | case PreferPredicateTy::ScalarEpilogue: | ||||
return CM_ScalarEpilogueAllowed; | return CM_ScalarEpilogueAllowed; | ||||
case PreferPredicateTy::PredicateElseScalarEpilogue: | case PreferPredicateTy::PredicateElseScalarEpilogue: | ||||
▲ Show 20 Lines • Show All 307 Lines • ▼ Show 20 Lines | #endif /* NDEBUG */ | ||||
LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '" | LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '" | ||||
<< L->getHeader()->getParent()->getName() << "' from " | << L->getHeader()->getParent()->getName() << "' from " | ||||
<< DebugLocStr << "\n"); | << DebugLocStr << "\n"); | ||||
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); | LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); | ||||
LLVM_DEBUG( | LLVM_DEBUG( | ||||
dbgs() << "LV: Loop hints:" | dbgs() | ||||
<< " force=" | << "LV: Loop hints:" | ||||
<< (Hints.getForce() == LoopVectorizeHints::FK_Disabled | << " force_vectorization=" | ||||
<< (Hints.getVectorizationForce() == LoopVectorizeHints::FK_Disabled | |||||
? "disabled" | |||||
: (Hints.getVectorizationForce() == LoopVectorizeHints::FK_Enabled | |||||
? "enabled" | |||||
: "?")) | |||||
<< " width=" << Hints.getWidth() << " force_interleave=" | |||||
<< (Hints.getInterleaveForce() == LoopVectorizeHints::FK_Disabled | |||||
? "disabled" | ? "disabled" | ||||
: (Hints.getForce() == LoopVectorizeHints::FK_Enabled | : (Hints.getInterleaveForce() == LoopVectorizeHints::FK_Enabled | ||||
? "enabled" | ? "enabled" | ||||
: "?")) | : "?")) | ||||
<< " width=" << Hints.getWidth() | |||||
<< " interleave=" << Hints.getInterleave() << "\n"); | << " interleave=" << Hints.getInterleave() << "\n"); | ||||
// Function containing loop | // Function containing loop | ||||
Function *F = L->getHeader()->getParent(); | Function *F = L->getHeader()->getParent(); | ||||
// Looking at the diagnostic output is the only way to determine if a loop | // Looking at the diagnostic output is the only way to determine if a loop | ||||
// was vectorized (other than looking at the IR or machine code), so it | // was vectorized (other than looking at the IR or machine code), so it | ||||
// is important to generate an optimization remark for each loop. Most of | // is important to generate an optimization remark for each loop. Most of | ||||
// these messages are generated as OptimizationRemarkAnalysis. Remarks | // these messages are generated as OptimizationRemarkAnalysis. Remarks | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | #endif /* NDEBUG */ | ||||
// Check the loop for a trip count threshold: vectorize loops with a tiny trip | // Check the loop for a trip count threshold: vectorize loops with a tiny trip | ||||
// count by optimizing for size, to minimize overheads. | // count by optimizing for size, to minimize overheads. | ||||
auto ExpectedTC = getSmallBestKnownTC(*SE, L); | auto ExpectedTC = getSmallBestKnownTC(*SE, L); | ||||
if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { | if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { | ||||
LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " | LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " | ||||
<< "This loop is worth vectorizing only if no scalar " | << "This loop is worth vectorizing only if no scalar " | ||||
<< "iteration overheads are incurred."); | << "iteration overheads are incurred."); | ||||
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) | if (Hints.getVectorizationForce() == LoopVectorizeHints::FK_Enabled) | ||||
LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); | LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); | ||||
else { | else { | ||||
if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { | if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { | ||||
LLVM_DEBUG(dbgs() << "\n"); | LLVM_DEBUG(dbgs() << "\n"); | ||||
SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; | SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; | ||||
} else { | } else { | ||||
LLVM_DEBUG(dbgs() << " But the target considers the trip count too " | LLVM_DEBUG(dbgs() << " But the target considers the trip count too " | ||||
"small to consider vectorizing.\n"); | "small to consider vectorizing.\n"); | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | if (MaybeVF) { | ||||
unsigned SelectedIC = std::max(IC, UserIC); | unsigned SelectedIC = std::max(IC, UserIC); | ||||
// Optimistically generate runtime checks if they are needed. Drop them if | // Optimistically generate runtime checks if they are needed. Drop them if | ||||
// they turn out to not be profitable. | // they turn out to not be profitable. | ||||
if (VF.Width.isVector() || SelectedIC > 1) | if (VF.Width.isVector() || SelectedIC > 1) | ||||
Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); | Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); | ||||
// Check if it is profitable to vectorize with runtime checks. | // Check if it is profitable to vectorize with runtime checks. | ||||
bool ForceVectorization = | bool ForceVectorization = | ||||
Hints.getForce() == LoopVectorizeHints::FK_Enabled; | Hints.getVectorizationForce() == LoopVectorizeHints::FK_Enabled; | ||||
if (!ForceVectorization && | if (!ForceVectorization && | ||||
!areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L, | !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L, | ||||
*PSE.getSE())) { | *PSE.getSE())) { | ||||
ORE->emit([&]() { | ORE->emit([&]() { | ||||
return OptimizationRemarkAnalysisAliasing( | return OptimizationRemarkAnalysisAliasing( | ||||
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), | DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), | ||||
L->getHeader()) | L->getHeader()) | ||||
<< "loop not vectorized: cannot prove it is safe to reorder " | << "loop not vectorized: cannot prove it is safe to reorder " | ||||
▲ Show 20 Lines • Show All 338 Lines • Show Last 20 Lines |
Will `getInterleaveForce()` return `ENABLED` if an interleave count > 1 is set through metadata?