Changeset View
Changeset View
Standalone View
Standalone View
lib/Transforms/Vectorize/LoopVectorize.cpp
Show First 20 Lines • Show All 812 Lines • ▼ Show 20 Lines | struct VectorizationFactor { | ||||
unsigned Width; // Vector width with best cost | unsigned Width; // Vector width with best cost | ||||
unsigned Cost; // Cost of the loop with that width | unsigned Cost; // Cost of the loop with that width | ||||
}; | }; | ||||
/// \return The most profitable vectorization factor and the cost of that VF. | /// \return The most profitable vectorization factor and the cost of that VF. | ||||
/// This method checks every power of two up to VF. If UserVF is not ZERO | /// This method checks every power of two up to VF. If UserVF is not ZERO | ||||
/// then this vectorization factor will be selected if vectorization is | /// then this vectorization factor will be selected if vectorization is | ||||
/// possible. | /// possible. | ||||
VectorizationFactor selectVectorizationFactor(bool OptForSize, | VectorizationFactor selectVectorizationFactor(bool OptForSize, | ||||
unsigned UserVF); | unsigned UserVF, | ||||
bool ForceVectorization); | |||||
/// \return The size (in bits) of the widest type in the code that | /// \return The size (in bits) of the widest type in the code that | ||||
/// needs to be vectorized. We ignore values that remain scalar such as | /// needs to be vectorized. We ignore values that remain scalar such as | ||||
/// 64 bit loop indices. | /// 64 bit loop indices. | ||||
unsigned getWidestType(); | unsigned getWidestType(); | ||||
/// \return The most profitable unroll factor. | /// \return The most profitable unroll factor. | ||||
/// If UserUF is non-zero then this method finds the best unroll-factor | /// If UserUF is non-zero then this method finds the best unroll-factor | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | |||||
/// Utility class for getting and setting loop vectorizer hints in the form | /// Utility class for getting and setting loop vectorizer hints in the form | ||||
/// of loop metadata. | /// of loop metadata. | ||||
struct LoopVectorizeHints { | struct LoopVectorizeHints { | ||||
/// Vectorization width. | /// Vectorization width. | ||||
unsigned Width; | unsigned Width; | ||||
/// Vectorization unroll factor. | /// Vectorization unroll factor. | ||||
unsigned Unroll; | unsigned Unroll; | ||||
/// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled) | /// Vectorization forced | ||||
int Force; | enum ForceKind { | ||||
FK_Undefined = -1, ///< Not selected. | |||||
FK_Disabled = 0, ///< Forcing disabled. | |||||
FK_Enabled = 1, ///< Forcing enabled. | |||||
} Force; | |||||
LoopVectorizeHints(const Loop *L, bool DisableUnrolling) | LoopVectorizeHints(const Loop *L, bool DisableUnrolling) | ||||
: Width(VectorizationFactor) | : Width(VectorizationFactor) | ||||
, Unroll(DisableUnrolling ? 1 : VectorizationUnroll) | , Unroll(DisableUnrolling ? 1 : VectorizationUnroll) | ||||
, Force(-1) | , Force(FK_Undefined) | ||||
, LoopID(L->getLoopID()) { | , LoopID(L->getLoopID()) { | ||||
getHints(L); | getHints(L); | ||||
// The command line options override any loop metadata except for when | // The command line options override any loop metadata except for when | ||||
// width == 1 which is used to indicate the loop is already vectorized. | // width == 1 which is used to indicate the loop is already vectorized. | ||||
if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1) | if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1) | ||||
Width = VectorizationFactor; | Width = VectorizationFactor; | ||||
if (VectorizationUnroll.getNumOccurrences() > 0) | if (VectorizationUnroll.getNumOccurrences() > 0) | ||||
Unroll = VectorizationUnroll; | Unroll = VectorizationUnroll; | ||||
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | if (Hint == "width") { | ||||
DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n"); | DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n"); | ||||
} else if (Hint == "unroll") { | } else if (Hint == "unroll") { | ||||
if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor) | if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor) | ||||
Unroll = Val; | Unroll = Val; | ||||
else | else | ||||
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); | DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); | ||||
} else if (Hint == "enable") { | } else if (Hint == "enable") { | ||||
if (C->getBitWidth() == 1) | if (C->getBitWidth() == 1) | ||||
Force = Val; | Force = Val == 1 ? LoopVectorizeHints::FK_Enabled | ||||
: LoopVectorizeHints::FK_Disabled; | |||||
else | else | ||||
DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); | DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); | ||||
} else { | } else { | ||||
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); | DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); | ||||
} | } | ||||
} | } | ||||
}; | }; | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | bool processLoop(Loop *L) { | ||||
DEBUG(dbgs() << "\nLV: Checking a loop in \"" | DEBUG(dbgs() << "\nLV: Checking a loop in \"" | ||||
<< L->getHeader()->getParent()->getName() << "\" from " | << L->getHeader()->getParent()->getName() << "\" from " | ||||
<< getDebugLocString(L->getHeader()->getFirstNonPHIOrDbg()) | << getDebugLocString(L->getHeader()->getFirstNonPHIOrDbg()) | ||||
<< "\n"); | << "\n"); | ||||
LoopVectorizeHints Hints(L, DisableUnrolling); | LoopVectorizeHints Hints(L, DisableUnrolling); | ||||
DEBUG(dbgs() << "LV: Loop hints:" | DEBUG(dbgs() << "LV: Loop hints:" | ||||
<< " force=" << (Hints.Force == 0 | << " force=" | ||||
<< (Hints.Force == LoopVectorizeHints::FK_Disabled | |||||
? "disabled" | ? "disabled" | ||||
: (Hints.Force == 1 ? "enabled" : "?")) | : (Hints.Force == LoopVectorizeHints::FK_Enabled | ||||
<< " width=" << Hints.Width << " unroll=" << Hints.Unroll | ? "enabled" | ||||
<< "\n"); | : "?")) << " width=" << Hints.Width | ||||
<< " unroll=" << Hints.Unroll << "\n"); | |||||
if (Hints.Force == 0) { | if (Hints.Force == LoopVectorizeHints::FK_Disabled) { | ||||
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); | DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
if (!AlwaysVectorize && Hints.Force != 1) { | if (!AlwaysVectorize && Hints.Force != LoopVectorizeHints::FK_Enabled) { | ||||
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); | DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
if (Hints.Width == 1 && Hints.Unroll == 1) { | if (Hints.Width == 1 && Hints.Unroll == 1) { | ||||
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); | DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// Check the loop for a trip count threshold: | |||||
// do not vectorize loops with a tiny trip count. | |||||
{ | |||||
BasicBlock *Latch = L->getLoopLatch(); | |||||
const unsigned TC = SE->getSmallConstantTripCount(L, Latch); | |||||
if (TC > 0u && TC < TinyTripCountVectorThreshold) { | |||||
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " | |||||
<< "This loop is not worth vectorizing."); | |||||
if (Hints.Force == LoopVectorizeHints::FK_Enabled) | |||||
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); | |||||
else { | |||||
DEBUG(dbgs() << "\n"); | |||||
return false; | |||||
} | |||||
} | |||||
} | |||||
// Check if it is legal to vectorize the loop. | // Check if it is legal to vectorize the loop. | ||||
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); | LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); | ||||
if (!LVL.canVectorize()) { | if (!LVL.canVectorize()) { | ||||
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); | DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// Use the cost model. | // Use the cost model. | ||||
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); | LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); | ||||
// Check the function attributes to find out if this function should be | // Check the function attributes to find out if this function should be | ||||
// optimized for size. | // optimized for size. | ||||
Function *F = L->getHeader()->getParent(); | Function *F = L->getHeader()->getParent(); | ||||
bool OptForSize = | bool OptForSize = Hints.Force != LoopVectorizeHints::FK_Enabled && | ||||
Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize); | F->hasFnAttribute(Attribute::OptimizeForSize); | ||||
// Compute the weighted frequency of this loop being executed and see if it | // Compute the weighted frequency of this loop being executed and see if it | ||||
// is less than 20% of the function entry baseline frequency. Note that we | // is less than 20% of the function entry baseline frequency. Note that we | ||||
// always have a canonical loop here because we think we *can* vectoriez. | // always have a canonical loop here because we think we *can* vectoriez. | ||||
// FIXME: This is hidden behind a flag due to pervasive problems with | // FIXME: This is hidden behind a flag due to pervasive problems with | ||||
// exactly what block frequency models. | // exactly what block frequency models. | ||||
if (LoopVectorizeWithBlockFrequency) { | if (LoopVectorizeWithBlockFrequency) { | ||||
BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); | BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); | ||||
if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq) | if (Hints.Force != LoopVectorizeHints::FK_Enabled && | ||||
LoopEntryFreq < ColdEntryFreq) | |||||
OptForSize = true; | OptForSize = true; | ||||
} | } | ||||
// Check the function attributes to see if implicit floats are allowed.a | // Check the function attributes to see if implicit floats are allowed.a | ||||
// FIXME: This check doesn't seem possibly correct -- what if the loop is | // FIXME: This check doesn't seem possibly correct -- what if the loop is | ||||
// an integer loop and the vector instructions selected are purely integer | // an integer loop and the vector instructions selected are purely integer | ||||
// vector instructions? | // vector instructions? | ||||
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { | if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { | ||||
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" | DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" | ||||
"attribute is used.\n"); | "attribute is used.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// Select the optimal vectorization factor. | // Select the optimal vectorization factor. | ||||
const LoopVectorizationCostModel::VectorizationFactor VF = | const LoopVectorizationCostModel::VectorizationFactor VF = | ||||
CM.selectVectorizationFactor(OptForSize, Hints.Width); | CM.selectVectorizationFactor(OptForSize, Hints.Width, | ||||
Hints.Force == | |||||
LoopVectorizeHints::FK_Enabled); | |||||
// Select the unroll factor. | // Select the unroll factor. | ||||
const unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, | const unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, | ||||
VF.Cost); | VF.Cost); | ||||
DEBUG(dbgs() << "LV: Found a vectorizable loop (" | DEBUG(dbgs() << "LV: Found a vectorizable loop (" | ||||
<< VF.Width << ") in " | << VF.Width << ") in " | ||||
<< getDebugLocString(L->getHeader()->getFirstNonPHIOrDbg()) | << getDebugLocString(L->getHeader()->getFirstNonPHIOrDbg()) | ||||
<< '\n'); | << '\n'); | ||||
▲ Show 20 Lines • Show All 2,117 Lines • ▼ Show 20 Lines | bool LoopVectorizationLegality::canVectorize() { | ||||
// ScalarEvolution needs to be able to find the exit count. | // ScalarEvolution needs to be able to find the exit count. | ||||
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); | const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); | ||||
if (ExitCount == SE->getCouldNotCompute()) { | if (ExitCount == SE->getCouldNotCompute()) { | ||||
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); | DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// Do not loop-vectorize loops with a tiny trip count. | |||||
BasicBlock *Latch = TheLoop->getLoopLatch(); | |||||
unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); | |||||
if (TC > 0u && TC < TinyTripCountVectorThreshold) { | |||||
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << | |||||
"This loop is not worth vectorizing.\n"); | |||||
return false; | |||||
} | |||||
// Check if we can vectorize the instructions and CFG in this loop. | // Check if we can vectorize the instructions and CFG in this loop. | ||||
if (!canVectorizeInstrs()) { | if (!canVectorizeInstrs()) { | ||||
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); | DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// Go over each instruction and look at memory deps. | // Go over each instruction and look at memory deps. | ||||
if (!canVectorizeMemory()) { | if (!canVectorizeMemory()) { | ||||
▲ Show 20 Lines • Show All 1,682 Lines • ▼ Show 20 Lines | for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { | ||||
} | } | ||||
} | } | ||||
return true; | return true; | ||||
} | } | ||||
LoopVectorizationCostModel::VectorizationFactor | LoopVectorizationCostModel::VectorizationFactor | ||||
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, | LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, | ||||
unsigned UserVF) { | unsigned UserVF, | ||||
bool ForceVectorization) { | |||||
// Width 1 means no vectorize | // Width 1 means no vectorize | ||||
VectorizationFactor Factor = { 1U, 0U }; | VectorizationFactor Factor = { 1U, 0U }; | ||||
if (OptForSize && Legal->getRuntimePointerCheck()->Need) { | if (OptForSize && Legal->getRuntimePointerCheck()->Need) { | ||||
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); | DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); | ||||
return Factor; | return Factor; | ||||
} | } | ||||
if (!EnableCondStoresVectorization && Legal->NumPredStores) { | if (!EnableCondStoresVectorization && Legal->NumPredStores) { | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | if (UserVF != 0) { | ||||
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); | assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); | ||||
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); | DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); | ||||
Factor.Width = UserVF; | Factor.Width = UserVF; | ||||
return Factor; | return Factor; | ||||
} | } | ||||
float Cost = expectedCost(1); | float Cost = expectedCost(1); | ||||
const float ScalarCost = Cost; | |||||
unsigned Width = 1; | unsigned Width = 1; | ||||
DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n"); | DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n"); | ||||
if (ForceVectorization && VF > 1) { | |||||
Width = 2; | |||||
Cost = expectedCost(Width) / (float)Width; | |||||
} | |||||
for (unsigned i=2; i <= VF; i*=2) { | for (unsigned i=2; i <= VF; i*=2) { | ||||
// Notice that the vector loop needs to be executed less times, so | // Notice that the vector loop needs to be executed less times, so | ||||
// we need to divide the cost of the vector loops by the width of | // we need to divide the cost of the vector loops by the width of | ||||
// the vector elements. | // the vector elements. | ||||
float VectorCost = expectedCost(i) / (float)i; | float VectorCost = expectedCost(i) / (float)i; | ||||
DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << | DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << | ||||
(int)VectorCost << ".\n"); | (int)VectorCost << ".\n"); | ||||
if (VectorCost < Cost) { | if (VectorCost < Cost) { | ||||
Cost = VectorCost; | Cost = VectorCost; | ||||
Width = i; | Width = i; | ||||
} | } | ||||
} | } | ||||
DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() | |||||
<< "LV: Vectorization seems to be not beneficial, " | |||||
<< "but was forced by a user.\n"); | |||||
DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n"); | DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n"); | ||||
Factor.Width = Width; | Factor.Width = Width; | ||||
Factor.Cost = Width * Cost; | Factor.Cost = Width * Cost; | ||||
return Factor; | return Factor; | ||||
} | } | ||||
unsigned LoopVectorizationCostModel::getWidestType() { | unsigned LoopVectorizationCostModel::getWidestType() { | ||||
unsigned MaxWidth = 8; | unsigned MaxWidth = 8; | ||||
▲ Show 20 Lines • Show All 749 Lines • Show Last 20 Lines |