Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show All 20 Lines | |||||
#include "llvm/ADT/DenseMap.h" | #include "llvm/ADT/DenseMap.h" | ||||
#include "llvm/ADT/DenseSet.h" | #include "llvm/ADT/DenseSet.h" | ||||
#include "llvm/ADT/MapVector.h" | #include "llvm/ADT/MapVector.h" | ||||
#include "llvm/ADT/None.h" | #include "llvm/ADT/None.h" | ||||
#include "llvm/ADT/Optional.h" | #include "llvm/ADT/Optional.h" | ||||
#include "llvm/ADT/PostOrderIterator.h" | #include "llvm/ADT/PostOrderIterator.h" | ||||
#include "llvm/ADT/STLExtras.h" | #include "llvm/ADT/STLExtras.h" | ||||
#include "llvm/ADT/SetVector.h" | #include "llvm/ADT/SetVector.h" | ||||
#include "llvm/ADT/SmallBitVector.h" | |||||
#include "llvm/ADT/SmallPtrSet.h" | #include "llvm/ADT/SmallPtrSet.h" | ||||
#include "llvm/ADT/SmallSet.h" | #include "llvm/ADT/SmallSet.h" | ||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||
#include "llvm/ADT/Statistic.h" | #include "llvm/ADT/Statistic.h" | ||||
#include "llvm/ADT/iterator.h" | #include "llvm/ADT/iterator.h" | ||||
#include "llvm/ADT/iterator_range.h" | #include "llvm/ADT/iterator_range.h" | ||||
#include "llvm/Analysis/AliasAnalysis.h" | #include "llvm/Analysis/AliasAnalysis.h" | ||||
#include "llvm/Analysis/CodeMetrics.h" | #include "llvm/Analysis/CodeMetrics.h" | ||||
▲ Show 20 Lines • Show All 5,290 Lines • ▼ Show 20 Lines | if (Changed) { | ||||
R.optimizeGatherSequence(); | R.optimizeGatherSequence(); | ||||
LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); | LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); | ||||
LLVM_DEBUG(verifyFunction(F)); | LLVM_DEBUG(verifyFunction(F)); | ||||
} | } | ||||
return Changed; | return Changed; | ||||
} | } | ||||
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, | bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, | ||||
unsigned VecRegSize) { | unsigned Idx) { | ||||
const unsigned ChainLen = Chain.size(); | const unsigned ChainLen = Chain.size(); | ||||
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen | LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen | ||||
<< "\n"); | << "\n"); | ||||
const unsigned Sz = R.getVectorElementSize(Chain[0]); | const unsigned Sz = R.getVectorElementSize(Chain[0]); | ||||
const unsigned VF = VecRegSize / Sz; | const unsigned MinVF = R.getMinVecRegSize() / Sz; | ||||
unsigned VF = Chain.size(); | |||||
if (!isPowerOf2_32(Sz) || VF < 2) | if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) | ||||
return false; | return false; | ||||
bool Changed = false; | LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx | ||||
// Look for profitable vectorizable trees at all offsets, starting at zero. | |||||
for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { | |||||
ArrayRef<Value *> Operands = Chain.slice(i, VF); | |||||
// Check that a previous iteration of this loop did not delete the Value. | |||||
if (llvm::any_of(Operands, [&R](Value *V) { | |||||
auto *I = dyn_cast<Instruction>(V); | |||||
return I && R.isDeleted(I); | |||||
})) | |||||
continue; | |||||
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i | |||||
<< "\n"); | << "\n"); | ||||
R.buildTree(Operands); | R.buildTree(Chain); | ||||
if (R.isTreeTinyAndNotFullyVectorizable()) | if (R.isTreeTinyAndNotFullyVectorizable()) | ||||
continue; | return false; | ||||
R.computeMinimumValueSizes(); | R.computeMinimumValueSizes(); | ||||
int Cost = R.getTreeCost(); | int Cost = R.getTreeCost(); | ||||
LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF | LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); | ||||
<< "\n"); | |||||
if (Cost < -SLPCostThreshold) { | if (Cost < -SLPCostThreshold) { | ||||
LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); | LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); | ||||
using namespace ore; | using namespace ore; | ||||
R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", | R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", | ||||
cast<StoreInst>(Chain[i])) | cast<StoreInst>(Chain[0])) | ||||
<< "Stores SLP vectorized with cost " << NV("Cost", Cost) | << "Stores SLP vectorized with cost " << NV("Cost", Cost) | ||||
<< " and with tree size " | << " and with tree size " | ||||
<< NV("TreeSize", R.getTreeSize())); | << NV("TreeSize", R.getTreeSize())); | ||||
R.vectorizeTree(); | R.vectorizeTree(); | ||||
return true; | |||||
// Move to the next bundle. | |||||
i += VF - 1; | |||||
Changed = true; | |||||
} | |||||
} | } | ||||
return Changed; | return false; | ||||
} | } | ||||
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, | bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, | ||||
BoUpSLP &R) { | BoUpSLP &R) { | ||||
SetVector<StoreInst *> Heads; | |||||
SmallDenseSet<StoreInst *> Tails; | |||||
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; | |||||
// We may run into multiple chains that merge into a single chain. We mark the | // We may run into multiple chains that merge into a single chain. We mark the | ||||
// stores that we vectorized so that we don't visit the same store twice. | // stores that we vectorized so that we don't visit the same store twice. | ||||
BoUpSLP::ValueSet VectorizedStores; | BoUpSLP::ValueSet VectorizedStores; | ||||
bool Changed = false; | bool Changed = false; | ||||
auto &&FindConsecutiveAccess = | int E = Stores.size(); | ||||
[this, &Stores, &Heads, &Tails, &ConsecutiveChain] (int K, int Idx) { | SmallBitVector Tails(E, false); | ||||
SmallVector<int, 16> ConsecutiveChain(E, E + 1); | |||||
auto &&FindConsecutiveAccess = [this, &Stores, &Tails, | |||||
&ConsecutiveChain](int K, int Idx) { | |||||
if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) | if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) | ||||
return false; | return false; | ||||
Tails.insert(Stores[Idx]); | Tails.set(Idx); | ||||
Heads.insert(Stores[K]); | ConsecutiveChain[K] = Idx; | ||||
ConsecutiveChain[Stores[K]] = Stores[Idx]; | |||||
return true; | return true; | ||||
}; | }; | ||||
// Do a quadratic search on all of the given stores in reverse order and find | // Do a quadratic search on all of the given stores in reverse order and find | ||||
// all of the pairs of stores that follow each other. | // all of the pairs of stores that follow each other. | ||||
int E = Stores.size(); | |||||
for (int Idx = E - 1; Idx >= 0; --Idx) { | for (int Idx = E - 1; Idx >= 0; --Idx) { | ||||
// If a store has multiple consecutive store candidates, search according | // If a store has multiple consecutive store candidates, search according | ||||
// to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ... | // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ... | ||||
// This is because usually pairing with immediate succeeding or preceding | // This is because usually pairing with immediate succeeding or preceding | ||||
// candidate create the best chance to find slp vectorization opportunity. | // candidate create the best chance to find slp vectorization opportunity. | ||||
for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset) | const int MaxLookDepth = std::min(E - Idx, 16); | ||||
for (int Offset = 1, F = std::max(MaxLookDepth, Idx + 1); Offset < F; | |||||
++Offset) | |||||
if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) || | if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) || | ||||
(Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx))) | (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx))) | ||||
break; | break; | ||||
} | } | ||||
// For stores that start but don't end a link in the chain: | // For stores that start but don't end a link in the chain: | ||||
for (auto *SI : llvm::reverse(Heads)) { | for (int Cnt = E; Cnt > 0; --Cnt) { | ||||
if (Tails.count(SI)) | int I = Cnt - 1; | ||||
if (ConsecutiveChain[I] == E + 1 || Tails.test(I)) | |||||
continue; | continue; | ||||
// We found a store instr that starts a chain. Now follow the chain and try | // We found a store instr that starts a chain. Now follow the chain and try | ||||
// to vectorize it. | // to vectorize it. | ||||
BoUpSLP::ValueList Operands; | BoUpSLP::ValueList Operands; | ||||
StoreInst *I = SI; | |||||
// Collect the chain into a list. | // Collect the chain into a list. | ||||
while ((Tails.count(I) || Heads.count(I)) && !VectorizedStores.count(I)) { | while (I != E + 1 && !VectorizedStores.count(Stores[I])) { | ||||
Operands.push_back(I); | Operands.push_back(Stores[I]); | ||||
// Move to the next value in the chain. | // Move to the next value in the chain. | ||||
I = ConsecutiveChain[I]; | I = ConsecutiveChain[I]; | ||||
} | } | ||||
// If a vector register can't hold 1 element, we are done. | |||||
unsigned MaxVecRegSize = R.getMaxVecRegSize(); | |||||
unsigned EltSize = R.getVectorElementSize(Stores[0]); | |||||
if (MaxVecRegSize % EltSize != 0) | |||||
continue; | |||||
unsigned MaxElts = MaxVecRegSize / EltSize; | |||||
// FIXME: Is division-by-2 the correct step? Should we assert that the | // FIXME: Is division-by-2 the correct step? Should we assert that the | ||||
// register size is a power-of-2? | // register size is a power-of-2? | ||||
for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize(); | unsigned StartIdx = 0; | ||||
Size /= 2) { | for (unsigned Size = llvm::PowerOf2Ceil(MaxElts); Size >= 2; Size /= 2) { | ||||
if (vectorizeStoreChain(Operands, R, Size)) { | for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) { | ||||
ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size); | |||||
if (!VectorizedStores.count(Slice.front()) && | |||||
!VectorizedStores.count(Slice.back()) && | |||||
vectorizeStoreChain(Slice, R, Cnt)) { | |||||
// Mark the vectorized stores so that we don't vectorize them again. | // Mark the vectorized stores so that we don't vectorize them again. | ||||
VectorizedStores.insert(Operands.begin(), Operands.end()); | VectorizedStores.insert(Slice.begin(), Slice.end()); | ||||
Changed = true; | Changed = true; | ||||
break; | // If we vectorized initial block, no need to try to vectorize it | ||||
// again. | |||||
if (Cnt == StartIdx) | |||||
StartIdx += Size; | |||||
Cnt += Size; | |||||
continue; | |||||
} | |||||
++Cnt; | |||||
} | } | ||||
// Check if the whole array was vectorized already - exit. | |||||
if (StartIdx >= Operands.size()) | |||||
break; | |||||
} | } | ||||
} | } | ||||
return Changed; | return Changed; | ||||
} | } | ||||
void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { | void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { | ||||
// Initialize the collections. We will make a single pass over the block. | // Initialize the collections. We will make a single pass over the block. | ||||
▲ Show 20 Lines • Show All 1,651 Lines • ▼ Show 20 Lines | bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { | ||||
for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e; | for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e; | ||||
++it) { | ++it) { | ||||
if (it->second.size() < 2) | if (it->second.size() < 2) | ||||
continue; | continue; | ||||
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " | LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " | ||||
<< it->second.size() << ".\n"); | << it->second.size() << ".\n"); | ||||
// Process the stores in chunks of 16. | Changed |= vectorizeStores(it->second, R); | ||||
// TODO: The limit of 16 inhibits greater vectorization factors. | |||||
// For example, AVX2 supports v32i8. Increasing this limit, however, | |||||
// may cause a significant compile-time increase. | |||||
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI += 16) { | |||||
unsigned Len = std::min<unsigned>(CE - CI, 16); | |||||
Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), R); | |||||
} | |||||
} | } | ||||
return Changed; | return Changed; | ||||
} | } | ||||
char SLPVectorizer::ID = 0; | char SLPVectorizer::ID = 0; | ||||
static const char lv_name[] = "SLP Vectorizer"; | static const char lv_name[] = "SLP Vectorizer"; | ||||
Show All 11 Lines |