Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 655 Lines • ▼ Show 20 Lines | |||||
namespace llvm { | namespace llvm { | ||||
static void inversePermutation(ArrayRef<unsigned> Indices, | static void inversePermutation(ArrayRef<unsigned> Indices, | ||||
SmallVectorImpl<int> &Mask) { | SmallVectorImpl<int> &Mask) { | ||||
Mask.clear(); | Mask.clear(); | ||||
const unsigned E = Indices.size(); | const unsigned E = Indices.size(); | ||||
Mask.resize(E, UndefMaskElem); | Mask.resize(E, UndefMaskElem); | ||||
for (unsigned I = 0; I < E; ++I) | for (unsigned I = 0; I < E; ++I) | ||||
if (Indices[I] != E) | |||||
Mask[Indices[I]] = I; | Mask[Indices[I]] = I; | ||||
} | } | ||||
/// \returns inserting index of InsertElement or InsertValue instruction, | /// \returns inserting index of InsertElement or InsertValue instruction, | ||||
/// using Offset as base offset for index. | /// using Offset as base offset for index. | ||||
static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) { | static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) { | ||||
int Index = Offset; | int Index = Offset; | ||||
if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) { | if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) { | ||||
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) { | if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) { | ||||
▲ Show 20 Lines • Show All 1,247 Lines • ▼ Show 20 Lines | struct TreeEntry { | ||||
/// A vector of scalars. | /// A vector of scalars. | ||||
ValueList Scalars; | ValueList Scalars; | ||||
/// The Scalars are vectorized into this value. It is initialized to Null. | /// The Scalars are vectorized into this value. It is initialized to Null. | ||||
Value *VectorizedValue = nullptr; | Value *VectorizedValue = nullptr; | ||||
/// Do we need to gather this sequence or vectorize it | /// Do we need to gather this sequence or vectorize it | ||||
/// (either with vector instruction or with scatter/gather | /// (either with vector instruction or with scatter/gather | ||||
/// intrinsics for store/load)? | /// intrinsics for store/load or split entry block)? | ||||
enum EntryState { Vectorize, ScatterVectorize, NeedToGather }; | enum EntryState { Vectorize, ScatterVectorize, NeedToGather, SplitShuffle }; | ||||
EntryState State; | EntryState State; | ||||
/// Does this sequence require some shuffling? | /// Does this sequence require some shuffling? | ||||
SmallVector<int, 4> ReuseShuffleIndices; | SmallVector<int, 4> ReuseShuffleIndices; | ||||
/// Does this entry require reordering? | /// Does this entry require reordering? | ||||
SmallVector<unsigned, 4> ReorderIndices; | SmallVector<unsigned, 4> ReorderIndices; | ||||
▲ Show 20 Lines • Show All 156 Lines • ▼ Show 20 Lines | LLVM_DUMP_METHOD void dump() const { | ||||
dbgs() << "Vectorize\n"; | dbgs() << "Vectorize\n"; | ||||
break; | break; | ||||
case ScatterVectorize: | case ScatterVectorize: | ||||
dbgs() << "ScatterVectorize\n"; | dbgs() << "ScatterVectorize\n"; | ||||
break; | break; | ||||
case NeedToGather: | case NeedToGather: | ||||
dbgs() << "NeedToGather\n"; | dbgs() << "NeedToGather\n"; | ||||
break; | break; | ||||
case SplitShuffle: | |||||
dbgs() << "SplitShuffle\n"; | |||||
break; | |||||
} | } | ||||
dbgs() << "MainOp: "; | dbgs() << "MainOp: "; | ||||
if (MainOp) | if (MainOp) | ||||
dbgs() << *MainOp << "\n"; | dbgs() << *MainOp << "\n"; | ||||
else | else | ||||
dbgs() << "NULL\n"; | dbgs() << "NULL\n"; | ||||
dbgs() << "AltOp: "; | dbgs() << "AltOp: "; | ||||
if (AltOp) | if (AltOp) | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | #endif | ||||
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, | TreeEntry *newTreeEntry(ArrayRef<Value *> VL, | ||||
TreeEntry::EntryState EntryState, | TreeEntry::EntryState EntryState, | ||||
Optional<ScheduleData *> Bundle, | Optional<ScheduleData *> Bundle, | ||||
const InstructionsState &S, | const InstructionsState &S, | ||||
const EdgeInfo &UserTreeIdx, | const EdgeInfo &UserTreeIdx, | ||||
ArrayRef<int> ReuseShuffleIndices = None, | ArrayRef<int> ReuseShuffleIndices = None, | ||||
ArrayRef<unsigned> ReorderIndices = None) { | ArrayRef<unsigned> ReorderIndices = None) { | ||||
assert(((!Bundle && EntryState == TreeEntry::NeedToGather) || | assert(((!Bundle && (EntryState == TreeEntry::NeedToGather || | ||||
EntryState == TreeEntry::SplitShuffle)) || | |||||
(Bundle && EntryState != TreeEntry::NeedToGather)) && | (Bundle && EntryState != TreeEntry::NeedToGather)) && | ||||
"Need to vectorize gather entry?"); | "Need to vectorize gather entry?"); | ||||
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree)); | VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree)); | ||||
TreeEntry *Last = VectorizableTree.back().get(); | TreeEntry *Last = VectorizableTree.back().get(); | ||||
Last->Idx = VectorizableTree.size() - 1; | Last->Idx = VectorizableTree.size() - 1; | ||||
Last->State = EntryState; | Last->State = EntryState; | ||||
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), | Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), | ||||
ReuseShuffleIndices.end()); | ReuseShuffleIndices.end()); | ||||
if (ReorderIndices.empty()) { | if (ReorderIndices.empty()) { | ||||
Last->Scalars.assign(VL.begin(), VL.end()); | Last->Scalars.assign(VL.begin(), VL.end()); | ||||
Last->setOperations(S); | Last->setOperations(S); | ||||
} else { | } else { | ||||
// Reorder scalars and build final mask. | // Reorder scalars and build final mask. | ||||
Last->Scalars.assign(VL.size(), nullptr); | Last->Scalars.assign(VL.size(), nullptr); | ||||
transform(ReorderIndices, Last->Scalars.begin(), | transform(ReorderIndices, Last->Scalars.begin(), | ||||
[VL](unsigned Idx) -> Value * { | [VL](unsigned Idx) -> Value * { | ||||
if (Idx >= VL.size()) | if (Idx >= VL.size()) | ||||
return UndefValue::get(VL.front()->getType()); | return UndefValue::get(VL.front()->getType()); | ||||
return VL[Idx]; | return VL[Idx]; | ||||
}); | }); | ||||
InstructionsState S = getSameOpcode(Last->Scalars); | InstructionsState S = getSameOpcode(Last->Scalars); | ||||
Last->setOperations(S); | Last->setOperations(S); | ||||
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end()); | Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end()); | ||||
} | } | ||||
if (Last->State != TreeEntry::NeedToGather) { | if (Last->State == TreeEntry::Vectorize || | ||||
Last->State == TreeEntry::ScatterVectorize) { | |||||
for (Value *V : VL) { | for (Value *V : VL) { | ||||
assert(!getTreeEntry(V) && "Scalar already in tree!"); | assert(!getTreeEntry(V) && "Scalar already in tree!"); | ||||
ScalarToTreeEntry[V] = Last; | ScalarToTreeEntry[V] = Last; | ||||
} | } | ||||
// Update the scheduler bundle to point to this TreeEntry. | // Update the scheduler bundle to point to this TreeEntry. | ||||
unsigned Lane = 0; | unsigned Lane = 0; | ||||
for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember; | for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember; | ||||
BundleMember = BundleMember->NextInBundle) { | BundleMember = BundleMember->NextInBundle) { | ||||
BundleMember->TE = Last; | BundleMember->TE = Last; | ||||
BundleMember->Lane = Lane; | BundleMember->Lane = Lane; | ||||
++Lane; | ++Lane; | ||||
} | } | ||||
assert((!Bundle.getValue() || Lane == VL.size()) && | assert((!Bundle.getValue() || Lane == VL.size()) && | ||||
"Bundle and VL out of sync"); | "Bundle and VL out of sync"); | ||||
} else { | } else if (Last->State == TreeEntry::NeedToGather) { | ||||
MustGather.insert(VL.begin(), VL.end()); | MustGather.insert(VL.begin(), VL.end()); | ||||
} | } | ||||
if (UserTreeIdx.UserTE) | if (UserTreeIdx.UserTE) | ||||
Last->UserTreeIndices.push_back(UserTreeIdx); | Last->UserTreeIndices.push_back(UserTreeIdx); | ||||
return Last; | return Last; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 910 Lines • ▼ Show 20 Lines | for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) { | ||||
if (TE->ReuseShuffleIndices.size() == VF) { | if (TE->ReuseShuffleIndices.size() == VF) { | ||||
// Need to reorder the reuses masks of the operands with smaller VF to | // Need to reorder the reuses masks of the operands with smaller VF to | ||||
// be able to find the match between the graph nodes and scalar | // be able to find the match between the graph nodes and scalar | ||||
// operands of the given node during vectorization/cost estimation. | // operands of the given node during vectorization/cost estimation. | ||||
assert(all_of(TE->UserTreeIndices, | assert(all_of(TE->UserTreeIndices, | ||||
[VF, &TE](const EdgeInfo &EI) { | [VF, &TE](const EdgeInfo &EI) { | ||||
return EI.UserTE->Scalars.size() == VF || | return EI.UserTE->Scalars.size() == VF || | ||||
EI.UserTE->Scalars.size() == | EI.UserTE->Scalars.size() == | ||||
TE->Scalars.size(); | TE->Scalars.size() || | ||||
(EI.UserTE->State == TreeEntry::SplitShuffle && | |||||
EI.UserTE->Scalars.size() == 2 * VF); | |||||
}) && | }) && | ||||
"All users must be of VF size."); | "All users must be of VF size."); | ||||
// Update ordering of the operands with the smaller VF than the given | // Update ordering of the operands with the smaller VF than the given | ||||
// one. | // one. | ||||
reorderReuses(TE->ReuseShuffleIndices, Mask); | reorderReuses(TE->ReuseShuffleIndices, Mask); | ||||
continue; | |||||
} | } | ||||
if (TE->State == TreeEntry::SplitShuffle && | |||||
TE->Scalars.size() == VF * 2) { | |||||
// Build a full mask out of smaller mask by just duplicating it. | |||||
assert(!TE->ReorderIndices.empty() && | |||||
"Expected reordered indeces in split shuffle node."); | |||||
TE->reorderOperands(Mask); | |||||
unsigned Sz = 2 * VF; | |||||
SmallVector<int> SplitMask(Sz); | |||||
copy(Mask, SplitMask.begin()); | |||||
transform(Mask, std::next(SplitMask.begin(), VF), | |||||
[VF](int Idx) { return Idx + VF; }); | |||||
reorderScalars(TE->Scalars, SplitMask); | |||||
SmallVector<unsigned> PrevOrder(Sz, Sz); | |||||
TE->ReorderIndices.swap(PrevOrder); | |||||
for (unsigned I = 0; I < Sz; ++I) { | |||||
if (SplitMask[I] == UndefMaskElem) | |||||
continue; | continue; | ||||
TE->ReorderIndices[SplitMask[I]] = PrevOrder[I]; | |||||
} | |||||
} | } | ||||
if (TE->State == TreeEntry::Vectorize && | continue; | ||||
} | |||||
if (TE->State == TreeEntry::SplitShuffle) { | |||||
reorderOrder(TE->ReorderIndices, Mask); | |||||
if (TE->ReorderIndices.empty()) { | |||||
TE->ReorderIndices.assign(Mask.size(), 0); | |||||
std::iota(TE->ReorderIndices.begin(), TE->ReorderIndices.end(), 0); | |||||
} | |||||
} else if (TE->State == TreeEntry::Vectorize && | |||||
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst, | isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst, | ||||
InsertElementInst>(TE->getMainOp()) && | InsertElementInst>(TE->getMainOp()) && | ||||
!TE->isAltShuffle()) { | !TE->isAltShuffle()) { | ||||
// Build correct orders for extract{element,value}, loads and | // Build correct orders for extract{element,value}, loads and | ||||
// stores. | // stores. | ||||
reorderOrder(TE->ReorderIndices, Mask); | reorderOrder(TE->ReorderIndices, Mask); | ||||
if (isa<InsertElementInst, StoreInst>(TE->getMainOp())) | if (isa<InsertElementInst, StoreInst>(TE->getMainOp())) | ||||
TE->reorderOperands(Mask); | TE->reorderOperands(Mask); | ||||
} else { | } else { | ||||
// Reorder the node and its operands. | // Reorder the node and its operands. | ||||
TE->reorderOperands(Mask); | TE->reorderOperands(Mask); | ||||
▲ Show 20 Lines • Show All 203 Lines • ▼ Show 20 Lines | for (const auto &Data : Users) { | ||||
continue; | continue; | ||||
assert((BestOrder.size() == TE->ReorderIndices.size() || | assert((BestOrder.size() == TE->ReorderIndices.size() || | ||||
TE->ReorderIndices.empty()) && | TE->ReorderIndices.empty()) && | ||||
"Non-matching sizes of user/operand entries."); | "Non-matching sizes of user/operand entries."); | ||||
reorderOrder(TE->ReorderIndices, Mask); | reorderOrder(TE->ReorderIndices, Mask); | ||||
} | } | ||||
// For gathers just need to reorder its scalars. | // For gathers just need to reorder its scalars. | ||||
for (TreeEntry *Gather : GatherOps) { | for (TreeEntry *Gather : GatherOps) { | ||||
assert(Gather->ReorderIndices.empty() && | assert((Gather->State == TreeEntry::SplitShuffle || | ||||
Gather->ReorderIndices.empty()) && | |||||
"Unexpected reordering of gathers."); | "Unexpected reordering of gathers."); | ||||
if (!Gather->ReuseShuffleIndices.empty()) { | if (!Gather->ReuseShuffleIndices.empty()) { | ||||
// Just reorder reuses indices. | // Just reorder reuses indices. | ||||
reorderReuses(Gather->ReuseShuffleIndices, Mask); | reorderReuses(Gather->ReuseShuffleIndices, Mask); | ||||
continue; | continue; | ||||
} | } | ||||
if (Gather->State == TreeEntry::SplitShuffle) { | |||||
reorderOrder(Gather->ReorderIndices, Mask); | |||||
if (Gather->ReorderIndices.empty()) { | |||||
Gather->ReorderIndices.assign(Mask.size(), 0); | |||||
std::iota(Gather->ReorderIndices.begin(), | |||||
Gather->ReorderIndices.end(), 0); | |||||
} | |||||
} else { | |||||
reorderScalars(Gather->Scalars, Mask); | reorderScalars(Gather->Scalars, Mask); | ||||
} | |||||
OrderedEntries.remove(Gather); | OrderedEntries.remove(Gather); | ||||
} | } | ||||
// Reorder operands of the user node and set the ordering for the user | // Reorder operands of the user node and set the ordering for the user | ||||
// node itself. | // node itself. | ||||
if (Data.first->State != TreeEntry::Vectorize || | if (Data.first->State != TreeEntry::SplitShuffle && | ||||
(Data.first->State != TreeEntry::Vectorize || | |||||
!isa<ExtractElementInst, ExtractValueInst, LoadInst>( | !isa<ExtractElementInst, ExtractValueInst, LoadInst>( | ||||
Data.first->getMainOp()) || | Data.first->getMainOp()) || | ||||
Data.first->isAltShuffle()) | Data.first->isAltShuffle())) | ||||
Data.first->reorderOperands(Mask); | Data.first->reorderOperands(Mask); | ||||
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) || | if (Data.first->State != TreeEntry::SplitShuffle && | ||||
Data.first->isAltShuffle()) { | (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) || | ||||
Data.first->isAltShuffle())) { | |||||
reorderScalars(Data.first->Scalars, Mask); | reorderScalars(Data.first->Scalars, Mask); | ||||
reorderOrder(Data.first->ReorderIndices, MaskOrder); | reorderOrder(Data.first->ReorderIndices, MaskOrder); | ||||
if (Data.first->ReuseShuffleIndices.empty() && | if (Data.first->ReuseShuffleIndices.empty() && | ||||
!Data.first->ReorderIndices.empty() && | !Data.first->ReorderIndices.empty() && | ||||
!Data.first->isAltShuffle()) { | !Data.first->isAltShuffle()) { | ||||
// Insert user node to the list to try to sink reordering deeper in | // Insert user node to the list to try to sink reordering deeper in | ||||
// the graph. | // the graph. | ||||
OrderedEntries.insert(Data.first); | OrderedEntries.insert(Data.first); | ||||
} | } | ||||
} else if (Data.first->State == TreeEntry::SplitShuffle) { | |||||
// Build a full mask out of smaller mask by just duplicating it. | |||||
unsigned VF = Mask.size(); | |||||
unsigned Sz = 2 * VF; | |||||
assert(Data.first->Scalars.size() == Sz && | |||||
"Scalars size must be twice of operands size."); | |||||
assert(!Data.first->ReorderIndices.empty() && | |||||
"Expected reordered indeces in spit shuffle node."); | |||||
Data.first->reorderOperands(Mask); | |||||
SmallVector<int> SplitMask(Sz); | |||||
copy(Mask, SplitMask.begin()); | |||||
transform(Mask, std::next(SplitMask.begin(), VF), | |||||
[VF](int Idx) { return Idx + VF; }); | |||||
reorderScalars(Data.first->Scalars, SplitMask); | |||||
SmallVector<unsigned> PrevOrder(Sz, Sz); | |||||
Data.first->ReorderIndices.swap(PrevOrder); | |||||
for (unsigned I = 0; I < Sz; ++I) { | |||||
if (SplitMask[I] == UndefMaskElem) | |||||
continue; | |||||
Data.first->ReorderIndices[SplitMask[I]] = PrevOrder[I]; | |||||
} | |||||
} else { | } else { | ||||
reorderOrder(Data.first->ReorderIndices, Mask); | reorderOrder(Data.first->ReorderIndices, Mask); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
// If the reordering is unnecessary, just remove the reorder. | // If the reordering is unnecessary, just remove the reorder. | ||||
if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() && | if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() && | ||||
VectorizableTree.front()->ReuseShuffleIndices.empty()) | VectorizableTree.front()->ReuseShuffleIndices.empty()) | ||||
VectorizableTree.front()->ReorderIndices.clear(); | VectorizableTree.front()->ReorderIndices.clear(); | ||||
} | } | ||||
void BoUpSLP::buildExternalUses( | void BoUpSLP::buildExternalUses( | ||||
const ExtraValueToDebugLocsMap &ExternallyUsedValues) { | const ExtraValueToDebugLocsMap &ExternallyUsedValues) { | ||||
// Collect the values that we need to extract from the tree. | // Collect the values that we need to extract from the tree. | ||||
for (auto &TEPtr : VectorizableTree) { | for (auto &TEPtr : VectorizableTree) { | ||||
TreeEntry *Entry = TEPtr.get(); | TreeEntry *Entry = TEPtr.get(); | ||||
// No need to handle users of gathered values. | // No need to handle users of gathered values. | ||||
if (Entry->State == TreeEntry::NeedToGather) | if (Entry->State == TreeEntry::NeedToGather || | ||||
Entry->State == TreeEntry::SplitShuffle) | |||||
continue; | continue; | ||||
// For each lane: | // For each lane: | ||||
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { | for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { | ||||
Value *Scalar = Entry->Scalars[Lane]; | Value *Scalar = Entry->Scalars[Lane]; | ||||
int FoundLane = Entry->findLaneForValue(Scalar); | int FoundLane = Entry->findLaneForValue(Scalar); | ||||
// Check if the scalar is externally used as an extra arg. | // Check if the scalar is externally used as an extra arg. | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) { | ||||
if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()), | if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()), | ||||
CommonAlignment)) | CommonAlignment)) | ||||
return LoadsState::ScatterVectorize; | return LoadsState::ScatterVectorize; | ||||
} | } | ||||
return LoadsState::Gather; | return LoadsState::Gather; | ||||
} | } | ||||
/// Generates key/subkey pair for the given value to provide effective sorting | |||||
/// of the values and better detection of the vectorizable values sequences. The | |||||
/// keys/subkeys can be used for better sorting of the values themselves (keys) | |||||
/// and in values subgroups (subkeys). | |||||
static std::pair<size_t, size_t> generateKeySubkey( | |||||
Value *V, const TargetLibraryInfo *TLI, | |||||
function_ref<hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator) { | |||||
hash_code Key = hash_value(V->getValueID() + 1); | |||||
hash_code SubKey = hash_value(0); | |||||
// Sort the loads by the distance between the pointers. | |||||
if (auto *LI = dyn_cast<LoadInst>(V)) { | |||||
Key = hash_combine(hash_value(LI->getParent()), Key); | |||||
if (LI->isSimple()) | |||||
SubKey = hash_value(LoadsSubkeyGenerator(Key, LI)); | |||||
else | |||||
SubKey = hash_value(LI); | |||||
} else if (isVectorLikeInstWithConstOps(V)) { | |||||
// Sort extracts by the vector operands. | |||||
if (isa<ExtractElementInst, UndefValue>(V)) | |||||
Key = hash_value(Value::UndefValueVal + 1); | |||||
if (auto *EI = dyn_cast<ExtractElementInst>(V)) { | |||||
if (!isUndefVector(EI->getVectorOperand()) && | |||||
!isa<UndefValue>(EI->getIndexOperand())) | |||||
SubKey = hash_value(EI->getVectorOperand()); | |||||
} | |||||
} else if (auto *I = dyn_cast<Instruction>(V)) { | |||||
// Sort other instructions just by the opcodes except for CMPInst. | |||||
// For CMP also sort by the predicate kind. | |||||
if ((isa<BinaryOperator>(I) || isa<CastInst>(I)) && | |||||
isValidForAlternation(I->getOpcode())) { | |||||
Key = hash_value(0); | |||||
SubKey = hash_combine( | |||||
hash_value(isa<BinaryOperator>(I) ? 1 : 0), | |||||
hash_value(isa<BinaryOperator>(I) | |||||
? I->getType() | |||||
: cast<CastInst>(I)->getOperand(0)->getType())); | |||||
} else if (auto *CI = dyn_cast<CmpInst>(I)) { | |||||
CmpInst::Predicate Pred = CI->getPredicate(); | |||||
CmpInst::Predicate SwapPred = CmpInst::getSwappedPredicate(Pred); | |||||
SubKey = hash_combine(hash_value(I->getOpcode()), | |||||
hash_value(Pred > SwapPred ? Pred : SwapPred), | |||||
hash_value(Pred > SwapPred ? SwapPred : Pred), | |||||
hash_value(CI->getOperand(0)->getType())); | |||||
} else if (auto *Call = dyn_cast<CallInst>(I)) { | |||||
Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI); | |||||
if (isTriviallyVectorizable(ID)) | |||||
SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID)); | |||||
else if (!VFDatabase(*Call).getMappings(*Call).empty()) | |||||
SubKey = hash_combine(hash_value(I->getOpcode()), | |||||
hash_value(Call->getCalledFunction())); | |||||
else | |||||
SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call)); | |||||
for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos()) | |||||
SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End), | |||||
hash_value(Op.Tag), SubKey); | |||||
} else if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) { | |||||
if (Gep->getNumOperands() == 2 && isa<ConstantInt>(Gep->getOperand(1))) | |||||
SubKey = hash_value(Gep->getPointerOperand()); | |||||
else | |||||
SubKey = hash_value(Gep); | |||||
} else if (BinaryOperator::isIntDivRem(I->getOpcode()) && | |||||
!isa<ConstantInt>(I->getOperand(1))) { | |||||
// Do not try to vectorize instructions with potentially high cost. | |||||
SubKey = hash_value(I); | |||||
} else { | |||||
SubKey = hash_value(I->getOpcode()); | |||||
} | |||||
Key = hash_combine(hash_value(I->getParent()), Key); | |||||
} | |||||
return std::make_pair(Key, SubKey); | |||||
} | |||||
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, | void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, | ||||
const EdgeInfo &UserTreeIdx) { | const EdgeInfo &UserTreeIdx) { | ||||
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); | assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); | ||||
SmallVector<int> ReuseShuffleIndicies; | SmallVector<int> ReuseShuffleIndicies; | ||||
SmallVector<Value *> UniqueValues; | SmallVector<Value *> UniqueValues; | ||||
auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues, | auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues, | ||||
&UserTreeIdx, | &UserTreeIdx, | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | if (SI->getValueOperand()->getType()->isVectorTy()) { | ||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); | LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); | ||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); | newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); | ||||
return; | return; | ||||
} | } | ||||
// If all of the operands are identical or constant we have a simple solution. | // If all of the operands are identical or constant we have a simple solution. | ||||
// If we deal with insert/extract instructions, they all must have constant | // If we deal with insert/extract instructions, they all must have constant | ||||
// indices, otherwise we should gather them, not try to vectorize. | // indices, otherwise we should gather them, not try to vectorize. | ||||
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() || | bool IsSplat = isSplat(VL); | ||||
bool AllConstant = !IsSplat && allConstant(VL); | |||||
bool AllSameBlock = !AllConstant && allSameBlock(VL); | |||||
if (IsSplat || AllConstant || !AllSameBlock || !S.getOpcode() || | |||||
(isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) && | (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) && | ||||
!all_of(VL, isVectorLikeInstWithConstOps))) { | !all_of(VL, isVectorLikeInstWithConstOps))) { | ||||
// Try to build split shuffle if possible. | |||||
SmallPtrSet<Value *, 4> UniqueVals; | |||||
unsigned VF = VL.size() / 2; | |||||
if (!IsSplat && !AllConstant && (!AllSameBlock || !S.getOpcode()) && | |||||
VL.size() > 2 && count_if(VL, [&UniqueVals](Value *V) { | |||||
return UniqueVals.insert(V).second; | |||||
}) > 2) { | |||||
MapVector<size_t, MapVector<size_t, SmallVector<unsigned>>> | |||||
SelectedValues; | |||||
SmallVector<unsigned> Vectorized; | |||||
unsigned InstCnt = 0; | |||||
std::pair<size_t, size_t> BestKeySubkey; | |||||
SmallDenseMap<unsigned, std::pair<unsigned, int>, 4> BestLoad; | |||||
// Group values by the most optimal kinds/subkinds. | |||||
for (unsigned Idx = 0, E = VL.size(); Idx < E; ++Idx) { | |||||
Value *V = VL[Idx]; | |||||
if (ScalarToTreeEntry.count(V) || MustGather.contains(V)) { | |||||
Vectorized.push_back(Idx); | |||||
continue; | |||||
} | |||||
std::pair<size_t, size_t> KeySubkey = generateKeySubkey( | |||||
V, TLI, | |||||
[&SelectedValues, &BestLoad, VL, DL = DL, SE = SE, | |||||
Lint: Pre-merge checks: clang-format: please reformat the code
```
- [&SelectedValues, &BestLoad, VL, DL =… | |||||
VF, Idx](size_t Key, LoadInst *LI) { | |||||
for (const auto &LoadData : SelectedValues[Key]) { | |||||
auto &Data = BestLoad[LoadData.second.front()]; | |||||
auto *RLI = cast<LoadInst>(VL[Data.first]); | |||||
Optional<int> Dist = getPointersDiff( | |||||
RLI->getType(), RLI->getPointerOperand(), LI->getType(), | |||||
LI->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true); | |||||
if (Dist && static_cast<unsigned>(std::abs(*Dist)) < VF) { | |||||
if (Data.second > *Dist) { | |||||
Data.first = Idx; | |||||
Data.second = *Dist; | |||||
} | |||||
return hash_value(cast<LoadInst>(VL[LoadData.second.front()]) | |||||
->getPointerOperand()); | |||||
} | |||||
} | |||||
BestLoad.try_emplace(Idx, std::make_pair(Idx, 0)); | |||||
return hash_value(LI->getPointerOperand()); | |||||
}); | |||||
SmallVector<unsigned> &Items = | |||||
SelectedValues[KeySubkey.first][KeySubkey.second]; | |||||
Items.push_back(Idx); | |||||
if (Items.size() > InstCnt && any_of(Items, [VL](unsigned Idx) { | |||||
if (auto *EI = dyn_cast<ExtractElementInst>(VL[Idx])) { | |||||
if (auto *FTy = | |||||
dyn_cast<FixedVectorType>(EI->getVectorOperandType())) | |||||
return FTy->getNumElements() < VL.size(); | |||||
return false; | |||||
} | |||||
return isa<Instruction>(VL[Idx]); | |||||
})) { | |||||
InstCnt = Items.size(); | |||||
BestKeySubkey = KeySubkey; | |||||
} | |||||
} | |||||
// Check number of unique elements. | |||||
UniqueVals.clear(); | |||||
unsigned UniqueValsCnt = count_if( | |||||
Lint: Pre-merge checks clang-format: please reformat the code - unsigned UniqueValsCnt = count_if( - SelectedValues[BestKeySubkey.first][BestKeySubkey.second], - [&UniqueVals, VL](unsigned Idx) { - return isa<UndefValue>(VL[Idx]) || - UniqueVals.insert(VL[Idx]).second; - }); + unsigned UniqueValsCnt = + count_if(SelectedValues[BestKeySubkey.first][BestKeySubkey.second], + [&UniqueVals, VL](unsigned Idx) { + return isa<UndefValue>(VL[Idx]) || 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code
```
- unsigned UniqueValsCnt = count_if… | |||||
SelectedValues[BestKeySubkey.first][BestKeySubkey.second], | |||||
[&UniqueVals, VL](unsigned Idx) { | |||||
return isa<UndefValue>(VL[Idx]) || | |||||
UniqueVals.insert(VL[Idx]).second; | |||||
}); | |||||
// Consider it only if we can split it evenly. | |||||
if ((((UniqueVals.empty() || !isa<PHINode>(*UniqueVals.begin())) && | |||||
InstCnt >= VF && InstCnt < VL.size()) || | |||||
InstCnt == VF) && | |||||
(UniqueValsCnt != 2 || (InstCnt == VF && UniqueValsCnt == VF))) { | |||||
LLVM_DEBUG(dbgs() << "SLP: build split shuffle block. \n"); | |||||
// How many time shall we repeat same value in the Main node. | |||||
unsigned NumRepeats = VL.size() / UniqueValsCnt; | |||||
DenseMap<Value *, unsigned> UniqueValsCounter; | |||||
auto SelectedValuesVector = SelectedValues.takeVector(); | |||||
SmallVector<unsigned> ReorderIndices(VL.size(), VL.size()); | |||||
SmallVector<ValueList, 2> Operands(2); | |||||
unsigned MainCnt = 0; | |||||
unsigned SecondCnt = 0; | |||||
for (unsigned I = 0, E = SelectedValuesVector.size(); I < E; ++I) { | |||||
auto ValuesVector = SelectedValuesVector[I].second.takeVector(); | |||||
for (unsigned K = 0, N = ValuesVector.size(); K < N; ++K) { | |||||
unsigned Sz = ValuesVector[K].second.size(); | |||||
SmallVector<unsigned> Undefs; | |||||
for (unsigned Idx : ValuesVector[K].second) { | |||||
Value *V = VL[Idx]; | |||||
unsigned &RepeatCntRef = | |||||
UniqueValsCounter.try_emplace(V, 0).first->getSecond(); | |||||
if (Sz >= VF && MainCnt < VF && | |||||
(isa<UndefValue>(V) || RepeatCntRef < NumRepeats)) { | |||||
++RepeatCntRef; | |||||
if (isa<UndefValue>(V)) { | |||||
Undefs.push_back(Idx); | |||||
} else { | |||||
ReorderIndices[MainCnt] = Idx; | |||||
Operands.front().push_back(V); | |||||
++MainCnt; | |||||
} | |||||
} else { | |||||
ReorderIndices[SecondCnt + VF] = Idx; | |||||
++SecondCnt; | |||||
Operands.back().push_back(V); | |||||
} | |||||
} | |||||
// Process remaining undefs. | |||||
if (MainCnt < VF) { | |||||
for (unsigned Idx : Undefs) { | |||||
Value *V = VL[Idx]; | |||||
if (Sz >= VF && MainCnt < VF) { | |||||
Operands.front().push_back(V); | |||||
++MainCnt; | |||||
} else { | |||||
++SecondCnt; | |||||
Operands.back().push_back(V); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
for (unsigned Idx : Vectorized) { | |||||
Value *V = VL[Idx]; | |||||
ReorderIndices[SecondCnt + VF] = Idx; | |||||
++SecondCnt; | |||||
Operands.back().push_back(V); | |||||
} | |||||
TreeEntry *TE = newTreeEntry(VL, TreeEntry::SplitShuffle, None, S, | |||||
UserTreeIdx, None, ReorderIndices); | |||||
for (unsigned I = 0, E = Operands.size(); I < E; ++I) | |||||
TE->setOperand(I, Operands[I]); | |||||
for (unsigned I = 0, E = Operands.size(); I < E; ++I) | |||||
buildTree_rec(Operands[I], Depth + 1, {TE, I}); | |||||
return; | |||||
} | |||||
} | |||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); | LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); | ||||
if (TryToFindDuplicates(S)) | if (TryToFindDuplicates(S)) | ||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, | newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, | ||||
ReuseShuffleIndicies); | ReuseShuffleIndicies); | ||||
return; | return; | ||||
} | } | ||||
// We now know that this is a vector of instructions of the same type from | // We now know that this is a vector of instructions of the same type from | ||||
▲ Show 20 Lines • Show All 1,222 Lines • ▼ Show 20 Lines | if (VL.size() > 2 && E->getOpcode() == Instruction::Load && | ||||
GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy, | GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy, | ||||
None, I, LoadTy); | None, I, LoadTy); | ||||
} | } | ||||
return ReuseShuffleCost + GatherCost - ScalarsCost; | return ReuseShuffleCost + GatherCost - ScalarsCost; | ||||
} | } | ||||
} | } | ||||
return ReuseShuffleCost + getGatherCost(VL); | return ReuseShuffleCost + getGatherCost(VL); | ||||
} | } | ||||
if (E->State == TreeEntry::SplitShuffle) { | |||||
unsigned VF = VL.size(); | |||||
SmallVector<int> Mask(VF, UndefMaskElem); | |||||
for (unsigned I = 0; I < VF; ++I) { | |||||
Value *V = VL[I]; | |||||
unsigned OpIdx = 0; | |||||
const auto *It = find(E->getOperand(OpIdx), V); | |||||
if (It == E->getOperand(OpIdx).end()) { | |||||
OpIdx = 1; | |||||
It = find(E->getOperand(OpIdx), V); | |||||
assert(It != E->getOperand(OpIdx).end() && | |||||
"Subvectors are not synced."); | |||||
} | |||||
int Idx = std::distance(E->getOperand(OpIdx).begin(), It); | |||||
Mask[I] = Idx + VF * OpIdx; | |||||
} | |||||
return TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask); | |||||
} | |||||
InstructionCost CommonCost = 0; | InstructionCost CommonCost = 0; | ||||
SmallVector<int> Mask; | SmallVector<int> Mask; | ||||
if (!E->ReorderIndices.empty()) { | if (!E->ReorderIndices.empty()) { | ||||
SmallVector<int> NewMask; | SmallVector<int> NewMask; | ||||
if (E->getOpcode() == Instruction::Store) { | if (E->getOpcode() == Instruction::Store) { | ||||
// For stores the order is actually a mask. | // For stores the order is actually a mask. | ||||
NewMask.resize(E->ReorderIndices.size()); | NewMask.resize(E->ReorderIndices.size()); | ||||
copy(E->ReorderIndices, NewMask.begin()); | copy(E->ReorderIndices, NewMask.begin()); | ||||
▲ Show 20 Lines • Show All 1,368 Lines • ▼ Show 20 Lines | if (TreeEntry *E = getTreeEntry(S.OpValue)) | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} | } | ||||
return V; | return V; | ||||
} | } | ||||
} | } | ||||
auto *I = | |||||
find_if(VectorizableTree, [VL](const std::unique_ptr<TreeEntry> &TE) { | |||||
return TE->State == TreeEntry::SplitShuffle && TE->isSame(VL); | |||||
}); | |||||
if (I != VectorizableTree.end()) | |||||
return vectorizeTree(I->get()); | |||||
// Check that every instruction appears once in this bundle. | // Check that every instruction appears once in this bundle. | ||||
SmallVector<int> ReuseShuffleIndicies; | SmallVector<int> ReuseShuffleIndicies; | ||||
SmallVector<Value *> UniqueValues; | SmallVector<Value *> UniqueValues; | ||||
if (VL.size() > 2) { | if (VL.size() > 2) { | ||||
DenseMap<Value *, unsigned> UniquePositions; | DenseMap<Value *, unsigned> UniquePositions; | ||||
unsigned NumValues = | unsigned NumValues = | ||||
std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { | std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | Value *BoUpSLP::vectorizeTree(TreeEntry *E) { | ||||
if (E->VectorizedValue) { | if (E->VectorizedValue) { | ||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); | LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); | ||||
return E->VectorizedValue; | return E->VectorizedValue; | ||||
} | } | ||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); | bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); | ||||
unsigned VF = E->getVectorFactor(); | unsigned VF = E->getVectorFactor(); | ||||
auto &&SetInsertPointAfterOps = [this](ArrayRef<Value *> VL) { | |||||
// The last instruction in the bundle in program order. | |||||
Instruction *LastInst = nullptr; | |||||
for (Value *V : VL) { | |||||
// If the value was vectorized, need to get the vector value for correct | |||||
// insert point. | |||||
if (const TreeEntry *TE = getTreeEntry(V)) | |||||
if (TE->VectorizedValue) | |||||
V = TE->VectorizedValue; | |||||
auto *I = dyn_cast<Instruction>(V); | |||||
if (!I) | |||||
continue; | |||||
if (!DT->isReachableFromEntry(I->getParent())) | |||||
continue; | |||||
if (!LastInst) { | |||||
LastInst = I; | |||||
continue; | |||||
} | |||||
if ((LastInst->getParent() != I->getParent() && | |||||
DT->dominates(LastInst->getParent(), I->getParent())) || | |||||
(LastInst->getParent() == I->getParent() && LastInst->comesBefore(I))) | |||||
LastInst = I; | |||||
} | |||||
// Set the insertion point after the last instruction in the bundle. Set | |||||
// the debug location to Front. | |||||
if (!LastInst) | |||||
return; | |||||
if (isa<PHINode>(LastInst)) | |||||
Builder.SetInsertPoint(LastInst->getParent(), | |||||
LastInst->getParent()->getFirstInsertionPt()); | |||||
else | |||||
Builder.SetInsertPoint(LastInst->getParent(), | |||||
std::next(LastInst->getIterator())); | |||||
Builder.SetCurrentDebugLocation(LastInst->getDebugLoc()); | |||||
}; | |||||
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, | ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, | ||||
CSEBlocks); | CSEBlocks); | ||||
if (E->State == TreeEntry::NeedToGather) { | if (E->State == TreeEntry::NeedToGather) { | ||||
if (E->getMainOp()) | if (E->getMainOp()) | ||||
setInsertPointAfterBundle(E); | setInsertPointAfterBundle(E); | ||||
Value *Vec; | Value *Vec; | ||||
SmallVector<int> Mask; | SmallVector<int> Mask; | ||||
SmallVector<const TreeEntry *> Entries; | SmallVector<const TreeEntry *> Entries; | ||||
Show All 13 Lines | if (E->State == TreeEntry::NeedToGather) { | ||||
} | } | ||||
if (NeedToShuffleReuses) { | if (NeedToShuffleReuses) { | ||||
ShuffleBuilder.addMask(E->ReuseShuffleIndices); | ShuffleBuilder.addMask(E->ReuseShuffleIndices); | ||||
Vec = ShuffleBuilder.finalize(Vec); | Vec = ShuffleBuilder.finalize(Vec); | ||||
} | } | ||||
E->VectorizedValue = Vec; | E->VectorizedValue = Vec; | ||||
return Vec; | return Vec; | ||||
} | } | ||||
if (E->State == TreeEntry::SplitShuffle) { | |||||
SetInsertPointAfterOps(E->getOperand(0)); | |||||
Value *Op0 = vectorizeTree(E->getOperand(0)); | |||||
SetInsertPointAfterOps(E->getOperand(1)); | |||||
Value *Op1 = vectorizeTree(E->getOperand(1)); | |||||
// Fix the insert point to emit shuffles exactly after the last instruction | |||||
// in the operands. | |||||
SetInsertPointAfterOps({Op0, Op1}); | |||||
SmallVector<int> Mask; | |||||
inversePermutation(E->ReorderIndices, Mask); | |||||
Value *Vec = Builder.CreateShuffleVector(Op0, Op1, Mask); | |||||
if (auto *I = dyn_cast<Instruction>(Vec)) { | |||||
GatherShuffleSeq.insert(I); | |||||
CSEBlocks.insert(I->getParent()); | |||||
} | |||||
E->VectorizedValue = Vec; | |||||
return Vec; | |||||
} | |||||
assert((E->State == TreeEntry::Vectorize || | assert((E->State == TreeEntry::Vectorize || | ||||
E->State == TreeEntry::ScatterVectorize) && | E->State == TreeEntry::ScatterVectorize) && | ||||
"Unhandled state"); | "Unhandled state"); | ||||
unsigned ShuffleOrOp = | unsigned ShuffleOrOp = | ||||
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); | E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); | ||||
Instruction *VL0 = E->getMainOp(); | Instruction *VL0 = E->getMainOp(); | ||||
Type *ScalarTy = VL0->getType(); | Type *ScalarTy = VL0->getType(); | ||||
▲ Show 20 Lines • Show All 666 Lines • ▼ Show 20 Lines | for (const auto &ExternalUse : ExternalUses) { | ||||
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); | LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); | ||||
} | } | ||||
// For each vectorized value: | // For each vectorized value: | ||||
for (auto &TEPtr : VectorizableTree) { | for (auto &TEPtr : VectorizableTree) { | ||||
TreeEntry *Entry = TEPtr.get(); | TreeEntry *Entry = TEPtr.get(); | ||||
// No need to handle users of gathered values. | // No need to handle users of gathered values. | ||||
if (Entry->State == TreeEntry::NeedToGather) | if (Entry->State == TreeEntry::NeedToGather || | ||||
Entry->State == TreeEntry::SplitShuffle) | |||||
continue; | continue; | ||||
assert(Entry->VectorizedValue && "Can't find vectorizable value"); | assert(Entry->VectorizedValue && "Can't find vectorizable value"); | ||||
// For each lane: | // For each lane: | ||||
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { | for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { | ||||
Value *Scalar = Entry->Scalars[Lane]; | Value *Scalar = Entry->Scalars[Lane]; | ||||
▲ Show 20 Lines • Show All 3,201 Lines • Show Last 20 Lines |
clang-format: please reformat the code