Diff 550336

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,614 Lines • ▼ Show 20 Lines	struct TreeEntry {
ValueList Scalars;		ValueList Scalars;

/// The Scalars are vectorized into this value. It is initialized to Null.		/// The Scalars are vectorized into this value. It is initialized to Null.
WeakTrackingVH VectorizedValue = nullptr;		WeakTrackingVH VectorizedValue = nullptr;

/// Do we need to gather this sequence or vectorize it		/// Do we need to gather this sequence or vectorize it
/// (either with vector instruction or with scatter/gather		/// (either with vector instruction or with scatter/gather
/// intrinsics for store/load)?		/// intrinsics for store/load)?
enum EntryState { Vectorize, ScatterVectorize, NeedToGather };		enum EntryState {
		Vectorize,
		ScatterVectorize,
		PossibleStridedVectorize,
		NeedToGather
		};
EntryState State;		EntryState State;

/// Does this sequence require some shuffling?		/// Does this sequence require some shuffling?
SmallVector<int, 4> ReuseShuffleIndices;		SmallVector<int, 4> ReuseShuffleIndices;

/// Does this entry require reordering?		/// Does this entry require reordering?
SmallVector<unsigned, 4> ReorderIndices;		SmallVector<unsigned, 4> ReorderIndices;

▲ Show 20 Lines • Show All 159 Lines • ▼ Show 20 Lines	LLVM_DUMP_METHOD void dump() const {
dbgs() << "State: ";		dbgs() << "State: ";
switch (State) {		switch (State) {
case Vectorize:		case Vectorize:
dbgs() << "Vectorize\n";		dbgs() << "Vectorize\n";
break;		break;
case ScatterVectorize:		case ScatterVectorize:
dbgs() << "ScatterVectorize\n";		dbgs() << "ScatterVectorize\n";
break;		break;
		case PossibleStridedVectorize:
		dbgs() << "PossibleStridedVectorize\n";
		break;
case NeedToGather:		case NeedToGather:
dbgs() << "NeedToGather\n";		dbgs() << "NeedToGather\n";
break;		break;
}		}
dbgs() << "MainOp: ";		dbgs() << "MainOp: ";
if (MainOp)		if (MainOp)
dbgs() << *MainOp << "\n";		dbgs() << *MainOp << "\n";
else		else
▲ Show 20 Lines • Show All 891 Lines • ▼ Show 20 Lines	std::string getNodeLabel(const TreeEntry Entry, const BoUpSLP R) {
}		}
return Str;		return Str;
}		}

static std::string getNodeAttributes(const TreeEntry *Entry,		static std::string getNodeAttributes(const TreeEntry *Entry,
const BoUpSLP *) {		const BoUpSLP *) {
if (Entry->State == TreeEntry::NeedToGather)		if (Entry->State == TreeEntry::NeedToGather)
return "color=red";		return "color=red";
if (Entry->State == TreeEntry::ScatterVectorize)		if (Entry->State == TreeEntry::ScatterVectorize \|\|
		Entry->State == TreeEntry::PossibleStridedVectorize)
return "color=blue";		return "color=blue";
return "";		return "";
}		}
};		};

} // end namespace llvm		} // end namespace llvm

BoUpSLP::~BoUpSLP() {		BoUpSLP::~BoUpSLP() {
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines	if (STE && (UsedPositions.count() > 1 \|\| STE->Scalars.size() == 2)) {
}		}
return std::move(CurrentOrder);		return std::move(CurrentOrder);
}		}
return std::nullopt;		return std::nullopt;
}		}

namespace {		namespace {
/// Tracks the state we can represent the loads in the given sequence.		/// Tracks the state we can represent the loads in the given sequence.
enum class LoadsState { Gather, Vectorize, ScatterVectorize };		enum class LoadsState {
		Gather,
		Vectorize,
		ScatterVectorize,
		PossibleStridedVectorize
		};
} // anonymous namespace		} // anonymous namespace

static bool arePointersCompatible(Value Ptr1, Value Ptr2,		static bool arePointersCompatible(Value Ptr1, Value Ptr2,
const TargetLibraryInfo &TLI,		const TargetLibraryInfo &TLI,
bool CompareOpcodes = true) {		bool CompareOpcodes = true) {
if (getUnderlyingObject(Ptr1) != getUnderlyingObject(Ptr2))		if (getUnderlyingObject(Ptr1) != getUnderlyingObject(Ptr2))
return false;		return false;
auto *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);		auto *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	static LoadsState canVectorizeLoads(ArrayRef<Value > VL, const Value VL0,
}		}

Order.clear();		Order.clear();
// Check the order of pointer operands or that all pointers are the same.		// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);		bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
if (IsSorted \|\| all_of(PointerOps, [&](Value *P) {		if (IsSorted \|\| all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), TLI);		return arePointersCompatible(P, PointerOps.front(), TLI);
})) {		})) {
		bool IsPossibleStrided = false;
if (IsSorted) {		if (IsSorted) {
Value *Ptr0;		Value *Ptr0;
Value *PtrN;		Value *PtrN;
if (Order.empty()) {		if (Order.empty()) {
Ptr0 = PointerOps.front();		Ptr0 = PointerOps.front();
PtrN = PointerOps.back();		PtrN = PointerOps.back();
} else {		} else {
Ptr0 = PointerOps[Order.front()];		Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];		PtrN = PointerOps[Order.back()];
}		}
std::optional<int> Diff =		std::optional<int> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);		getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
// Check that the sorted loads are consecutive.		// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == VL.size() - 1)		if (static_cast<unsigned>(*Diff) == VL.size() - 1)
return LoadsState::Vectorize;		return LoadsState::Vectorize;
		// Simple check for a possibly strided access: the distance between the first and last sorted pointers must be a multiple of (VL.size() - 1).
		IsPossibleStrided = *Diff % (VL.size() - 1) == 0;
}		}
// TODO: need to improve analysis of the pointers, if not all of them are		// TODO: need to improve analysis of the pointers, if not all of them are
// GEPs or have > 2 operands, we end up with a gather node, which just		// GEPs or have > 2 operands, we end up with a gather node, which just
// increases the cost.		// increases the cost.
Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());		Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());
bool ProfitableGatherPointers =		bool ProfitableGatherPointers =
static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {		static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L && L->isLoopInvariant(V);		return L && L->isLoopInvariant(V);
})) <= VL.size() / 2 && VL.size() > 2;		})) <= VL.size() / 2 && VL.size() > 2;
if (ProfitableGatherPointers \|\| all_of(PointerOps, [IsSorted](Value *P) {		if (ProfitableGatherPointers \|\| all_of(PointerOps, [IsSorted](Value *P) {
auto *GEP = dyn_cast<GetElementPtrInst>(P);		auto *GEP = dyn_cast<GetElementPtrInst>(P);
return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) \|\|		return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) \|\|
(GEP && GEP->getNumOperands() == 2);		(GEP && GEP->getNumOperands() == 2);
})) {		})) {
Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();		Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
for (Value *V : VL)		for (Value *V : VL)
CommonAlignment =		CommonAlignment =
std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());		std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());		auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&		if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
!TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment))		!TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment))
return LoadsState::ScatterVectorize;		return IsPossibleStrided ? LoadsState::PossibleStridedVectorize
		: LoadsState::ScatterVectorize;
}		}
}		}

return LoadsState::Gather;		return LoadsState::Gather;
}		}

static bool clusterSortPtrAccesses(ArrayRef<Value > VL, Type ElemTy,		static bool clusterSortPtrAccesses(ArrayRef<Value > VL, Type ElemTy,
const DataLayout &DL, ScalarEvolution &SE,		const DataLayout &DL, ScalarEvolution &SE,
▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines	for (unsigned K = 0; K < VF; K += Sz) {
transform(CurrentOrder, It, [K](unsigned Pos) { return Pos + K; });		transform(CurrentOrder, It, [K](unsigned Pos) { return Pos + K; });
std::advance(It, Sz);		std::advance(It, Sz);
}		}
if (all_of(enumerate(ResOrder),		if (all_of(enumerate(ResOrder),
[](const auto &Data) { return Data.index() == Data.value(); }))		[](const auto &Data) { return Data.index() == Data.value(); }))
return std::nullopt; // No need to reorder.		return std::nullopt; // No need to reorder.
return std::move(ResOrder);		return std::move(ResOrder);
}		}
if (TE.State == TreeEntry::Vectorize &&		if ((TE.State == TreeEntry::Vectorize \|\|
		TE.State == TreeEntry::PossibleStridedVectorize) &&
(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) \|\|		(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) \|\|
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&		(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
!TE.isAltShuffle())		!TE.isAltShuffle())
return TE.ReorderIndices;		return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {		if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
auto PHICompare = [](llvm::Value V1, llvm::Value V2) {		auto PHICompare = [](llvm::Value V1, llvm::Value V2) {
if (V1 == V2)		if (V1 == V2)
return false;		return false;
▲ Show 20 Lines • Show All 234 Lines • ▼ Show 20 Lines	if (std::optional<OrdersType> CurrentOrder =
return EI.UserTE->State == TreeEntry::Vectorize &&		return EI.UserTE->State == TreeEntry::Vectorize &&
EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;		EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;
}))		}))
return;		return;
UserTE = UserTE->UserTreeIndices.back().UserTE;		UserTE = UserTE->UserTreeIndices.back().UserTE;
++Cnt;		++Cnt;
}		}
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());		VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
if (TE->State != TreeEntry::Vectorize \|\| !TE->ReuseShuffleIndices.empty())		if (!(TE->State == TreeEntry::Vectorize \|\|
		TE->State == TreeEntry::PossibleStridedVectorize) \|\|
		!TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);		GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
if (TE->State == TreeEntry::Vectorize &&		if (TE->State == TreeEntry::Vectorize &&
TE->getOpcode() == Instruction::PHI)		TE->getOpcode() == Instruction::PHI)
PhisToOrders.try_emplace(TE.get(), *CurrentOrder);		PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
}		}
});		});

// Reorder the graph nodes according to their vectorization factor.		// Reorder the graph nodes according to their vectorization factor.
for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;		for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
VF /= 2) {		VF /= 2) {
auto It = VFToOrderedEntries.find(VF);		auto It = VFToOrderedEntries.find(VF);
if (It == VFToOrderedEntries.end())		if (It == VFToOrderedEntries.end())
continue;		continue;
// Try to find the most profitable order. We just are looking for the most		// Try to find the most profitable order. We just are looking for the most
// used order and reorder scalar elements in the nodes according to this		// used order and reorder scalar elements in the nodes according to this
// mostly used order.		// mostly used order.
ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();		ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
// All operands are reordered and used only in this node - propagate the		// All operands are reordered and used only in this node - propagate the
// most used order to the user node.		// most used order to the user node.
MapVector<OrdersType, unsigned,		MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>		DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;		OrdersUses;
		// Last chance orders - scatter vectorize. Try to use their orders if no
		// other orders or the order is counted already.
		SmallVector<OrdersType> StridedVectorizeOrders;
SmallPtrSet<const TreeEntry *, 4> VisitedOps;		SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const TreeEntry *OpTE : OrderedEntries) {		for (const TreeEntry *OpTE : OrderedEntries) {
// No need to reorder these nodes, still need to extend and to use shuffle,		// No need to reorder these nodes, still need to extend and to use shuffle,
// just need to merge reordering shuffle and the reuse shuffle.		// just need to merge reordering shuffle and the reuse shuffle.
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))		if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
continue;		continue;
// Count number of orders uses.		// Count number of orders uses.
const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,		const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
Show All 30 Lines	for (const TreeEntry *OpTE : OrderedEntries) {
} else {		} else {
for (const OrdersType &ExtOrder : ExternalUserReorderIndices)		for (const OrdersType &ExtOrder : ExternalUserReorderIndices)
++OrdersUses.insert(std::make_pair(ExtOrder, 0)).first->second;		++OrdersUses.insert(std::make_pair(ExtOrder, 0)).first->second;
}		}
// No other useful reorder data in this entry.		// No other useful reorder data in this entry.
if (Order.empty())		if (Order.empty())
continue;		continue;
}		}
		// Postpone scatter orders.
		if (OpTE->State == TreeEntry::PossibleStridedVectorize) {
		StridedVectorizeOrders.push_back(Order);
		continue;
		}
// Stores actually store the mask, not the order, need to invert.		// Stores actually store the mask, not the order, need to invert.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&		if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {		OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;		SmallVector<int> Mask;
inversePermutation(Order, Mask);		inversePermutation(Order, Mask);
unsigned E = Order.size();		unsigned E = Order.size();
OrdersType CurrentOrder(E, E);		OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {		transform(Mask, CurrentOrder.begin(), [E](int Idx) {
return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);		return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);
});		});
fixupOrderingIndices(CurrentOrder);		fixupOrderingIndices(CurrentOrder);
++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;		++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
} else {		} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;		++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}		}
}		}
// Set order of the user node.		// Set order of the user node.
if (OrdersUses.empty())		if (OrdersUses.empty()) {
		if (StridedVectorizeOrders.empty())
continue;		continue;
		// Add (potentially!) strided vectorize orders.
		for (OrdersType &Order : StridedVectorizeOrders)
		++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
		} else {
		// Account for (potentially!) strided vectorize orders only if they were used
		// already.
		for (OrdersType &Order : StridedVectorizeOrders) {
		auto *It = OrdersUses.find(Order);
		if (It != OrdersUses.end())
		++It->second;
		}
		}
// Choose the most used order.		// Choose the most used order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;		ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
unsigned Cnt = OrdersUses.front().second;		unsigned Cnt = OrdersUses.front().second;
for (const auto &Pair : drop_begin(OrdersUses)) {		for (const auto &Pair : drop_begin(OrdersUses)) {
if (Cnt < Pair.second \|\| (Cnt == Pair.second && Pair.first.empty())) {		if (Cnt < Pair.second \|\| (Cnt == Pair.second && Pair.first.empty())) {
BestOrder = Pair.first;		BestOrder = Pair.first;
Cnt = Pair.second;		Cnt = Pair.second;
}		}
Show All 24 Lines	for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
}) &&		}) &&
"All users must be of VF size.");		"All users must be of VF size.");
// Update ordering of the operands with the smaller VF than the given		// Update ordering of the operands with the smaller VF than the given
// one.		// one.
reorderNodeWithReuses(*TE, Mask);		reorderNodeWithReuses(*TE, Mask);
}		}
continue;		continue;
}		}
if (TE->State == TreeEntry::Vectorize &&		if ((TE->State == TreeEntry::Vectorize \|\|
		TE->State == TreeEntry::PossibleStridedVectorize) &&
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,		isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
InsertElementInst>(TE->getMainOp()) &&		InsertElementInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {		!TE->isAltShuffle()) {
// Build correct orders for extract{element,value}, loads and		// Build correct orders for extract{element,value}, loads and
// stores.		// stores.
reorderOrder(TE->ReorderIndices, Mask);		reorderOrder(TE->ReorderIndices, Mask);
if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))		if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))
TE->reorderOperands(Mask);		TE->reorderOperands(Mask);
Show All 37 Lines	if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
// order.		// order.
Edges.emplace_back(I, TE);		Edges.emplace_back(I, TE);
// Add ScatterVectorize nodes to the list of operands, where just		// Add ScatterVectorize nodes to the list of operands, where just
// reordering of the scalars is required. Similar to the gathers, so		// reordering of the scalars is required. Similar to the gathers, so
// simply add to the list of gathered ops.		// simply add to the list of gathered ops.
// If there are reused scalars, process this node as a regular vectorize		// If there are reused scalars, process this node as a regular vectorize
// node, just reorder reuses mask.		// node, just reorder reuses mask.
if (TE->State != TreeEntry::Vectorize &&		if (TE->State != TreeEntry::Vectorize &&
		TE->State != TreeEntry::PossibleStridedVectorize &&
TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())		TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
GatherOps.push_back(TE);		GatherOps.push_back(TE);
continue;		continue;
}		}
TreeEntry *Gather = nullptr;		TreeEntry *Gather = nullptr;
if (count_if(ReorderableGathers,		if (count_if(ReorderableGathers,
[&Gather, UserTE, I](TreeEntry *TE) {		[&Gather, UserTE, I](TreeEntry *TE) {
assert(TE->State != TreeEntry::Vectorize &&		assert(TE->State != TreeEntry::Vectorize &&
Show All 22 Lines	void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;		DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
// Find all reorderable leaf nodes with the given VF.		// Find all reorderable leaf nodes with the given VF.
// Currently they are vectorized loads, extracts without alternate operands +		// Currently they are vectorized loads, extracts without alternate operands +
// some gathering of extracts.		// some gathering of extracts.
SmallVector<TreeEntry *> NonVectorized;		SmallVector<TreeEntry *> NonVectorized;
for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,		for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
&NonVectorized](		&NonVectorized](
const std::unique_ptr<TreeEntry> &TE) {		const std::unique_ptr<TreeEntry> &TE) {
if (TE->State != TreeEntry::Vectorize)		if (TE->State != TreeEntry::Vectorize &&
		TE->State != TreeEntry::PossibleStridedVectorize)
NonVectorized.push_back(TE.get());		NonVectorized.push_back(TE.get());
if (std::optional<OrdersType> CurrentOrder =		if (std::optional<OrdersType> CurrentOrder =
getReorderingData(TE, /TopToBottom=*/false)) {		getReorderingData(TE, /TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());		OrderedEntries.insert(TE.get());
if (TE->State != TreeEntry::Vectorize \|\| !TE->ReuseShuffleIndices.empty())		if (!(TE->State == TreeEntry::Vectorize \|\|
		TE->State == TreeEntry::PossibleStridedVectorize) \|\|
		!TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);		GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
}		}
});		});

// 1. Propagate order to the graph nodes, which use only reordered nodes.		// 1. Propagate order to the graph nodes, which use only reordered nodes.
// I.e., if the node has operands, that are reordered, try to make at least		// I.e., if the node has operands, that are reordered, try to make at least
// one operand order in the natural order and reorder others + reorder the		// one operand order in the natural order and reorder others + reorder the
// user node itself.		// user node itself.
SmallPtrSet<const TreeEntry *, 4> Visited;		SmallPtrSet<const TreeEntry *, 4> Visited;
while (!OrderedEntries.empty()) {		while (!OrderedEntries.empty()) {
// 1. Filter out only reordered nodes.		// 1. Filter out only reordered nodes.
// 2. If the entry has multiple uses - skip it and jump to the next node.		// 2. If the entry has multiple uses - skip it and jump to the next node.
DenseMap<TreeEntry , SmallVector<std::pair<unsigned, TreeEntry >>> Users;		DenseMap<TreeEntry , SmallVector<std::pair<unsigned, TreeEntry >>> Users;
SmallVector<TreeEntry *> Filtered;		SmallVector<TreeEntry *> Filtered;
for (TreeEntry *TE : OrderedEntries) {		for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize \|\|		if (!(TE->State == TreeEntry::Vectorize \|\|
		TE->State == TreeEntry::PossibleStridedVectorize \|\|
(TE->State == TreeEntry::NeedToGather &&		(TE->State == TreeEntry::NeedToGather &&
GathersToOrders.count(TE))) \|\|		GathersToOrders.count(TE))) \|\|
TE->UserTreeIndices.empty() \|\| !TE->ReuseShuffleIndices.empty() \|\|		TE->UserTreeIndices.empty() \|\| !TE->ReuseShuffleIndices.empty() \|\|
!all_of(drop_begin(TE->UserTreeIndices),		!all_of(drop_begin(TE->UserTreeIndices),
[TE](const EdgeInfo &EI) {		[TE](const EdgeInfo &EI) {
return EI.UserTE == TE->UserTreeIndices.front().UserTE;		return EI.UserTE == TE->UserTreeIndices.front().UserTE;
}) \|\|		}) \|\|
!Visited.insert(TE).second) {		!Visited.insert(TE).second) {
Show All 30 Lines	for (auto &Data : UsersVec) {
});		});
continue;		continue;
}		}
// All operands are reordered and used only in this node - propagate the		// All operands are reordered and used only in this node - propagate the
// most used order to the user node.		// most used order to the user node.
MapVector<OrdersType, unsigned,		MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>		DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;		OrdersUses;
		// Last chance orders - scatter vectorize. Try to use their orders if no
		// other orders or the order is counted already.
		SmallVector<std::pair<OrdersType, unsigned>> StridedVectorizeOrders;
// Do the analysis for each tree entry only once, otherwise the order of		// Do the analysis for each tree entry only once, otherwise the order of
// the same node may be considered several times, though it might not be		// the same node may be considered several times, though it might not be
// profitable.		// profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;		SmallPtrSet<const TreeEntry *, 4> VisitedOps;
SmallPtrSet<const TreeEntry *, 4> VisitedUsers;		SmallPtrSet<const TreeEntry *, 4> VisitedUsers;
for (const auto &Op : Data.second) {		for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;		TreeEntry *OpTE = Op.second;
if (!VisitedOps.insert(OpTE).second)		if (!VisitedOps.insert(OpTE).second)
continue;		continue;
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))		if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
continue;		continue;
const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {		const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
if (OpTE->State == TreeEntry::NeedToGather \|\|		if (OpTE->State == TreeEntry::NeedToGather \|\|
!OpTE->ReuseShuffleIndices.empty())		!OpTE->ReuseShuffleIndices.empty())
return GathersToOrders.find(OpTE)->second;		return GathersToOrders.find(OpTE)->second;
return OpTE->ReorderIndices;		return OpTE->ReorderIndices;
}();		}();
unsigned NumOps = count_if(		unsigned NumOps = count_if(
Data.second, [OpTE](const std::pair<unsigned, TreeEntry *> &P) {		Data.second, [OpTE](const std::pair<unsigned, TreeEntry *> &P) {
return P.second == OpTE;		return P.second == OpTE;
});		});
		// Postpone scatter orders.
		if (OpTE->State == TreeEntry::PossibleStridedVectorize) {
		StridedVectorizeOrders.emplace_back(Order, NumOps);
		continue;
		}
// Stores actually store the mask, not the order, need to invert.		// Stores actually store the mask, not the order, need to invert.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&		if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {		OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;		SmallVector<int> Mask;
inversePermutation(Order, Mask);		inversePermutation(Order, Mask);
unsigned E = Order.size();		unsigned E = Order.size();
OrdersType CurrentOrder(E, E);		OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {		transform(Mask, CurrentOrder.begin(), [E](int Idx) {
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	for (auto &Data : UsersVec) {
return EI.UserTE == UserTE;		return EI.UserTE == UserTE;
});		});
})) <= Ops.size() / 2)		})) <= Ops.size() / 2)
++Res.first->second;		++Res.first->second;
}		}
}		}
// If no orders - skip current nodes and jump to the next one, if any.		// If no orders - skip current nodes and jump to the next one, if any.
if (OrdersUses.empty()) {		if (OrdersUses.empty()) {
for_each(Data.second,		if (StridedVectorizeOrders.empty() \|\|
		(Data.first->ReorderIndices.empty() &&
		Data.first->ReuseShuffleIndices.empty() &&
		!(IgnoreReorder &&
		Data.first == VectorizableTree.front().get()))) {
		for_each(
		Data.second,
[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {		[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
OrderedEntries.remove(Op.second);		OrderedEntries.remove(Op.second);
});		});
continue;		continue;
}		}
		// Add (potentially!) strided vectorize orders.
		for (std::pair<OrdersType, unsigned> &Pair : StridedVectorizeOrders)
		OrdersUses.insert(std::make_pair(Pair.first, 0)).first->second +=
		Pair.second;
		} else {
		// Account for (potentially!) strided vectorize orders only if they were used
		// already.
		for (std::pair<OrdersType, unsigned> &Pair : StridedVectorizeOrders) {
		auto *It = OrdersUses.find(Pair.first);
		if (It != OrdersUses.end())
		It->second += Pair.second;
		}
		}
// Choose the best order.		// Choose the best order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;		ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
unsigned Cnt = OrdersUses.front().second;		unsigned Cnt = OrdersUses.front().second;
for (const auto &Pair : drop_begin(OrdersUses)) {		for (const auto &Pair : drop_begin(OrdersUses)) {
if (Cnt < Pair.second \|\| (Cnt == Pair.second && Pair.first.empty())) {		if (Cnt < Pair.second \|\| (Cnt == Pair.second && Pair.first.empty())) {
BestOrder = Pair.first;		BestOrder = Pair.first;
Cnt = Pair.second;		Cnt = Pair.second;
}		}
Show All 21 Lines	for (auto &Data : UsersVec) {
if (!VisitedOps.insert(TE).second)		if (!VisitedOps.insert(TE).second)
continue;		continue;
if (TE->ReuseShuffleIndices.size() == BestOrder.size()) {		if (TE->ReuseShuffleIndices.size() == BestOrder.size()) {
reorderNodeWithReuses(*TE, Mask);		reorderNodeWithReuses(*TE, Mask);
continue;		continue;
}		}
// Gathers are processed separately.		// Gathers are processed separately.
if (TE->State != TreeEntry::Vectorize &&		if (TE->State != TreeEntry::Vectorize &&
		TE->State != TreeEntry::PossibleStridedVectorize &&
(TE->State != TreeEntry::ScatterVectorize \|\|		(TE->State != TreeEntry::ScatterVectorize \|\|
TE->ReorderIndices.empty()))		TE->ReorderIndices.empty()))
continue;		continue;
assert((BestOrder.size() == TE->ReorderIndices.size() \|\|		assert((BestOrder.size() == TE->ReorderIndices.size() \|\|
TE->ReorderIndices.empty()) &&		TE->ReorderIndices.empty()) &&
"Non-matching sizes of user/operand entries.");		"Non-matching sizes of user/operand entries.");
reorderOrder(TE->ReorderIndices, Mask);		reorderOrder(TE->ReorderIndices, Mask);
if (IgnoreReorder && TE == VectorizableTree.front().get())		if (IgnoreReorder && TE == VectorizableTree.front().get())
Show All 14 Lines	for (auto &Data : UsersVec) {
// Reorder operands of the user node and set the ordering for the user		// Reorder operands of the user node and set the ordering for the user
// node itself.		// node itself.
if (Data.first->State != TreeEntry::Vectorize \|\|		if (Data.first->State != TreeEntry::Vectorize \|\|
!isa<ExtractElementInst, ExtractValueInst, LoadInst>(		!isa<ExtractElementInst, ExtractValueInst, LoadInst>(
Data.first->getMainOp()) \|\|		Data.first->getMainOp()) \|\|
Data.first->isAltShuffle())		Data.first->isAltShuffle())
Data.first->reorderOperands(Mask);		Data.first->reorderOperands(Mask);
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) \|\|		if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) \|\|
Data.first->isAltShuffle()) {		Data.first->isAltShuffle() \|\|
		Data.first->State == TreeEntry::PossibleStridedVectorize) {
reorderScalars(Data.first->Scalars, Mask);		reorderScalars(Data.first->Scalars, Mask);
reorderOrder(Data.first->ReorderIndices, MaskOrder);		reorderOrder(Data.first->ReorderIndices, MaskOrder);
if (Data.first->ReuseShuffleIndices.empty() &&		if (Data.first->ReuseShuffleIndices.empty() &&
!Data.first->ReorderIndices.empty() &&		!Data.first->ReorderIndices.empty() &&
!Data.first->isAltShuffle()) {		!Data.first->isAltShuffle()) {
// Insert user node to the list to try to sink reordering deeper in		// Insert user node to the list to try to sink reordering deeper in
// the graph.		// the graph.
OrderedEntries.insert(Data.first);		OrderedEntries.insert(Data.first);
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
// Skip in-tree scalars that become vectors		// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {		if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];		Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized		// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in Lane 0 will		// instructions. If that is the case, the one in Lane 0 will
// be used.		// be used.
if (UseScalar != U \|\|		if (UseScalar != U \|\|
UseEntry->State == TreeEntry::ScatterVectorize \|\|		UseEntry->State == TreeEntry::ScatterVectorize \|\|
		UseEntry->State == TreeEntry::PossibleStridedVectorize \|\|
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {		!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U		LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");		<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");		assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;		continue;
}		}
}		}

▲ Show 20 Lines • Show All 342 Lines • ▼ Show 20 Lines	case Instruction::Load: {
// from such a struct, we read/write packed bits disagreeing with the		// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.		// unvectorized version.
switch (canVectorizeLoads(VL, VL0, TTI, DL, SE, LI, *TLI, CurrentOrder,		switch (canVectorizeLoads(VL, VL0, TTI, DL, SE, LI, *TLI, CurrentOrder,
PointerOps)) {		PointerOps)) {
case LoadsState::Vectorize:		case LoadsState::Vectorize:
return TreeEntry::Vectorize;		return TreeEntry::Vectorize;
case LoadsState::ScatterVectorize:		case LoadsState::ScatterVectorize:
return TreeEntry::ScatterVectorize;		return TreeEntry::ScatterVectorize;
		case LoadsState::PossibleStridedVectorize:
		return TreeEntry::PossibleStridedVectorize;
case LoadsState::Gather:		case LoadsState::Gather:
#ifndef NDEBUG		#ifndef NDEBUG
Type *ScalarTy = VL0->getType();		Type *ScalarTy = VL0->getType();
if (DL->getTypeSizeInBits(ScalarTy) !=		if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy))		DL->getTypeAllocSizeInBits(ScalarTy))
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");		LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
else if (any_of(VL,		else if (any_of(VL,
[](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))		[](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))
▲ Show 20 Lines • Show All 373 Lines • ▼ Show 20 Lines	if (IsCommutative) {
return false;		return false;
}		}
return true;		return true;
};		};
SmallVector<unsigned> SortedIndices;		SmallVector<unsigned> SortedIndices;
BasicBlock *BB = nullptr;		BasicBlock *BB = nullptr;
bool IsScatterVectorizeUserTE =		bool IsScatterVectorizeUserTE =
UserTreeIdx.UserTE &&		UserTreeIdx.UserTE &&
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;		(UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize \|\|
		UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize);
bool AreAllSameInsts =		bool AreAllSameInsts =
(S.getOpcode() && allSameBlock(VL)) \|\|		(S.getOpcode() && allSameBlock(VL)) \|\|
(S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE &&		(S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE &&
VL.size() > 2 &&		VL.size() > 2 &&
all_of(VL,		all_of(VL,
[&BB](Value *V) {		[&BB](Value *V) {
auto *I = dyn_cast<GetElementPtrInst>(V);		auto *I = dyn_cast<GetElementPtrInst>(V);
if (!I)		if (!I)
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	for (Value *V : VL) {
return;		return;
}		}
}		}
}		}

// Special processing for sorted pointers for ScatterVectorize node with		// Special processing for sorted pointers for ScatterVectorize node with
// constant indices only.		// constant indices only.
if (AreAllSameInsts && UserTreeIdx.UserTE &&		if (AreAllSameInsts && UserTreeIdx.UserTE &&
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize &&		(UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize \|\|
		UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize) &&
!(S.getOpcode() && allSameBlock(VL))) {		!(S.getOpcode() && allSameBlock(VL))) {
assert(S.OpValue->getType()->isPointerTy() &&		assert(S.OpValue->getType()->isPointerTy() &&
count_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); }) >=		count_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); }) >=
2 &&		2 &&
"Expected pointers only.");		"Expected pointers only.");
// Reset S to make it GetElementPtr kind of node.		// Reset S to make it GetElementPtr kind of node.
const auto It = find_if(VL, [](Value V) { return isa<GetElementPtrInst>(V); });		const auto It = find_if(VL, [](Value V) { return isa<GetElementPtrInst>(V); });
assert(It != VL.end() && "Expected at least one GEP.");		assert(It != VL.end() && "Expected at least one GEP.");
▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines	switch (ShuffleOrOp) {
case Instruction::Load: {		case Instruction::Load: {
// Check that a vectorized load would load the same memory as a scalar		// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller		// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM		// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores		// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the		// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.		// unvectorized version.
TreeEntry *TE = nullptr;		TreeEntry *TE = nullptr;
		fixupOrderingIndices(CurrentOrder);
switch (State) {		switch (State) {
case TreeEntry::Vectorize:		case TreeEntry::Vectorize:
if (CurrentOrder.empty()) {		if (CurrentOrder.empty()) {
// Original loads are consecutive and do not require reordering.		// Original loads are consecutive and do not require reordering.
TE = newTreeEntry(VL, Bundle /vectorized/, S, UserTreeIdx,		TE = newTreeEntry(VL, Bundle /vectorized/, S, UserTreeIdx,
ReuseShuffleIndicies);		ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");		LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {		} else {
fixupOrderingIndices(CurrentOrder);
// Need to reorder.		// Need to reorder.
TE = newTreeEntry(VL, Bundle /vectorized/, S, UserTreeIdx,		TE = newTreeEntry(VL, Bundle /vectorized/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);		ReuseShuffleIndicies, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");		LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}		}
TE->setOperandsInOrder();		TE->setOperandsInOrder();
break;		break;
		case TreeEntry::PossibleStridedVectorize:
		// Vectorizing non-consecutive loads with `llvm.masked.gather`.
		if (CurrentOrder.empty()) {
		TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S,
		UserTreeIdx, ReuseShuffleIndicies);
		} else {
		TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S,
		UserTreeIdx, ReuseShuffleIndicies, CurrentOrder);
		}
		TE->setOperandsInOrder();
		buildTree_rec(PointerOps, Depth + 1, {TE, 0});
		LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
		break;
case TreeEntry::ScatterVectorize:		case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.		// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,		TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndicies);		UserTreeIdx, ReuseShuffleIndicies);
TE->setOperandsInOrder();		TE->setOperandsInOrder();
buildTree_rec(PointerOps, Depth + 1, {TE, 0});		buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");		LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;		break;
▲ Show 20 Lines • Show All 903 Lines • ▼ Show 20 Lines	if (VL.size() > 2 && S.getOpcode() == Instruction::Load &&
SmallVector<Value *> PointerOps;		SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;		OrdersType CurrentOrder;
LoadsState LS =		LoadsState LS =
canVectorizeLoads(Slice, Slice.front(), TTI, R.DL, R.SE,		canVectorizeLoads(Slice, Slice.front(), TTI, R.DL, R.SE,
R.LI, R.TLI, CurrentOrder, PointerOps);		R.LI, R.TLI, CurrentOrder, PointerOps);
switch (LS) {		switch (LS) {
case LoadsState::Vectorize:		case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:		case LoadsState::ScatterVectorize:
		case LoadsState::PossibleStridedVectorize:
// Mark the vectorized loads so that we don't vectorize them		// Mark the vectorized loads so that we don't vectorize them
// again.		// again.
if (LS == LoadsState::Vectorize)		if (LS == LoadsState::Vectorize)
++VectorizedCnt;		++VectorizedCnt;
else		else
++ScatterVectorizeCnt;		++ScatterVectorizeCnt;
VectorizedLoads.insert(Slice.begin(), Slice.end());		VectorizedLoads.insert(Slice.begin(), Slice.end());
// If we vectorized initial block, no need to try to vectorize		// If we vectorized initial block, no need to try to vectorize
▲ Show 20 Lines • Show All 564 Lines • ▼ Show 20 Lines	if (!all_of(GatheredScalars, PoisonValue::classof)) {
Estimator.add(BV, ReuseMask);		Estimator.add(BV, ReuseMask);
}		}
if (ExtractShuffle)		if (ExtractShuffle)
Estimator.add(E, std::nullopt);		Estimator.add(E, std::nullopt);
return Estimator.finalize(E->ReuseShuffleIndices);		return Estimator.finalize(E->ReuseShuffleIndices);
}		}
InstructionCost CommonCost = 0;		InstructionCost CommonCost = 0;
SmallVector<int> Mask;		SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {		if (!E->ReorderIndices.empty() &&
		E->State != TreeEntry::PossibleStridedVectorize) {
SmallVector<int> NewMask;		SmallVector<int> NewMask;
if (E->getOpcode() == Instruction::Store) {		if (E->getOpcode() == Instruction::Store) {
// For stores the order is actually a mask.		// For stores the order is actually a mask.
NewMask.resize(E->ReorderIndices.size());		NewMask.resize(E->ReorderIndices.size());
copy(E->ReorderIndices, NewMask.begin());		copy(E->ReorderIndices, NewMask.begin());
} else {		} else {
inversePermutation(E->ReorderIndices, NewMask);		inversePermutation(E->ReorderIndices, NewMask);
}		}
::addMask(Mask, NewMask);		::addMask(Mask, NewMask);
}		}
if (NeedToShuffleReuses)		if (NeedToShuffleReuses)
::addMask(Mask, E->ReuseShuffleIndices);		::addMask(Mask, E->ReuseShuffleIndices);
if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))		if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
CommonCost =		CommonCost =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);		TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
assert((E->State == TreeEntry::Vectorize \|\|		assert((E->State == TreeEntry::Vectorize \|\|
E->State == TreeEntry::ScatterVectorize) &&		E->State == TreeEntry::ScatterVectorize \|\|
		E->State == TreeEntry::PossibleStridedVectorize) &&
"Unhandled state");		"Unhandled state");
assert(E->getOpcode() &&		assert(E->getOpcode() &&
((allSameType(VL) && allSameBlock(VL)) \|\|		((allSameType(VL) && allSameBlock(VL)) \|\|
(E->getOpcode() == Instruction::GetElementPtr &&		(E->getOpcode() == Instruction::GetElementPtr &&
E->getMainOp()->getType()->isPointerTy())) &&		E->getMainOp()->getType()->isPointerTy())) &&
"Invalid VL");		"Invalid VL");
Instruction *VL0 = E->getMainOp();		Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =		unsigned ShuffleOrOp =
▲ Show 20 Lines • Show All 445 Lines • ▼ Show 20 Lines	case Instruction::Load: {
auto *LI0 = cast<LoadInst>(VL0);		auto *LI0 = cast<LoadInst>(VL0);
auto GetVectorCost = [=](InstructionCost CommonCost) {		auto GetVectorCost = [=](InstructionCost CommonCost) {
InstructionCost VecLdCost;		InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {		if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(		VecLdCost = TTI->getMemoryOpCost(
Instruction::Load, VecTy, LI0->getAlign(),		Instruction::Load, VecTy, LI0->getAlign(),
LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo());		LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo());
} else {		} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");		assert((E->State == TreeEntry::ScatterVectorize \|\|
		E->State == TreeEntry::PossibleStridedVectorize) &&
		"Unknown EntryState");
Align CommonAlignment = LI0->getAlign();		Align CommonAlignment = LI0->getAlign();
for (Value *V : VL)		for (Value *V : VL)
CommonAlignment =		CommonAlignment =
std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());		std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
VecLdCost = TTI->getGatherScatterOpCost(		VecLdCost = TTI->getGatherScatterOpCost(
Instruction::Load, VecTy, LI0->getPointerOperand(),		Instruction::Load, VecTy, LI0->getPointerOperand(),
/VariableMask=/false, CommonAlignment, CostKind);		/VariableMask=/false, CommonAlignment, CostKind);
}		}
return VecLdCost + CommonCost;		return VecLdCost + CommonCost;
};		};

InstructionCost Cost = GetCostDiff(GetScalarCost, GetVectorCost);		InstructionCost Cost = GetCostDiff(GetScalarCost, GetVectorCost);
// If this node generates masked gather load then it is not a terminal node.		// If this node generates masked gather load then it is not a terminal node.
// Hence address operand cost is estimated separately.		// Hence address operand cost is estimated separately.
if (E->State == TreeEntry::ScatterVectorize)		if (E->State == TreeEntry::ScatterVectorize \|\|
		E->State == TreeEntry::PossibleStridedVectorize)
return Cost;		return Cost;

// Estimate cost of GEPs since this tree node is a terminator.		// Estimate cost of GEPs since this tree node is a terminator.
SmallVector<Value *> PointerOps(VL.size());		SmallVector<Value *> PointerOps(VL.size());
for (auto [I, V] : enumerate(VL))		for (auto [I, V] : enumerate(VL))
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();		PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
return Cost + GetGEPCostDiff(PointerOps, LI0->getPointerOperand());		return Cost + GetGEPCostDiff(PointerOps, LI0->getPointerOperand());
}		}
▲ Show 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&		if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
AreVectorizableGathers(VectorizableTree[1].get(),		AreVectorizableGathers(VectorizableTree[1].get(),
VectorizableTree[0]->Scalars.size()))		VectorizableTree[0]->Scalars.size()))
return true;		return true;

// Gathering cost would be too much for tiny trees.		// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->State == TreeEntry::NeedToGather \|\|		if (VectorizableTree[0]->State == TreeEntry::NeedToGather \|\|
(VectorizableTree[1]->State == TreeEntry::NeedToGather &&		(VectorizableTree[1]->State == TreeEntry::NeedToGather &&
VectorizableTree[0]->State != TreeEntry::ScatterVectorize))		VectorizableTree[0]->State != TreeEntry::ScatterVectorize &&
		VectorizableTree[0]->State != TreeEntry::PossibleStridedVectorize))
return false;		return false;

return true;		return true;
}		}

static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,		static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
TargetTransformInfo *TTI,		TargetTransformInfo *TTI,
bool MustMatchOrInst) {		bool MustMatchOrInst) {
▲ Show 20 Lines • Show All 1,559 Lines • ▼ Show 20 Lines	public:

~ShuffleInstructionBuilder() {		~ShuffleInstructionBuilder() {
assert((IsFinalized \|\| CommonMask.empty()) &&		assert((IsFinalized \|\| CommonMask.empty()) &&
"Shuffle construction must be finalized.");		"Shuffle construction must be finalized.");
}		}
};		};

Value BoUpSLP::vectorizeOperand(TreeEntry E, unsigned NodeIdx) {		Value BoUpSLP::vectorizeOperand(TreeEntry E, unsigned NodeIdx) {
ArrayRef<Value *> VL = E->getOperand(NodeIdx);		ValueList &VL = E->getOperand(NodeIdx);
		if (E->State == TreeEntry::PossibleStridedVectorize &&
		!E->ReorderIndices.empty()) {
		SmallVector<int> Mask(E->ReorderIndices.begin(), E->ReorderIndices.end());
		reorderScalars(VL, Mask);
		}
const unsigned VF = VL.size();		const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL, *TLI);		InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.		// Special processing for GEPs bundle, which may include non-gep values.
if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {		if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
const auto *It =		const auto *It =
find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });		find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });
if (It != VL.end())		if (It != VL.end())
S = getSameOpcode(It, TLI);		S = getSameOpcode(It, TLI);
▲ Show 20 Lines • Show All 475 Lines • ▼ Show 20 Lines	Value BoUpSLP::vectorizeTree(TreeEntry E) {

auto FinalShuffle = [&](Value V, const TreeEntry E) {		auto FinalShuffle = [&](Value V, const TreeEntry E) {
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);		ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
if (E->getOpcode() == Instruction::Store) {		if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =		ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),		ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
E->ReorderIndices.size());		E->ReorderIndices.size());
ShuffleBuilder.add(V, Mask);		ShuffleBuilder.add(V, Mask);
		} else if (E->State == TreeEntry::PossibleStridedVectorize) {
		ShuffleBuilder.addOrdered(V, std::nullopt);
} else {		} else {
ShuffleBuilder.addOrdered(V, E->ReorderIndices);		ShuffleBuilder.addOrdered(V, E->ReorderIndices);
}		}
return ShuffleBuilder.finalize(E->ReuseShuffleIndices);		return ShuffleBuilder.finalize(E->ReuseShuffleIndices);
};		};

assert((E->State == TreeEntry::Vectorize \|\|		assert((E->State == TreeEntry::Vectorize \|\|
E->State == TreeEntry::ScatterVectorize) &&		E->State == TreeEntry::ScatterVectorize \|\|
		E->State == TreeEntry::PossibleStridedVectorize) &&
"Unhandled state");		"Unhandled state");
unsigned ShuffleOrOp =		unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();		E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
Instruction *VL0 = E->getMainOp();		Instruction *VL0 = E->getMainOp();
Type *ScalarTy = VL0->getType();		Type *ScalarTy = VL0->getType();
if (auto *Store = dyn_cast<StoreInst>(VL0))		if (auto *Store = dyn_cast<StoreInst>(VL0))
ScalarTy = Store->getValueOperand()->getType();		ScalarTy = Store->getValueOperand()->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL0))		else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
▲ Show 20 Lines • Show All 374 Lines • ▼ Show 20 Lines	case Instruction::Load: {
// LoadInst to ExternalUses list to make sure that an extract will		// LoadInst to ExternalUses list to make sure that an extract will
// be generated in the future.		// be generated in the future.
if (TreeEntry *Entry = getTreeEntry(PO)) {		if (TreeEntry *Entry = getTreeEntry(PO)) {
// Find which lane we need to extract.		// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(PO);		unsigned FoundLane = Entry->findLaneForValue(PO);
ExternalUses.emplace_back(PO, NewLI, FoundLane);		ExternalUses.emplace_back(PO, NewLI, FoundLane);
}		}
} else {		} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");		assert((E->State == TreeEntry::ScatterVectorize \|\|
		E->State == TreeEntry::PossibleStridedVectorize) &&
		"Unhandled state");
Value *VecPtr = vectorizeOperand(E, 0);		Value *VecPtr = vectorizeOperand(E, 0);
if (E->VectorizedValue) {		if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");		LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;		return E->VectorizedValue;
}		}
// Use the minimum alignment of the gathered loads.		// Use the minimum alignment of the gathered loads.
Align CommonAlignment = LI->getAlign();		Align CommonAlignment = LI->getAlign();
for (Value *V : E->Scalars)		for (Value *V : E->Scalars)
▲ Show 20 Lines • Show All 4,724 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
	; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=riscv64-unknown-linux -mattr=+v \| FileCheck %s			; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=riscv64-unknown-linux -mattr=+v \| FileCheck %s

	define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) {			define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) {
	; CHECK-LABEL: define i32 @sum_of_abs			; CHECK-LABEL: define i32 @sum_of_abs
	; CHECK-SAME: (ptr noalias [[A:%.]], ptr noalias [[B:%.]]) #[[ATTR0:[0-9]+]] {			; CHECK-SAME: (ptr noalias [[A:%.]], ptr noalias [[B:%.]]) #[[ATTR0:[0-9]+]] {
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i32 0			; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i32 0
	; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
	; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 64, i64 0, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>			; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>
	; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)			; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
	; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP3]], i1 false)			; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP3]], i1 false)
	; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i32>			; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i32>
	; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])			; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
	; CHECK-NEXT: ret i32 [[TMP6]]			; CHECK-NEXT: ret i32 [[TMP6]]
	;			;
	entry:			entry:
	%0 = load i8, ptr %a, align 1			%0 = load i8, ptr %a, align 1
	▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

	Show All 24 Lines
	; CHECK-SLP-THRESHOLD-LABEL: define void @test			; CHECK-SLP-THRESHOLD-LABEL: define void @test
	; CHECK-SLP-THRESHOLD-SAME: () #[[ATTR0:[0-9]+]] {			; CHECK-SLP-THRESHOLD-SAME: () #[[ATTR0:[0-9]+]] {
	; CHECK-SLP-THRESHOLD-NEXT: entry:			; CHECK-SLP-THRESHOLD-NEXT: entry:
	; CHECK-SLP-THRESHOLD-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null			; CHECK-SLP-THRESHOLD-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null
	; CHECK-SLP-THRESHOLD-NEXT: br label [[BB:%.*]]			; CHECK-SLP-THRESHOLD-NEXT: br label [[BB:%.*]]
	; CHECK-SLP-THRESHOLD: bb:			; CHECK-SLP-THRESHOLD: bb:
	; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0			; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0
	; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer			; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer
	; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 12, i64 8, i64 4, i64 0>			; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
	; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)			; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
	; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer			; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer
	; CHECK-SLP-THRESHOLD-NEXT: ret void			; CHECK-SLP-THRESHOLD-NEXT: ret void
	;			;
	entry:			entry:
	%cond.in.v = select i1 false, ptr null, ptr null			%cond.in.v = select i1 false, ptr null, ptr null
	br label %bb			br label %bb

	Show All 14 Lines

llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -mtriple=x86_64 -mcpu=skylake-avx512 -passes=slp-vectorizer -pass-remarks-output=%t < %s \| FileCheck %s			; RUN: opt -S -mtriple=x86_64 -mcpu=skylake-avx512 -passes=slp-vectorizer -pass-remarks-output=%t < %s \| FileCheck %s
	; RUN: FileCheck --input-file=%t --check-prefix=YAML %s			; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

	define i32 @test(ptr noalias %p, ptr noalias %addr) {			define i32 @test(ptr noalias %p, ptr noalias %addr) {
	; CHECK-LABEL: @test(			; CHECK-LABEL: @test(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.]], i32 0			; CHECK-NEXT: [[TMP0:%.]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.]], i32 0
	; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer			; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
	; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 15, i32 13, i32 11, i32 9, i32 7, i32 5, i32 3, i32 1>			; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
	; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)			; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
	; CHECK-NEXT: [[TMP4:%.]] = insertelement <8 x ptr> poison, ptr [[P:%.]], i32 0			; CHECK-NEXT: [[TMP4:%.]] = insertelement <8 x ptr> poison, ptr [[P:%.]], i32 0
	; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer			; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer
	; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP3]]			; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP3]]
	; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)			; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
	; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])			; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
	; CHECK-NEXT: ret i32 [[TMP8]]			; CHECK-NEXT: ret i32 [[TMP8]]
	;			;
	▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SLP]Fix PR63854: Add proper sorting of pointers for masked stores.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 550336

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll

llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLP]Fix PR63854: Add proper sorting of pointers for masked stores. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 550336

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll

llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll

[SLP]Fix PR63854: Add proper sorting of pointers for masked stores.
ClosedPublic