Diff 51388

lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 3,418 Lines • ▼ Show 20 Lines	bool runOnFunction(Function &F) override {
else		else
MaxVecRegSize = TTI->getRegisterBitWidth(true);		MaxVecRegSize = TTI->getRegisterBitWidth(true);

MinVecRegSize = MinVectorRegSizeOption;		MinVecRegSize = MinVectorRegSizeOption;

// Don't vectorize when the attribute NoImplicitFloat is used.		// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))		if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;		return false;

		mcrosierUnsubmitted Not Done Reply Inline Actions I'm thinking this should be a TTI hook, so each target can define the MinVecRegSize. mcrosier: I'm thinking this should be a TTI hook, so each target can define the MinVecRegSize.
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions I'll separate this from the current patch. The current patch will only handle the range of the size of vectorizable registers. JongwonLee: I'll separate this from the current patch. The current patch will only handle the range of the…
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");		DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");

// Use the bottom up slp vectorizer to construct chains that start with		// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.		// store instructions.
BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);		BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);

// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to		// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.		// delete instructions.
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	private:
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.		/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
bool tryToVectorizePair(Value A, Value B, BoUpSLP &R);		bool tryToVectorizePair(Value A, Value B, BoUpSLP &R);

/// \brief Try to vectorize a list of operands.		/// \brief Try to vectorize a list of operands.
/// \@param BuildVector A list of users to ignore for the purpose of		/// \@param BuildVector A list of users to ignore for the purpose of
/// scheduling and that don't need extracting.		/// scheduling and that don't need extracting.
/// \returns true if a value was vectorized.		/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,		bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
		unsigned VecRegSize = 128,
		mssimpsoUnsubmitted Not Done Reply Inline Actions Unless I missed something, it looks to me like every use of tryToVectorizeList passes VecRegSize. Why make the parameter optional? mssimpso: Unless I missed something, it looks to me like every use of tryToVectorizeList passes…
		bool vectorizeStoreChain = false,
ArrayRef<Value *> BuildVector = None,		ArrayRef<Value *> BuildVector = None,
bool allowReorder = false);		bool allowReorder = false);

/// \brief Try to vectorize a chain that may start at the operands of \V;		/// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);		bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);

/// \brief Vectorize the store instructions collected in Stores.		/// \brief Vectorize the store instructions collected in Stores.
bool vectorizeStoreChains(BoUpSLP &R);		bool vectorizeStoreChains(BoUpSLP &R);

/// \brief Vectorize the index computations of the getelementptr instructions		/// \brief Vectorize the index computations of the getelementptr instructions
/// collected in GEPs.		/// collected in GEPs.
bool vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R);		bool vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R);

/// \brief Scan the basic block and look for patterns that are likely to start		/// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.		/// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);		bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);

bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,		bool vectorizeStores(ArrayRef<StoreInst *> Stores, BoUpSLP &R);
BoUpSLP &R, unsigned VecRegSize);

bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
BoUpSLP &R);

/// The store instructions in a basic block organized by base pointer.		/// The store instructions in a basic block organized by base pointer.
StoreListMap Stores;		StoreListMap Stores;

/// The getelementptr instructions in a basic block organized by base pointer.		/// The getelementptr instructions in a basic block organized by base pointer.
WeakVHListMap GEPs;		WeakVHListMap GEPs;

/// The number of store instructions in a basic block.		/// The number of store instructions in a basic block.
Show All 12 Lines
/// to become invalid. We track when this has happened in the WeakVH array.		/// to become invalid. We track when this has happened in the WeakVH array.
static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,		static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
unsigned SliceBegin, unsigned SliceSize) {		unsigned SliceBegin, unsigned SliceSize) {
VL = VL.slice(SliceBegin, SliceSize);		VL = VL.slice(SliceBegin, SliceSize);
VH = VH.slice(SliceBegin, SliceSize);		VH = VH.slice(SliceBegin, SliceSize);
return !std::equal(VL.begin(), VL.end(), VH.begin());		return !std::equal(VL.begin(), VL.end(), VH.begin());
}		}

bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,		bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, BoUpSLP &R) {
int CostThreshold, BoUpSLP &R,
unsigned VecRegSize) {
unsigned ChainLen = Chain.size();
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
unsigned Sz = R.getVectorElementSize(Chain[0]);
unsigned VF = VecRegSize / Sz;

if (!isPowerOf2_32(Sz) \|\| VF < 2)
return false;

// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());

bool Changed = false;
// Look for profitable vectorizable trees at all offsets, starting at zero.
for (unsigned i = 0, e = ChainLen; i < e; ++i) {
if (i + VF > e)
break;

// Check that a previous iteration of this loop did not delete the Value.
if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
continue;

DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
<< "\n");
ArrayRef<Value *> Operands = Chain.slice(i, VF);

R.buildTree(Operands);
R.computeMinimumValueSizes();

int Cost = R.getTreeCost();

DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
if (Cost < CostThreshold) {
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
R.vectorizeTree();

// Move to the next bundle.
i += VF - 1;
Changed = true;
}
}

return Changed;
}

bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
int costThreshold, BoUpSLP &R) {
SetVector<StoreInst *> Heads, Tails;		SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst , StoreInst > ConsecutiveChain;		SmallDenseMap<StoreInst , StoreInst > ConsecutiveChain;

// We may run into multiple chains that merge into a single chain. We mark the		// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.		// stores that we vectorized so that we don't visit the same store twice.
BoUpSLP::ValueSet VectorizedStores;		BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;		bool Changed = false;

Show All 38 Lines	while (Tails.count(I) \|\| Heads.count(I)) {
break;		break;
Operands.push_back(I);		Operands.push_back(I);
// Move to the next value in the chain.		// Move to the next value in the chain.
I = ConsecutiveChain[I];		I = ConsecutiveChain[I];
}		}

// FIXME: Is division-by-2 the correct step? Should we assert that the		// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?		// register size is a power-of-2?
for (unsigned Size = MaxVecRegSize; Size >= MinVecRegSize; Size /= 2) {		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
if (vectorizeStoreChain(Operands, costThreshold, R, Size)) {		VecRegSize /= 2) {
		if (tryToVectorizeList(Operands, R, VecRegSize, true)) {
// Mark the vectorized stores so that we don't vectorize them again.		// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Operands.begin(), Operands.end());		VectorizedStores.insert(Operands.begin(), Operands.end());
Changed = true;		Changed = true;
break;		break;
}		}
}		}
}		}

Show All 37 Lines	for (Instruction &I : *BB) {
}		}
}		}
}		}

bool SLPVectorizer::tryToVectorizePair(Value A, Value B, BoUpSLP &R) {		bool SLPVectorizer::tryToVectorizePair(Value A, Value B, BoUpSLP &R) {
if (!A \|\| !B)		if (!A \|\| !B)
return false;		return false;
Value *VL[] = { A, B };		Value *VL[] = { A, B };
return tryToVectorizeList(VL, R, None, true);		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(VL, R, VecRegSize, false, None, true))
		return true;
		}
		mcrosierUnsubmitted Done Reply Inline Actions Why not just return true here and remove the unnecessary temp variable? mcrosier: Why not just return true here and remove the unnecessary temp variable?
		return false;
}		}

bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,		bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
		mcrosierUnsubmitted Done Reply Inline Actions return false (assuming you do the suggestion above). mcrosier: return false (assuming you do the suggestion above).
		unsigned VecRegSize,
		bool vectorizeStoreChain,
ArrayRef<Value *> BuildVector,		ArrayRef<Value *> BuildVector,
bool allowReorder) {		bool allowReorder) {
if (VL.size() < 2)		if (VL.size() < 2)
return false;		return false;

DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");		DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");

// Check that all of the parts are scalar instructions of the same type.		// Check that all of the parts are scalar instructions of the same type.
Instruction *I0 = dyn_cast<Instruction>(VL[0]);		Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)		if (!I0)
return false;		return false;

unsigned Opcode0 = I0->getOpcode();		unsigned Opcode0 = I0->getOpcode();

// FIXME: Register size should be a parameter to this function, so we can
// try different vectorization factors.
unsigned Sz = R.getVectorElementSize(I0);		unsigned Sz = R.getVectorElementSize(I0);
unsigned VF = MinVecRegSize / Sz;		unsigned VF = VecRegSize / Sz;

		if (!vectorizeStoreChain) {
		mzolotukhinUnsubmitted Done Reply Inline Actions Nitpick: I'd rather swap if and else blocks to avoid negation in the condition. mzolotukhin: Nitpick: I'd rather swap if and else blocks to avoid negation in the condition.
for (Value *V : VL) {		for (Value *V : VL) {
Type *Ty = V->getType();		Type *Ty = V->getType();
if (!isValidElementType(Ty))		if (!isValidElementType(Ty))
return false;		return false;
Instruction *Inst = dyn_cast<Instruction>(V);		Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst \|\| Inst->getOpcode() != Opcode0)		if (!Inst \|\| Inst->getOpcode() != Opcode0)
return false;		return false;
}		}
		} else if (!isPowerOf2_32(Sz) \|\| VF < 2)
		return false;
		mzolotukhinUnsubmitted Done Reply Inline Actions I think this check should be combined with the one below: if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2) break; and it should be done independently of the `vectorizeStoreChain` flag. mzolotukhin: I think this check should be combined with the one below: ``` if (!isPowerOf2_32(OpsWidth) \|\|…

bool Changed = false;		bool Changed = false;

// Keep track of values that were deleted by vectorizing in the loop below.		// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());		SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());

for (unsigned i = 0, e = VL.size(); i < e; ++i) {		for (unsigned i = 0, e = VL.size(); i < e; ++i) {
unsigned OpsWidth = 0;		unsigned OpsWidth = VF;

		if (!vectorizeStoreChain) {
if (i + VF > e)		if (i + VF > e)
OpsWidth = e - i;		OpsWidth = e - i;
else
OpsWidth = VF;

if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2)		if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2)
break;		break;
		} else if (i + VF > e)
		break;

// Check that a previous iteration of this loop did not delete the Value.		// Check that a previous iteration of this loop did not delete the Value.
if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))		if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
continue;		continue;

DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "		DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
<< "\n");		<< "\n");
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);		ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);

ArrayRef<Value *> BuildVectorSlice;		ArrayRef<Value *> BuildVectorSlice;
		mcrosierUnsubmitted Done Reply Inline Actions Maybe use VecRegSize, rather than Size here? mcrosier: Maybe use VecRegSize, rather than Size here?
if (!BuildVector.empty())		if (!BuildVector.empty())
BuildVectorSlice = BuildVector.slice(i, OpsWidth);		BuildVectorSlice = BuildVector.slice(i, OpsWidth);

R.buildTree(Ops, BuildVectorSlice);		R.buildTree(Ops, BuildVectorSlice);
// TODO: check if we can allow reordering also for other cases than		// TODO: check if we can allow reordering also for other cases than
// tryToVectorizePair()		// tryToVectorizePair()
if (allowReorder && R.shouldReorder()) {		if (allowReorder && R.shouldReorder()) {
assert(Ops.size() == 2);		assert(Ops.size() == 2);
assert(BuildVectorSlice.empty());		assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = { Ops[1], Ops[0] };		Value *ReorderedOps[] = { Ops[1], Ops[0] };
R.buildTree(ReorderedOps, None);		R.buildTree(ReorderedOps, None);
}		}
R.computeMinimumValueSizes();		R.computeMinimumValueSizes();
int Cost = R.getTreeCost();		int Cost = R.getTreeCost();

if (Cost < -SLPCostThreshold) {		if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");		DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
Value *VectorizedRoot = R.vectorizeTree();		Value *VectorizedRoot = R.vectorizeTree();

// Reconstruct the build vector by extracting the vectorized root. This		// Reconstruct the build vector by extracting the vectorized root. This
// way we handle the case where some elements of the vector are undefined.		// way we handle the case where some elements of the vector are undefined.
// (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))		// (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
if (!BuildVectorSlice.empty()) {		if (!BuildVectorSlice.empty()) {
// The insert point is the last build vector instruction. The vectorized		// The insert point is the last build vector instruction. The vectorized
		mcrosierUnsubmitted Not Done Reply Inline Actions I believe you've addressed this FIXME, correct? mcrosier: I believe you've addressed this FIXME, correct?
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Yes. The comments are removed. JongwonLee: Yes. The comments are removed.
// root will precede it. This guarantees that we get an instruction. The		// root will precede it. This guarantees that we get an instruction. The
// vectorized tree could have been constant folded.		// vectorized tree could have been constant folded.
Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());		Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
unsigned VecIdx = 0;		unsigned VecIdx = 0;
for (auto &V : BuildVectorSlice) {		for (auto &V : BuildVectorSlice) {
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),		IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
++BasicBlock::iterator(InsertAfter));		++BasicBlock::iterator(InsertAfter));
InsertElementInst *IE = cast<InsertElementInst>(V);		InsertElementInst *IE = cast<InsertElementInst>(V);
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines	public:
unsigned MinVecRegSize;		unsigned MinVecRegSize;

HorizontalReduction(unsigned MinVecRegSize)		HorizontalReduction(unsigned MinVecRegSize)
: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),		: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),		ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),
MinVecRegSize(MinVecRegSize) {}		MinVecRegSize(MinVecRegSize) {}

/// \brief Try to find a reduction tree.		/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode Phi, BinaryOperator B) {		bool matchAssociativeReduction(PHINode Phi, BinaryOperator B,
		unsigned VecRegSize) {
assert((!Phi \|\|		assert((!Phi \|\|
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&		std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");		"Thi phi needs to use the binary operator");

// We could have a initial reductions that is not an add.		// We could have a initial reductions that is not an add.
// r *= v1 + v2 + v3 + v4		// r *= v1 + v2 + v3 + v4
// In such a case start looking for a tree rooted in the first '+'.		// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {		if (Phi) {
Show All 11 Lines	bool matchAssociativeReduction(PHINode Phi, BinaryOperator B,

Type *Ty = B->getType();		Type *Ty = B->getType();
if (!isValidElementType(Ty))		if (!isValidElementType(Ty))
return false;		return false;

const DataLayout &DL = B->getModule()->getDataLayout();		const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();		ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;		ReducedValueOpcode = 0;
// FIXME: Register size should be a parameter to this function, so we can		ReduxWidth = VecRegSize / DL.getTypeSizeInBits(Ty);
// try different vectorization factors.
ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;		ReductionRoot = B;
ReductionPHI = Phi;		ReductionPHI = Phi;

if (ReduxWidth < 4)		if (ReduxWidth < 4)
return false;		return false;

// We currently only support adds.		// We currently only support adds.
if (ReductionOpcode != Instruction::Add &&		if (ReductionOpcode != Instruction::Add &&
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	while (!Stack.empty()) {
// values in our tree.		// values in our tree.
if (isa<BinaryOperator>(NextV) \|\| isa<SelectInst>(NextV))		if (isa<BinaryOperator>(NextV) \|\| isa<SelectInst>(NextV))
Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));		Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
else if (NextV != Phi)		else if (NextV != Phi)
return false;		return false;
}		}
return true;		return true;
}		}

		mcrosierUnsubmitted Done Reply Inline Actions Same. Remove FIXME. mcrosier: Same. Remove FIXME.
/// \brief Attempt to vectorize the tree found by		/// \brief Attempt to vectorize the tree found by
/// matchAssociativeReduction.		/// matchAssociativeReduction.
bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {		bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
if (ReducedVals.empty())		if (ReducedVals.empty())
return false;		return false;

unsigned NumReducedVals = ReducedVals.size();		unsigned NumReducedVals = ReducedVals.size();
if (NumReducedVals < ReduxWidth)		if (NumReducedVals < ReduxWidth)
▲ Show 20 Lines • Show All 217 Lines • ▼ Show 20 Lines
/// \brief Attempt to reduce a horizontal reduction.		/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding		/// If it is legal to match a horizontal reduction feeding
/// the phi node P with reduction operators BI, then check if it		/// the phi node P with reduction operators BI, then check if it
/// can be done.		/// can be done.
/// \returns true if a horizontal reduction was matched and reduced.		/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.		/// \returns false if a horizontal reduction was not matched.
static bool canMatchHorizontalReduction(PHINode P, BinaryOperator BI,		static bool canMatchHorizontalReduction(PHINode P, BinaryOperator BI,
BoUpSLP &R, TargetTransformInfo *TTI,		BoUpSLP &R, TargetTransformInfo *TTI,
unsigned MinRegSize) {		unsigned MinRegSize) {
		mssimpsoUnsubmitted Not Done Reply Inline Actions I think it would be less confusing and more consistent if MinRegSize was renamed to VecRegSize here. mssimpso: I think it would be less confusing and more consistent if MinRegSize was renamed to VecRegSize…
if (!ShouldVectorizeHor)		if (!ShouldVectorizeHor)
return false;		return false;

HorizontalReduction HorRdx(MinRegSize);		HorizontalReduction HorRdx(MinRegSize);
if (!HorRdx.matchAssociativeReduction(P, BI))		if (!HorRdx.matchAssociativeReduction(P, BI, MinRegSize))
return false;		return false;

// If there is a sufficient number of reduction values, reduce		// If there is a sufficient number of reduction values, reduce
// to a nearby power-of-2. Can safely generate oversized		// to a nearby power-of-2. Can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.		// vectors and rely on the backend to split them to legal sizes.
HorRdx.ReduxWidth =		HorRdx.ReduxWidth =
std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));		std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));

Show All 34 Lines	for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
(SameTypeIt)->getType() == (IncIt)->getType()) {		(SameTypeIt)->getType() == (IncIt)->getType()) {
VisitedInstrs.insert(*SameTypeIt);		VisitedInstrs.insert(*SameTypeIt);
++SameTypeIt;		++SameTypeIt;
}		}

// Try to vectorize them.		// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);		unsigned NumElts = (SameTypeIt - IncIt);
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");		DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {		if (NumElts > 1) {
// Success start over because instructions might have been changed.		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		mcrosierUnsubmitted Not Done Reply Inline Actions Shouldn't the call to tryToVectorizeList() still be predicated on NumElts > 1? mcrosier: Shouldn't the call to tryToVectorizeList() still be predicated on NumElts > 1?
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Fixed the code to call tryToVectorizeList() when NumElts > 1 is satisfied. JongwonLee: Fixed the code to call tryToVectorizeList() when NumElts > 1 is satisfied.
		mcrosierUnsubmitted Not Done Reply Inline Actions Now that this logic is cleaned up, you don't need the temporary bool. for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize; VecRegSize /= 2) { if (tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, None, false, VecRegSize)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; break; } } mcrosier: Now that this logic is cleaned up, you don't need the temporary bool. for (unsigned VecRegSize…
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Removed the temporary bool. JongwonLee: Removed the temporary bool.
		VecRegSize /= 2) {
		if (tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, VecRegSize)) {
HaveVectorizedPhiNodes = true;		HaveVectorizedPhiNodes = true;
Changed = true;		Changed = true;
break;		break;
}		}
		}
		if (Changed)
		break;
		}
// Start over at the next instruction of a different type (or the end).		// Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;		IncIt = SameTypeIt;
}		}
}		}

VisitedInstrs.clear();		VisitedInstrs.clear();

for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {		for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
Show All 13 Lines	if (PHINode *P = dyn_cast<PHINode>(it)) {
Value *Rdx = getReductionValue(DT, P, BB, LI);		Value *Rdx = getReductionValue(DT, P, BB, LI);

// Check if this is a Binary Operator.		// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);		BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)		if (!BI)
continue;		continue;

// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
if (canMatchHorizontalReduction(P, BI, R, TTI, MinVecRegSize)) {		bool SuccessToMatchHorizontalReduction = false;
		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (canMatchHorizontalReduction(P, BI, R, TTI, VecRegSize)) {
		SuccessToMatchHorizontalReduction = true;
		break;
		}
		}
		if (SuccessToMatchHorizontalReduction) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}

Value *Inst = BI->getOperand(0);		Value *Inst = BI->getOperand(0);
if (Inst == P)		if (Inst == P)
Inst = BI->getOperand(1);		Inst = BI->getOperand(1);

if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {		if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
// We would like to start over since some instructions are deleted		// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.		// and the iterator may become invalid value.
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
		mcrosierUnsubmitted Done Reply Inline Actions Maybe use VecRegSize, rather than Size here? mcrosier: Maybe use VecRegSize, rather than Size here?
continue;		continue;
}		}

continue;		continue;
}		}

if (ShouldStartVectorizeHorAtStore)		if (ShouldStartVectorizeHorAtStore)
if (StoreInst *SI = dyn_cast<StoreInst>(it))		if (StoreInst *SI = dyn_cast<StoreInst>(it))
if (BinaryOperator *BinOp =		if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {		dyn_cast<BinaryOperator>(SI->getValueOperand())) {
		bool SuccessToMatchHorizontalReduction = false;
		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,		if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
MinVecRegSize) \|\|		VecRegSize)) {
tryToVectorize(BinOp, R)) {		SuccessToMatchHorizontalReduction = true;
		break;
		}
		}
		if (SuccessToMatchHorizontalReduction \|\| tryToVectorize(BinOp, R)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}
}		}

// Try to vectorize horizontal reductions feeding into a return.		// Try to vectorize horizontal reductions feeding into a return.
Show All 40 Lines	for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// Try to vectorize trees that start at insertelement instructions.		// Try to vectorize trees that start at insertelement instructions.
if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {		if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
SmallVector<Value *, 16> BuildVector;		SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;		SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))		if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;		continue;

// Vectorize starting with the build vector operands ignoring the		// Vectorize starting with the build vector operands ignoring the
// BuildVector instructions for the purpose of scheduling and user		// BuildVector instructions for the purpose of scheduling and user
		mcrosierUnsubmitted Done Reply Inline Actions I assume this should be deleted, rather than commented out. mcrosier: I assume this should be deleted, rather than commented out.
// extraction.		// extraction.
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(BuildVectorOpds, R, VecRegSize, false,
		BuildVector, false)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
		break;
		}
}		}

continue;		continue;
}		}
}		}

return Changed;		return Changed;
}		}

bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {		bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) {
Candidates.remove(GEPList[I]);		Candidates.remove(GEPList[I]);
Candidates.remove(GEPList[J]);		Candidates.remove(GEPList[J]);
} else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {		} else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
Candidates.remove(GEPList[J]);		Candidates.remove(GEPList[J]);
}		}
}		}
}		}

// We break out of the above computation as soon as we know there are		// We break out of the above computation as soon as we know there are
		mcrosierUnsubmitted Done Reply Inline Actions I don't think you need the temporary variable. for () { if (tryToVectorizeList()) { Changed = true; it = BB->begin(); e = BB->end(); break; } } mcrosier: I don't think you need the temporary variable. for () { if (tryToVectorizeList()) {…
// fewer than two candidates remaining.		// fewer than two candidates remaining.
if (Candidates.size() < 2)		if (Candidates.size() < 2)
continue;		continue;

// Add the single, non-constant index of each candidate to the bundle. We		// Add the single, non-constant index of each candidate to the bundle. We
// ensured the indices met these constraints when we originally collected		// ensured the indices met these constraints when we originally collected
// the getelementptrs.		// the getelementptrs.
SmallVector<Value *, 16> Bundle(Candidates.size());		SmallVector<Value *, 16> Bundle(Candidates.size());
Show All 9 Lines	for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) {
// gather-like cases of the form:		// gather-like cases of the form:
//		//
// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...		// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
//		//
// where the loads of "a", the loads of "b", and the subtractions can be		// where the loads of "a", the loads of "b", and the subtractions can be
// performed in parallel. It's likely that detecting this pattern in a		// performed in parallel. It's likely that detecting this pattern in a
// bottom-up phase will be simpler and less costly than building a		// bottom-up phase will be simpler and less costly than building a
// full-blown top-down phase beginning at the consecutive loads.		// full-blown top-down phase beginning at the consecutive loads.
Changed \|= tryToVectorizeList(Bundle, R);		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(Bundle, R, VecRegSize)) {
		Changed = true;
		break;
		}
		}
}		}
}		}
return Changed;		return Changed;
}		}

bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {		bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
bool Changed = false;		bool Changed = false;
// Attempt to sort and vectorize each of the store-groups.		// Attempt to sort and vectorize each of the store-groups.
for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;		for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;
++it) {		++it) {
if (it->second.size() < 2)		if (it->second.size() < 2)
continue;		continue;

DEBUG(dbgs() << "SLP: Analyzing a store chain of length "		DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< it->second.size() << ".\n");		<< it->second.size() << ".\n");

// Process the stores in chunks of 16.		// Process the stores in chunks of 16.
// TODO: The limit of 16 inhibits greater vectorization factors.		// TODO: The limit of 16 inhibits greater vectorization factors.
// For example, AVX2 supports v32i8. Increasing this limit, however,		// For example, AVX2 supports v32i8. Increasing this limit, however,
// may cause a significant compile-time increase.		// may cause a significant compile-time increase.
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {		for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
unsigned Len = std::min<unsigned>(CE - CI, 16);		unsigned Len = std::min<unsigned>(CE - CI, 16);
Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),		Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), R);
		mzolotukhin (Unsubmitted, Not Done): `SLPCostThreshold` disappeared after this change. Was it intentional?
		mssimpso (Unsubmitted, Not Done): SLPCostThreshold is a command line option, so it doesn't need to be passed as a function parameter.
		mzolotukhin (Unsubmitted, Not Done): Ah, right, thanks for pointing that out!
		JongwonLee (Author, Unsubmitted, Not Done): Yes. mssimpso is right.
-SLPCostThreshold, R);
}		}
}		}
return Changed;		return Changed;
}		}

} // end anonymous namespace		} // end anonymous namespace

char SLPVectorizer::ID = 0;		char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";		static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)		INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)		INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)		INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)		INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)		INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DemandedBits)		INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)		INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)

namespace llvm {		namespace llvm {
Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }		Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }
}		}
		mcrosier (Unsubmitted, Done): Please remove.
		mcrosier (Unsubmitted, Done): I don't think you need this temporary value. You can just do something like the below, correct? for (...) if (tryToVectorizeList()) { Changed = true; break; }

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

This file was added.

				;RUN: opt -S -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=64 -slp-threshold=-13 < %s | FileCheck %s

				target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64--linux-gnu"

				; CHECK: @foo
				; CHECK: add nsw <2 x i64>
				; CHECK: add nsw <2 x i64>

				; Test input for the SLP vectorizer: reads eight consecutive i64 elements
				; a[0..7], adds them in adjacent pairs, and reduces the four pair sums to a
				; single i64 that is returned. The CHECK lines of this test expect
				; `add nsw <2 x i64>` instructions in the output.
				; NOTE(review): attribute group #0 is referenced here but no
				; `attributes #0 = {...}` definition is visible in this file — confirm.
				define i64 @foo(i64* nocapture readonly %a) #0 {
				entry:
				; Addresses of a[1] through a[7]; a[0] is read directly through %a.
				%idx1 = getelementptr inbounds i64, i64* %a, i64 1
				%idx2 = getelementptr inbounds i64, i64* %a, i64 2
				%idx3 = getelementptr inbounds i64, i64* %a, i64 3
				%idx4 = getelementptr inbounds i64, i64* %a, i64 4
				%idx5 = getelementptr inbounds i64, i64* %a, i64 5
				%idx6 = getelementptr inbounds i64, i64* %a, i64 6
				%idx7 = getelementptr inbounds i64, i64* %a, i64 7
				; Load the eight elements (each load is marked align 4).
				%0 = load i64, i64* %a, align 4
				%1 = load i64, i64* %idx1, align 4
				%2 = load i64, i64* %idx2, align 4
				%3 = load i64, i64* %idx3, align 4
				%4 = load i64, i64* %idx4, align 4
				%5 = load i64, i64* %idx5, align 4
				%6 = load i64, i64* %idx6, align 4
				%7 = load i64, i64* %idx7, align 4
				; Level 1: sum adjacent pairs a[1]+a[0], a[3]+a[2], a[5]+a[4], a[7]+a[6].
				%add = add nsw i64 %1, %0
				%add1 = add nsw i64 %3, %2
				%add2 = add nsw i64 %5, %4
				%add3 = add nsw i64 %7, %6
				; Level 2: combine the four pair sums into two partial sums.
				%add8 = add nsw i64 %add1, %add
				%add9 = add nsw i64 %add3, %add2
				; Level 3: final scalar sum of all eight elements.
				%add12 = add nsw i64 %add9, %add8
				ret i64 %add12
				}

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51388

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51388

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs Review · Public