Diff 51142

lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 3,418 Lines • ▼ Show 20 Lines	bool runOnFunction(Function &F) override {
else		else
MaxVecRegSize = TTI->getRegisterBitWidth(true);		MaxVecRegSize = TTI->getRegisterBitWidth(true);

MinVecRegSize = MinVectorRegSizeOption;		MinVecRegSize = MinVectorRegSizeOption;

// Don't vectorize when the attribute NoImplicitFloat is used.		// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))		if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;		return false;

		mcrosierUnsubmitted Not Done Reply Inline Actions I'm thinking this should be a TTI hook, so each target can define the MinVecRegSize. mcrosier: I'm thinking this should be a TTI hook, so each target can define the MinVecRegSize.
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions I'll separate this from the current patch. The current patch will only handle the range of the size of vectorizable registers. JongwonLee: I'll separate this from the current patch. The current patch will only handle the range of the…
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");		DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");

// Use the bottom up slp vectorizer to construct chains that start with		// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.		// store instructions.
BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);		BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);

// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to		// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.		// delete instructions.
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	private:
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.		/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
bool tryToVectorizePair(Value A, Value B, BoUpSLP &R);		bool tryToVectorizePair(Value A, Value B, BoUpSLP &R);

/// \brief Try to vectorize a list of operands.		/// \brief Try to vectorize a list of operands.
/// \@param BuildVector A list of users to ignore for the purpose of		/// \@param BuildVector A list of users to ignore for the purpose of
/// scheduling and that don't need extracting.		/// scheduling and that don't need extracting.
/// \returns true if a value was vectorized.		/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,		bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> BuildVector = None,		ArrayRef<Value *> BuildVector = None,
		mssimpsoUnsubmitted Not Done Reply Inline Actions Unless I missed something, it looks to me like every use of tryToVectorizeList passes VecRegSize. Why make the parameter optional? mssimpso: Unless I missed something, it looks to me like every use of tryToVectorizeList passes…
bool allowReorder = false);		bool allowReorder = false, unsigned VecRegSize = 128);

/// \brief Try to vectorize a chain that may start at the operands of \V;		/// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);		bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);

/// \brief Vectorize the store instructions collected in Stores.		/// \brief Vectorize the store instructions collected in Stores.
bool vectorizeStoreChains(BoUpSLP &R);		bool vectorizeStoreChains(BoUpSLP &R);

/// \brief Vectorize the index computations of the getelementptr instructions		/// \brief Vectorize the index computations of the getelementptr instructions
▲ Show 20 Lines • Show All 191 Lines • ▼ Show 20 Lines	for (Instruction &I : *BB) {
}		}
}		}
}		}

bool SLPVectorizer::tryToVectorizePair(Value A, Value B, BoUpSLP &R) {		bool SLPVectorizer::tryToVectorizePair(Value A, Value B, BoUpSLP &R) {
if (!A \|\| !B)		if (!A \|\| !B)
return false;		return false;
Value *VL[] = { A, B };		Value *VL[] = { A, B };
return tryToVectorizeList(VL, R, None, true);		bool SuccessToVectorizeList = false;
		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(VL, R, None, true, VecRegSize)) {
		SuccessToVectorizeList = true;
		mcrosierUnsubmitted Done Reply Inline Actions Why not just return true here and remove the unnecessary temp variable? mcrosier: Why not just return true here and remove the unnecessary temp variable?
		break;
		}
		}
		return SuccessToVectorizeList;
		mcrosierUnsubmitted Done Reply Inline Actions return false (assuming you do the suggestion above). mcrosier: return false (assuming you do the suggestion above).
}		}

bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,		bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> BuildVector,		ArrayRef<Value *> BuildVector,
bool allowReorder) {		bool allowReorder, unsigned VecRegSize) {
		mcrosierUnsubmitted Done Reply Inline Actions Maybe use VecRegSize, rather than Size here? mcrosier: Maybe use VecRegSize, rather than Size here?
if (VL.size() < 2)		if (VL.size() < 2)
return false;		return false;

DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");		DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");

// Check that all of the parts are scalar instructions of the same type.		// Check that all of the parts are scalar instructions of the same type.
Instruction *I0 = dyn_cast<Instruction>(VL[0]);		Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)		if (!I0)
return false;		return false;

unsigned Opcode0 = I0->getOpcode();		unsigned Opcode0 = I0->getOpcode();

// FIXME: Register size should be a parameter to this function, so we can
// try different vectorization factors.
unsigned Sz = R.getVectorElementSize(I0);		unsigned Sz = R.getVectorElementSize(I0);
unsigned VF = MinVecRegSize / Sz;		unsigned VF = VecRegSize / Sz;

for (Value *V : VL) {		for (Value *V : VL) {
Type *Ty = V->getType();		Type *Ty = V->getType();
if (!isValidElementType(Ty))		if (!isValidElementType(Ty))
return false;		return false;
Instruction *Inst = dyn_cast<Instruction>(V);		Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst \|\| Inst->getOpcode() != Opcode0)		if (!Inst \|\| Inst->getOpcode() != Opcode0)
return false;		return false;
}		}
		mzolotukhinUnsubmitted Done Reply Inline Actions Nitpick: I'd rather swap if and else blocks to avoid negation in the condition. mzolotukhin: Nitpick: I'd rather swap if and else blocks to avoid negation in the condition.

bool Changed = false;		bool Changed = false;
		mcrosierUnsubmitted Not Done Reply Inline Actions I believe you've addressed this FIXME, correct? mcrosier: I believe you've addressed this FIXME, correct?
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Yes. The comments are removed. JongwonLee: Yes. The comments are removed.

// Keep track of values that were deleted by vectorizing in the loop below.		// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());		SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());

for (unsigned i = 0, e = VL.size(); i < e; ++i) {		for (unsigned i = 0, e = VL.size(); i < e; ++i) {
unsigned OpsWidth = 0;		unsigned OpsWidth = 0;

if (i + VF > e)		if (i + VF > e)
		mzolotukhinUnsubmitted Done Reply Inline Actions I think this check should be combined with the one below: if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2) break; and it should be done independently of the `vectorizeStoreChain` flag. mzolotukhin: I think this check should be combined with the one below: ``` if (!isPowerOf2_32(OpsWidth) \|\|…
OpsWidth = e - i;		OpsWidth = e - i;
else		else
OpsWidth = VF;		OpsWidth = VF;

if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2)		if (!isPowerOf2_32(OpsWidth) \|\| OpsWidth < 2)
break;		break;

// Check that a previous iteration of this loop did not delete the Value.		// Check that a previous iteration of this loop did not delete the Value.
▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines	public:
unsigned MinVecRegSize;		unsigned MinVecRegSize;

HorizontalReduction(unsigned MinVecRegSize)		HorizontalReduction(unsigned MinVecRegSize)
: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),		: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),		ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),
MinVecRegSize(MinVecRegSize) {}		MinVecRegSize(MinVecRegSize) {}

/// \brief Try to find a reduction tree.		/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode Phi, BinaryOperator B) {		bool matchAssociativeReduction(PHINode Phi, BinaryOperator B,
		unsigned VecRegSize) {
assert((!Phi \|\|		assert((!Phi \|\|
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&		std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");		"Thi phi needs to use the binary operator");

// We could have a initial reductions that is not an add.		// We could have a initial reductions that is not an add.
// r *= v1 + v2 + v3 + v4		// r *= v1 + v2 + v3 + v4
// In such a case start looking for a tree rooted in the first '+'.		// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {		if (Phi) {
Show All 11 Lines	bool matchAssociativeReduction(PHINode Phi, BinaryOperator B,

Type *Ty = B->getType();		Type *Ty = B->getType();
if (!isValidElementType(Ty))		if (!isValidElementType(Ty))
return false;		return false;

const DataLayout &DL = B->getModule()->getDataLayout();		const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();		ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;		ReducedValueOpcode = 0;
// FIXME: Register size should be a parameter to this function, so we can		ReduxWidth = VecRegSize / DL.getTypeSizeInBits(Ty);
// try different vectorization factors.
ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;		ReductionRoot = B;
ReductionPHI = Phi;		ReductionPHI = Phi;

if (ReduxWidth < 4)		if (ReduxWidth < 4)
return false;		return false;

// We currently only support adds.		// We currently only support adds.
if (ReductionOpcode != Instruction::Add &&		if (ReductionOpcode != Instruction::Add &&
ReductionOpcode != Instruction::FAdd)		ReductionOpcode != Instruction::FAdd)
return false;		return false;

// Post order traverse the reduction tree starting at B. We only handle true		// Post order traverse the reduction tree starting at B. We only handle true
// trees containing only binary operators or selects.		// trees containing only binary operators or selects.
SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;		SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
		mcrosierUnsubmitted Done Reply Inline Actions Same. Remove FIXME. mcrosier: Same. Remove FIXME.
Stack.push_back(std::make_pair(B, 0));		Stack.push_back(std::make_pair(B, 0));
while (!Stack.empty()) {		while (!Stack.empty()) {
Instruction *TreeN = Stack.back().first;		Instruction *TreeN = Stack.back().first;
unsigned EdgeToVist = Stack.back().second++;		unsigned EdgeToVist = Stack.back().second++;
bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;		bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;

// Only handle trees in the current basic block.		// Only handle trees in the current basic block.
if (TreeN->getParent() != B->getParent())		if (TreeN->getParent() != B->getParent())
▲ Show 20 Lines • Show All 265 Lines • ▼ Show 20 Lines
/// \brief Attempt to reduce a horizontal reduction.		/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding		/// If it is legal to match a horizontal reduction feeding
/// the phi node P with reduction operators BI, then check if it		/// the phi node P with reduction operators BI, then check if it
/// can be done.		/// can be done.
/// \returns true if a horizontal reduction was matched and reduced.		/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.		/// \returns false if a horizontal reduction was not matched.
static bool canMatchHorizontalReduction(PHINode P, BinaryOperator BI,		static bool canMatchHorizontalReduction(PHINode P, BinaryOperator BI,
BoUpSLP &R, TargetTransformInfo *TTI,		BoUpSLP &R, TargetTransformInfo *TTI,
unsigned MinRegSize) {		unsigned MinRegSize) {
		mssimpsoUnsubmitted Not Done Reply Inline Actions I think it would be less confusing and more consistent if MinRegSize was renamed to VecRegSize here. mssimpso: I think it would be less confusing and more consistent if MinRegSize was renamed to VecRegSize…
if (!ShouldVectorizeHor)		if (!ShouldVectorizeHor)
return false;		return false;

HorizontalReduction HorRdx(MinRegSize);		HorizontalReduction HorRdx(MinRegSize);
if (!HorRdx.matchAssociativeReduction(P, BI))		if (!HorRdx.matchAssociativeReduction(P, BI, MinRegSize))
return false;		return false;

// If there is a sufficient number of reduction values, reduce		// If there is a sufficient number of reduction values, reduce
// to a nearby power-of-2. Can safely generate oversized		// to a nearby power-of-2. Can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.		// vectors and rely on the backend to split them to legal sizes.
HorRdx.ReduxWidth =		HorRdx.ReduxWidth =
std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));		std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));

Show All 34 Lines	for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
(SameTypeIt)->getType() == (IncIt)->getType()) {		(SameTypeIt)->getType() == (IncIt)->getType()) {
VisitedInstrs.insert(*SameTypeIt);		VisitedInstrs.insert(*SameTypeIt);
++SameTypeIt;		++SameTypeIt;
}		}

// Try to vectorize them.		// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);		unsigned NumElts = (SameTypeIt - IncIt);
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");		DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {		bool SuccessToVectorizeList = false;
		for (unsigned Size = MaxVecRegSize; Size >= MinVecRegSize; Size /= 2) {
		if (tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, None, false,
		mcrosierUnsubmitted Not Done Reply Inline Actions Shouldn't the call to tryToVectorizeList() still be predicated on NumElts > 1? mcrosier: Shouldn't the call to tryToVectorizeList() still be predicated on NumElts > 1?
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Fixed the code to call tryToVectorizeList() when NumElts > 1 is satisfied. JongwonLee: Fixed the code to call tryToVectorizeList() when NumElts > 1 is satisfied.
		Size)) {
		SuccessToVectorizeList = true;
		break;
		}
		}
		if (NumElts > 1 && SuccessToVectorizeList) {
// Success start over because instructions might have been changed.		// Success start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;		HaveVectorizedPhiNodes = true;
Changed = true;		Changed = true;
break;		break;
}		}
		mcrosierUnsubmitted Not Done Reply Inline Actions Now that this logic is cleaned up, you don't need the temporary bool. for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize; VecRegSize /= 2) { if (tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, None, false, VecRegSize)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; break; } } mcrosier: Now that this logic is cleaned up, you don't need the temporary bool. for (unsigned VecRegSize…
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Removed the temporary bool. JongwonLee: Removed the temporary bool.

// Start over at the next instruction of a different type (or the end).		// Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;		IncIt = SameTypeIt;
}		}
		mcrosierUnsubmitted Done Reply Inline Actions Maybe use VecRegSize, rather than Size here? mcrosier: Maybe use VecRegSize, rather than Size here?
}		}

VisitedInstrs.clear();		VisitedInstrs.clear();

for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {		for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.		// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second)		if (!VisitedInstrs.insert(&*it).second)
continue;		continue;
Show All 10 Lines	if (PHINode *P = dyn_cast<PHINode>(it)) {
Value *Rdx = getReductionValue(DT, P, BB, LI);		Value *Rdx = getReductionValue(DT, P, BB, LI);

// Check if this is a Binary Operator.		// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);		BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)		if (!BI)
continue;		continue;

// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
if (canMatchHorizontalReduction(P, BI, R, TTI, MinVecRegSize)) {		bool SuccessToMatchHorizontalReduction = false;
		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (canMatchHorizontalReduction(P, BI, R, TTI, VecRegSize)) {
		SuccessToMatchHorizontalReduction = true;
		break;
		}
		}
		if (SuccessToMatchHorizontalReduction) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}

Value *Inst = BI->getOperand(0);		Value *Inst = BI->getOperand(0);
if (Inst == P)		if (Inst == P)
Show All 10 Lines	Value *Inst = BI->getOperand(0);

continue;		continue;
}		}

if (ShouldStartVectorizeHorAtStore)		if (ShouldStartVectorizeHorAtStore)
if (StoreInst *SI = dyn_cast<StoreInst>(it))		if (StoreInst *SI = dyn_cast<StoreInst>(it))
if (BinaryOperator *BinOp =		if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {		dyn_cast<BinaryOperator>(SI->getValueOperand())) {
		bool SuccessToMatchHorizontalReduction = false;
		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,		if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
MinVecRegSize) \|\|		VecRegSize)) {
tryToVectorize(BinOp, R)) {		SuccessToMatchHorizontalReduction = true;
		break;
		}
		}
		if (SuccessToMatchHorizontalReduction \|\| tryToVectorize(BinOp, R)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}
}		}
		mcrosierUnsubmitted Done Reply Inline Actions I assume this should be deleted, rather than commented out. mcrosier: I assume this should be deleted, rather than commented out.

// Try to vectorize horizontal reductions feeding into a return.		// Try to vectorize horizontal reductions feeding into a return.
if (ReturnInst *RI = dyn_cast<ReturnInst>(it))		if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
if (RI->getNumOperands() != 0)		if (RI->getNumOperands() != 0)
if (BinaryOperator *BinOp =		if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(RI->getOperand(0))) {		dyn_cast<BinaryOperator>(RI->getOperand(0))) {
DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");		DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
if (tryToVectorizePair(BinOp->getOperand(0),		if (tryToVectorizePair(BinOp->getOperand(0),
Show All 36 Lines	if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
SmallVector<Value *, 16> BuildVector;		SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;		SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))		if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;		continue;

// Vectorize starting with the build vector operands ignoring the		// Vectorize starting with the build vector operands ignoring the
// BuildVector instructions for the purpose of scheduling and user		// BuildVector instructions for the purpose of scheduling and user
// extraction.		// extraction.
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false,
		VecRegSize)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
		break;
		}
}		}

continue;		continue;
}		}
}		}

return Changed;		return Changed;
}		}

bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {		bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
		mcrosierUnsubmitted Done Reply Inline Actions I don't think you need the temporary variable. for () { if (tryToVectorizeList()) { Changed = true; it = BB->begin(); e = BB->end(); break; } } mcrosier: I don't think you need the temporary variable. for () { if (tryToVectorizeList()) {…
auto Changed = false;		auto Changed = false;
for (auto &Entry : GEPs) {		for (auto &Entry : GEPs) {

// If the getelementptr list has fewer than two elements, there's nothing		// If the getelementptr list has fewer than two elements, there's nothing
// to do.		// to do.
if (Entry.second.size() < 2)		if (Entry.second.size() < 2)
continue;		continue;

▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) {
// gather-like cases of the form:		// gather-like cases of the form:
//		//
// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...		// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
//		//
// where the loads of "a", the loads of "b", and the subtractions can be		// where the loads of "a", the loads of "b", and the subtractions can be
// performed in parallel. It's likely that detecting this pattern in a		// performed in parallel. It's likely that detecting this pattern in a
// bottom-up phase will be simpler and less costly than building a		// bottom-up phase will be simpler and less costly than building a
// full-blown top-down phase beginning at the consecutive loads.		// full-blown top-down phase beginning at the consecutive loads.
Changed \|= tryToVectorizeList(Bundle, R);		for (unsigned VecRegSize = MaxVecRegSize; VecRegSize >= MinVecRegSize;
		VecRegSize /= 2) {
		if (tryToVectorizeList(Bundle, R, None, false, VecRegSize)) {
		Changed = true;
		break;
		}
		}
}		}
}		}
return Changed;		return Changed;
}		}

bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {		bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
bool Changed = false;		bool Changed = false;
// Attempt to sort and vectorize each of the store-groups.		// Attempt to sort and vectorize each of the store-groups.
for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;		for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;
++it) {		++it) {
if (it->second.size() < 2)		if (it->second.size() < 2)
continue;		continue;

DEBUG(dbgs() << "SLP: Analyzing a store chain of length "		DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
		mcrosierUnsubmitted Done Reply Inline Actions Please remove. mcrosier: Please remove.
<< it->second.size() << ".\n");		<< it->second.size() << ".\n");
		mcrosierUnsubmitted Done Reply Inline Actions I don't think you need this temporary value. You can just do something like the below, correct? for (...) if (tryToVectorizeList()) { Changed = true; break; } mcrosier: I don't think you need this temporary value. You can just do something like the below, correct?

// Process the stores in chunks of 16.		// Process the stores in chunks of 16.
// TODO: The limit of 16 inhibits greater vectorization factors.		// TODO: The limit of 16 inhibits greater vectorization factors.
// For example, AVX2 supports v32i8. Increasing this limit, however,		// For example, AVX2 supports v32i8. Increasing this limit, however,
// may cause a significant compile-time increase.		// may cause a significant compile-time increase.
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {		for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
unsigned Len = std::min<unsigned>(CE - CI, 16);		unsigned Len = std::min<unsigned>(CE - CI, 16);
Changed \|= vectorizeStores(makeArrayRef(&it->second[CI], Len),		Changed \|= vectorizeStores(makeArrayRef(&it->second[CI], Len),
-SLPCostThreshold, R);		-SLPCostThreshold, R);
		mzolotukhinUnsubmitted Not Done Reply Inline Actions `SLPCostThreshold` disappeared after this change. Was it intentional? mzolotukhin: `SLPCostThreshold` disappeared after this change. Was it intentional?
		mssimpsoUnsubmitted Not Done Reply Inline Actions SLPCostThreshold is a command line option, so it doesn't need to be passed as a function parameter. mssimpso: SLPCostThreshold is a command line option, so it doesn't need to be passed as a function…
		mzolotukhinUnsubmitted Not Done Reply Inline Actions Ah, right, thanks for pointing that out! mzolotukhin: Ah, right, thanks for pointing that out!
		JongwonLeeAuthorUnsubmitted Not Done Reply Inline Actions Yes. mssimpso is right. JongwonLee: Yes. mssimpso is right.
}		}
}		}
return Changed;		return Changed;
}		}

} // end anonymous namespace		} // end anonymous namespace

char SLPVectorizer::ID = 0;		char SLPVectorizer::ID = 0;
Show All 13 Lines

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

This file was added.

				;RUN: opt -S -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=64 -slp-threshold=-13 < %s \| FileCheck %s

				target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64--linux-gnu"

				; CHECK: @foo
				; CHECK: add nsw <2 x i64>
				; CHECK: add nsw <2 x i64>

				; @foo reads eight consecutive i64 values starting at %a (indices 0-7),
				; adds them in adjacent pairs, then combines the four partial sums in a
				; two-level tree and returns the total.
				define i64 @foo(i64* nocapture readonly %a) #0 {
				entry:
				; Addresses of elements 1..7; element 0 is loaded directly through %a.
				%idx1 = getelementptr inbounds i64, i64* %a, i64 1
				%idx2 = getelementptr inbounds i64, i64* %a, i64 2
				%idx3 = getelementptr inbounds i64, i64* %a, i64 3
				%idx4 = getelementptr inbounds i64, i64* %a, i64 4
				%idx5 = getelementptr inbounds i64, i64* %a, i64 5
				%idx6 = getelementptr inbounds i64, i64* %a, i64 6
				%idx7 = getelementptr inbounds i64, i64* %a, i64 7
				; Load all eight elements.
				%0 = load i64, i64* %a, align 4
				%1 = load i64, i64* %idx1, align 4
				%2 = load i64, i64* %idx2, align 4
				%3 = load i64, i64* %idx3, align 4
				%4 = load i64, i64* %idx4, align 4
				%5 = load i64, i64* %idx5, align 4
				%6 = load i64, i64* %idx6, align 4
				%7 = load i64, i64* %idx7, align 4
				; First level: four sums of adjacent element pairs.
				%add = add nsw i64 %1, %0
				%add1 = add nsw i64 %3, %2
				%add2 = add nsw i64 %5, %4
				%add3 = add nsw i64 %7, %6
				; Second level: combine the pair sums two at a time.
				%add8 = add nsw i64 %add1, %add
				%add9 = add nsw i64 %add3, %add2
				; Final scalar sum of all eight elements.
				%add12 = add nsw i64 %add9, %add8
				ret i64 %add12
				}

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51142

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51142

lib/Transforms/Vectorize/SLPVectorizer.cpp

test/Transforms/SLPVectorizer/AArch64/slp-vectorized-from-max-to-min.ll

[SLPVectorizer] Try to vectorize in the range from MaxVecRegSize to MinVecRegSize
Needs ReviewPublic