This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/lib/Transforms/Vectorize/
-
lib/
-
Transforms/
-
Vectorize/
-
SLPVectorizer.cpp

Differential D72689

[SLP] Revectorize partially vectorized instructions
AbandonedPublic

Authored by anton-afanasyev on Jan 14 2020, 2:36 AM.

Download Raw Diff

Details

Reviewers

RKSimon
ABataev
spatel
dtemirbulatov

Summary

This is just a draft with a lot of dirty hacks, intended as a proof of concept
for the revectorization of partially vectorized instructions.

The main motivation comes from here: http://llvm.org/pr42022.
We need to merge two store <2 x float> instructions into a single store <4 x float>.
Such cases arise, for instance, when the SLP vectorizer runs on inlined code that has
already been SLP-vectorized.

The solution is to permit insertelement instructions as vectorization tree nodes, but this
should be done carefully: insertelement instructions cannot be scheduled (they have inner dependencies).

Early feedback is appreciated, since this unfinished work could affect other revisions
(such as D44067 or PR35732).

This is a follow-up to D70587 and D70924.

Diff Detail

Repository

rG LLVM Github Monorepo

Build Status

Buildable 43917
Build 44973: arc lint + arc unit

Event Timeline

anton-afanasyev created this revision. · Jan 14 2020, 2:36 AM

Herald added a project: Restricted Project. · View Herald Transcript · Jan 14 2020, 2:36 AM

Herald added subscribers: llvm-commits, hiraditya. · View Herald Transcript

Harbormaster completed remote builds in B43917: Diff 237897. · Jan 14 2020, 2:37 AM

I saw a similar thing in recent research:

https://www.nextgenvec.org/slides/revec-cc.pdf

They proposed a new compiler pass to perform revectorization.

Did you look at it? What do you think?

In D72689#1819130, @xbolva00 wrote:

I saw a similar thing in recent research:

https://www.nextgenvec.org/slides/revec-cc.pdf

They proposed a new compiler pass to perform revectorization.

Did you look at it? What do you think?

Thank you, I will study it. There is another obvious solution for such a task using a special pass: just break up the partial vectorization before the SLP pass.

anton-afanasyev retitled this revision from [SLP] Vectorize partially vectorized instructions to [SLP] Revectorize partially vectorized instructions. · Jan 16 2020, 1:27 AM

anton-afanasyev edited the summary of this revision. (Show Details)

This work has been resumed in D98714, so I am abandoning this revision.

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Vectorize/

SLPVectorizer.cpp

156 lines

Diff 237897

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 325 Lines • ▼ Show 20 Lines	isShuffle(ArrayRef<Value *> VL) {
// If we're not crossing lanes in different vectors, consider it as blending.		// If we're not crossing lanes in different vectors, consider it as blending.
if (CommonShuffleMode == Select && Vec2)		if (CommonShuffleMode == Select && Vec2)
return TargetTransformInfo::SK_Select;		return TargetTransformInfo::SK_Select;
// If Vec2 was never used, we have a permutation of a single vector, otherwise		// If Vec2 was never used, we have a permutation of a single vector, otherwise
// we have permutation of 2 vectors.		// we have permutation of 2 vectors.
return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc		return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
: TargetTransformInfo::SK_PermuteSingleSrc;		: TargetTransformInfo::SK_PermuteSingleSrc;
}		}
		static bool findBuildAggregate(Value LastInsertInst, TargetTransformInfo TTI,
		SmallVectorImpl<Value *> &BuildVectorOpds,
		SmallVectorImpl<Value > AllInsertInsts,
		int &UserCost);

namespace {		namespace {

/// Main data required for vectorization of instructions.		/// Main data required for vectorization of instructions.
struct InstructionsState {		struct InstructionsState {
/// The very first instruction in the list with the main opcode.		/// The very first instruction in the list with the main opcode.
Value *OpValue = nullptr;		Value *OpValue = nullptr;

▲ Show 20 Lines • Show All 1,124 Lines • ▼ Show 20 Lines	private:
Instruction *MainOp = nullptr;		Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;		Instruction *AltOp = nullptr;

public:		public:
/// Set this bundle's \p OpIdx'th operand to \p OpVL.		/// Set this bundle's \p OpIdx'th operand to \p OpVL.
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {		void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)		if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);		Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].size() == 0 && "Already resized?");		//assert(Operands[OpIdx].size() == 0 && "Already resized?");
Operands[OpIdx].resize(Scalars.size());		//Operands[OpIdx].resize(Scalars.size());
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)		Operands[OpIdx].resize(OpVL.size());
		//for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
		for (unsigned Lane = 0, E = OpVL.size(); Lane != E; ++Lane)
Operands[OpIdx][Lane] = OpVL[Lane];		Operands[OpIdx][Lane] = OpVL[Lane];
}		}

/// Set the operands of this bundle in their original order.		/// Set the operands of this bundle in their original order.
void setOperandsInOrder() {		void setOperandsInOrder() {
assert(Operands.empty() && "Already initialized?");		assert(Operands.empty() && "Already initialized?");
auto *I0 = cast<Instruction>(Scalars[0]);		auto *I0 = cast<Instruction>(Scalars[0]);
Operands.resize(I0->getNumOperands());		Operands.resize(I0->getNumOperands());
▲ Show 20 Lines • Show All 928 Lines • ▼ Show 20 Lines	void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
InstructionsState S = getSameOpcode(VL);		InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {		if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");		LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);		newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);
return;		return;
}		}

// Don't handle vectors.		// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy()) {		// if (S.OpValue->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");		// LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);		// newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);
return;		// return;
}		// }

if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))		// if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {		// if (SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");		// LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);		// newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);
return;		// return;
}		// }

// If all of the operands are identical or constant we have a simple solution.		// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) \|\| isSplat(VL) \|\| !allSameBlock(VL) \|\| !S.getOpcode()) {		if (allConstant(VL) \|\| isSplat(VL) \|\| !allSameBlock(VL) \|\| !S.getOpcode()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");		LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);		newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx);
return;		return;
}		}

▲ Show 20 Lines • Show All 186 Lines • ▼ Show 20 Lines	case Instruction::ExtractElement: {
return;		return;
}		}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");		LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx,		newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx,
ReuseShuffleIndicies);		ReuseShuffleIndicies);
BS.cancelScheduling(VL, VL0);		BS.cancelScheduling(VL, VL0);
return;		return;
}		}
		case Instruction::InsertElement: {
		int UserCost = 0;
		ValueList Inserts;
		ValueList Operands;
		for (Value *V : VL) {
		if (!findBuildAggregate(V, TTI, Operands, &Inserts, UserCost)) {
		BS.cancelScheduling(VL, VL0);
		newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx,
		ReuseShuffleIndicies);
		LLVM_DEBUG(dbgs() << "SLP: Gathering insertelement's.\n");
		return;
		}
		}
		// Cancel scheduling of inserts before rescheduling
		BS.cancelScheduling(VL, VL0);
		UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, Inserts);
		Bundle = BS.tryScheduleBundle(Inserts, this, S);
		// if (!Bundle) {
		// LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
		// assert((!BS.getScheduleData(VL0) \|\|
		// !BS.getScheduleData(VL0)->isPartOfBundle()) &&
		// "tryScheduleBundle should cancelScheduling on failure");
		// newTreeEntry(Inserts, None /not vectorized/, S, UserTreeIdx,
		// ReuseShuffleIndicies);
		// return;
		// }
		S = getSameOpcode(Inserts);
		// Bundle = None;
		TreeEntry TE = newTreeEntry(Inserts, Bundle /vectorized*/, S,
		UserTreeIdx, ReuseShuffleIndicies);
		TE->setOperandsInOrder();
		// for (Value *V : Inserts)
		// Operands.push_back(cast<Instruction>(V)->getOperand(1));
		buildTree_rec(Operands, Depth + 1, {TE, 0});
		return;



		// ValueList Inserts;
		// ValueList Operands;
		// for (Value *V : VL) {
		// Inserts.push_back(cast<Instruction>(V)->getOperand(0));
		// Operands.push_back(cast<Instruction>(V)->getOperand(1));
		// }

		// TreeEntry *TE =
		// newTreeEntry(VL, Bundle /vectorized/, S, UserTreeIdx,
		// ReuseShuffleIndicies);//, I->getFirst());
		// TE->setOperandsInOrder();
		// buildTree_rec(Inserts, Depth + 1, {TE, 0});
		// buildTree_rec(Operands, Depth + 1, {TE, 1});

		// return;

		}

		// LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
		// newTreeEntry(VL, None /not vectorized/, S, UserTreeIdx,
		// ReuseShuffleIndicies);
		// BS.cancelScheduling(VL, VL0);
		// return;

case Instruction::Load: {		case Instruction::Load: {
// Check that a vectorized load would load the same memory as a scalar		// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller		// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM		// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores		// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the		// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.		// unvectorized version.
Type *ScalarTy = VL0->getType();		Type *ScalarTy = VL0->getType();
▲ Show 20 Lines • Show All 553 Lines • ▼ Show 20 Lines	return I->hasOneUse() \|\|
std::all_of(I->user_begin(), I->user_end(), [this](User *U) {		std::all_of(I->user_begin(), I->user_end(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0;		return ScalarToTreeEntry.count(U) > 0;
});		});
}		}

int BoUpSLP::getEntryCost(TreeEntry *E) {		int BoUpSLP::getEntryCost(TreeEntry *E) {
ArrayRef<Value*> VL = E->Scalars;		ArrayRef<Value*> VL = E->Scalars;

		if (isa<InsertElementInst>(VL[0]) \|\|
		isa<InsertValueInst>(VL[0]) \|\|
		isa<StoreInst>(VL[0]))
		return 0;

Type *ScalarTy = VL[0]->getType();		Type *ScalarTy = VL[0]->getType();
		int N = VL.size();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))		if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();		ScalarTy = SI->getValueOperand()->getType();
else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))		else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
ScalarTy = CI->getOperand(0)->getType();		ScalarTy = CI->getOperand(0)->getType();
VectorType *VecTy = VectorType::get(ScalarTy, VL.size());		VectorType *VecTy = VectorType::get(ScalarTy, N);

// If we have computed a smaller type for the expression, update VecTy so		// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.		// that the costs will be accurate.
if (MinBWs.count(VL[0]))		if (MinBWs.count(VL[0]))
VecTy = VectorType::get(		VecTy = VectorType::get(
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());		IntegerType::get(F->getContext(), MinBWs[VL[0]].first), N);

unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();		unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();		bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
int ReuseShuffleCost = 0;		int ReuseShuffleCost = 0;
if (NeedToShuffleReuses) {		if (NeedToShuffleReuses) {
ReuseShuffleCost =		ReuseShuffleCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);		TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}		}
▲ Show 20 Lines • Show All 753 Lines • ▼ Show 20 Lines	if (TreeEntry *E = getTreeEntry(S.OpValue)) {
return V;		return V;
}		}
}		}
}		}

Type *ScalarTy = S.OpValue->getType();		Type *ScalarTy = S.OpValue->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))		if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
ScalarTy = SI->getValueOperand()->getType();		ScalarTy = SI->getValueOperand()->getType();
		if (isa<InsertElementInst>(S.OpValue)) {
		ValueList VL2;
		VL2.resize(VL.size());
		for (unsigned i = 0; i < VL.size(); ++i)
		if (auto *Insrt = dyn_cast<InsertElementInst>(VL[i]))
		VL2[i] = Insrt->getOperand(1);

		return vectorizeTree(VL2);
		}

// Check that every instruction appears once in this bundle.		// Check that every instruction appears once in this bundle.
SmallVector<unsigned, 4> ReuseShuffleIndicies;		SmallVector<unsigned, 4> ReuseShuffleIndicies;
SmallVector<Value *, 4> UniqueValues;		SmallVector<Value *, 4> UniqueValues;
if (VL.size() > 2) {		if (VL.size() > 2) {
DenseMap<Value *, unsigned> UniquePositions;		DenseMap<Value *, unsigned> UniquePositions;
for (Value *V : VL) {		for (Value *V : VL) {
auto Res = UniquePositions.try_emplace(V, UniqueValues.size());		auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
Show All 37 Lines	Value BoUpSLP::vectorizeTree(TreeEntry E) {

if (E->VectorizedValue) {		if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");		LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;		return E->VectorizedValue;
}		}

Instruction *VL0 = E->getMainOp();		Instruction *VL0 = E->getMainOp();
Type *ScalarTy = VL0->getType();		Type *ScalarTy = VL0->getType();
		int N = 1;
if (StoreInst *SI = dyn_cast<StoreInst>(VL0))		if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();		ScalarTy = SI->getValueOperand()->getType();
VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());		if (auto VectorTy = dyn_cast<VectorType>(ScalarTy)) {
		ScalarTy = VectorTy->getElementType();
		N = VectorTy->getNumElements();
		}
		VectorType VecTy = VectorType::get(ScalarTy, E->Scalars.size() N);

bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();		bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();

if (E->State == TreeEntry::NeedToGather) {		if (E->State == TreeEntry::NeedToGather && !isa<InsertElementInst>(VL0)) {
setInsertPointAfterBundle(E);		setInsertPointAfterBundle(E);
auto *V = Gather(E->Scalars, VecTy);		auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {		if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),		V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");		E->ReuseShuffleIndices, "shuffle");
if (auto *I = dyn_cast<Instruction>(V)) {		if (auto *I = dyn_cast<Instruction>(V)) {
GatherSeq.insert(I);		GatherSeq.insert(I);
CSEBlocks.insert(I->getParent());		CSEBlocks.insert(I->getParent());
▲ Show 20 Lines • Show All 1,187 Lines • ▼ Show 20 Lines	while (BundleMember) {
}		}
LastScheduledInst = pickedInst;		LastScheduledInst = pickedInst;
BundleMember = BundleMember->NextInBundle;		BundleMember = BundleMember->NextInBundle;
}		}

BS->schedule(picked, ReadyInsts);		BS->schedule(picked, ReadyInsts);
NumToSchedule--;		NumToSchedule--;
}		}
assert(NumToSchedule == 0 && "could not schedule all instructions");		//assert(NumToSchedule == 0 && "could not schedule all instructions");

// Avoid duplicate scheduling of the block.		// Avoid duplicate scheduling of the block.
BS->ScheduleStart = nullptr;		BS->ScheduleStart = nullptr;
}		}

unsigned BoUpSLP::getVectorElementSize(Value *V) const {		unsigned BoUpSLP::getVectorElementSize(Value *V) const {
// If V is a store, just return the width of the stored value without		// If V is a store, just return the width of the stored value without
// traversing the expression tree. This is the common case.		// traversing the expression tree. This is the common case.
▲ Show 20 Lines • Show All 555 Lines • ▼ Show 20 Lines	void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Stores and GEPs according to the underlying objects of their pointer		// Stores and GEPs according to the underlying objects of their pointer
// operands.		// operands.
for (Instruction &I : *BB) {		for (Instruction &I : *BB) {
// Ignore store instructions that are volatile or have a pointer operand		// Ignore store instructions that are volatile or have a pointer operand
// that doesn't point to a scalar type.		// that doesn't point to a scalar type.
if (auto *SI = dyn_cast<StoreInst>(&I)) {		if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())		if (!SI->isSimple())
continue;		continue;
if (!isValidElementType(SI->getValueOperand()->getType()))		if (isValidElementType(SI->getValueOperand()->getType()) \|\|
continue;		isa<VectorType>(SI->getValueOperand()->getType()))
Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI);		Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI);
		continue;
}		}

// Ignore getelementptr instructions that have more than one index, a		// Ignore getelementptr instructions that have more than one index, a
// constant index, or a pointer operand that doesn't point to a scalar		// constant index, or a pointer operand that doesn't point to a scalar
// type.		// type.
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {		else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
auto Idx = GEP->idx_begin()->get();		auto Idx = GEP->idx_begin()->get();
if (GEP->getNumIndices() > 1 \|\| isa<Constant>(Idx))		if (GEP->getNumIndices() > 1 \|\| isa<Constant>(Idx))
▲ Show 20 Lines • Show All 1,124 Lines • ▼ Show 20 Lines
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.		/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.		/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
///		///
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.		/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///		///
/// \return true if it matches.		/// \return true if it matches.
static bool findBuildAggregate(Value LastInsertInst, TargetTransformInfo TTI,		static bool findBuildAggregate(Value LastInsertInst, TargetTransformInfo TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,		SmallVectorImpl<Value *> &BuildVectorOpds,
		SmallVectorImpl<Value > AllInsertInsts,
int &UserCost) {		int &UserCost) {
assert((isa<InsertElementInst>(LastInsertInst) \|\|		assert((isa<InsertElementInst>(LastInsertInst) \|\|
isa<InsertValueInst>(LastInsertInst)) &&		isa<InsertValueInst>(LastInsertInst)) &&
"Expected insertelement or insertvalue instruction!");		"Expected insertelement or insertvalue instruction!");
UserCost = 0;		UserCost = 0;
do {		do {
Value *InsertedOperand;		Value *InsertedOperand;
		if (AllInsertInsts)
		AllInsertInsts->push_back(LastInsertInst);
if (auto *IE = dyn_cast<InsertElementInst>(LastInsertInst)) {		if (auto *IE = dyn_cast<InsertElementInst>(LastInsertInst)) {
InsertedOperand = IE->getOperand(1);		InsertedOperand = IE->getOperand(1);
LastInsertInst = IE->getOperand(0);		LastInsertInst = IE->getOperand(0);
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {		if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
UserCost += TTI->getVectorInstrCost(Instruction::InsertElement,		UserCost += TTI->getVectorInstrCost(Instruction::InsertElement,
IE->getType(), CI->getZExtValue());		IE->getType(), CI->getZExtValue());
}		}
} else {		} else {
auto *IV = cast<InsertValueInst>(LastInsertInst);		auto *IV = cast<InsertValueInst>(LastInsertInst);
InsertedOperand = IV->getInsertedValueOperand();		InsertedOperand = IV->getInsertedValueOperand();
LastInsertInst = IV->getAggregateOperand();		LastInsertInst = IV->getAggregateOperand();
}		}
if (isa<InsertElementInst>(InsertedOperand) \|\|		if (isa<InsertElementInst>(InsertedOperand) \|\|
isa<InsertValueInst>(InsertedOperand)) {		isa<InsertValueInst>(InsertedOperand)) {
int TmpUserCost;		if (!findBuildAggregate(InsertedOperand, TTI, BuildVectorOpds,
SmallVector<Value *, 8> TmpBuildVectorOpds;		AllInsertInsts, UserCost))
if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds,		return false;
TmpUserCost))
return false;
BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(),
TmpBuildVectorOpds.rend());
UserCost += TmpUserCost;
} else {		} else {
BuildVectorOpds.push_back(InsertedOperand);		BuildVectorOpds.push_back(InsertedOperand);
}		}
if (isa<UndefValue>(LastInsertInst))		if (isa<UndefValue>(LastInsertInst))
break;		break;
if ((!isa<InsertValueInst>(LastInsertInst) &&		if ((!isa<InsertValueInst>(LastInsertInst) &&
!isa<InsertElementInst>(LastInsertInst)) \|\|		!isa<InsertElementInst>(LastInsertInst)) \|\|
!LastInsertInst->hasOneUse())		!LastInsertInst->hasOneUse())
return false;		return false;
} while (true);		} while (true);
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;		return true;
}		}

static bool PhiTypeSorterFunc(Value V, Value V2) {		static bool PhiTypeSorterFunc(Value V, Value V2) {
return V->getType() < V2->getType();		return V->getType() < V2->getType();
}		}

/// Try and get a reduction value from a phi node.		/// Try and get a reduction value from a phi node.
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,		bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R) {		BasicBlock *BB, BoUpSLP &R) {
int UserCost = 0;		int UserCost = 0;
const DataLayout &DL = BB->getModule()->getDataLayout();		const DataLayout &DL = BB->getModule()->getDataLayout();
if (!R.canMapToVector(IVI->getType(), DL))		if (!R.canMapToVector(IVI->getType(), DL))
return false;		return false;

SmallVector<Value *, 16> BuildVectorOpds;		SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, UserCost))		if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, nullptr, UserCost))
return false;		return false;
		std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());

LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");		LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to		// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.		// extract scalars into scalar registers, so NeedExtraction is set true.
return tryToVectorizeList(BuildVectorOpds, R, UserCost);		return tryToVectorizeList(BuildVectorOpds, R, UserCost);
}		}

bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,		bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {		BasicBlock *BB, BoUpSLP &R) {
int UserCost;		int UserCost = 0;
SmallVector<Value *, 16> BuildVectorOpds;		SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, UserCost) \|\|		if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, nullptr, UserCost) \|\|
(llvm::all_of(BuildVectorOpds,		(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&		[](Value *V) { return isa<ExtractElementInst>(V); }) &&
isShuffle(BuildVectorOpds)))		isShuffle(BuildVectorOpds)))
return false;		return false;
		std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());

// Vectorize starting with the build vector operands ignoring the BuildVector		// Vectorize starting with the build vector operands ignoring the BuildVector
// instructions for the purpose of scheduling and user extraction.		// instructions for the purpose of scheduling and user extraction.
return tryToVectorizeList(BuildVectorOpds, R, UserCost);		return tryToVectorizeList(BuildVectorOpds, R, UserCost);
}		}

bool SLPVectorizerPass::vectorizeCmpInst(CmpInst CI, BasicBlock BB,		bool SLPVectorizerPass::vectorizeCmpInst(CmpInst CI, BasicBlock BB,
BoUpSLP &R) {		BoUpSLP &R) {
▲ Show 20 Lines • Show All 282 Lines • Show Last 20 Lines