Diff 109983

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	private:
bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);		bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);

/// Try to find horizontal reduction or otherwise vectorize a chain of binary		/// Try to find horizontal reduction or otherwise vectorize a chain of binary
/// operators.		/// operators.
bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,		bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,
slpvectorizer::BoUpSLP &R,		slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI);		TargetTransformInfo *TTI);

		/// Try to vectorize trees that start at insertvalue instructions.
		bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
		slpvectorizer::BoUpSLP &R);
		/// Try to vectorize trees that start at insertelement instructions.
		bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
		slpvectorizer::BoUpSLP &R);
		/// Try to vectorize trees that start at compare instructions.
		bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R);
		/// Tries to vectorize constructs started from CmpInst, InsertValueInst or
		/// InsertElementInst instructions.
		bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
		BasicBlock *BB, slpvectorizer::BoUpSLP &R);

/// \brief Scan the basic block and look for patterns that are likely to start		/// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.		/// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);		bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);

bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,		bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
unsigned VecRegSize);		unsigned VecRegSize);

bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);		bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
Show All 10 Lines

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 4,381 Lines • ▼ Show 20 Lines	bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,

return Changed;		return Changed;
}		}

bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {		bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!I)		if (!I)
return false;		return false;

if (!isa<BinaryOperator>(I))		if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
return false;		return false;

Value *P = I->getParent();		Value *P = I->getParent();

// Vectorize in current basic block only.		// Vectorize in current basic block only.
auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));		auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));		auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)		if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
▲ Show 20 Lines • Show All 521 Lines • ▼ Show 20 Lines
};		};
} // end anonymous namespace		} // end anonymous namespace

/// \brief Recognize construction of vectors like		/// \brief Recognize construction of vectors like
/// %ra = insertelement <4 x float> undef, float %s0, i32 0		/// %ra = insertelement <4 x float> undef, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1		/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2		/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3		/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
		/// starting from the last insertelement instruction.
///		///
/// Returns true if it matches		/// Returns true if it matches
///		///
static bool findBuildVector(InsertElementInst *FirstInsertElem,		static bool findBuildVector(InsertElementInst *LastInsertElem,
SmallVectorImpl<Value *> &BuildVector,		SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {		SmallVectorImpl<Value *> &BuildVectorOpds) {
if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))		Value *V = nullptr;
return false;		do {
		BuildVector.push_back(LastInsertElem);
InsertElementInst *IE = FirstInsertElem;		BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
while (true) {		V = LastInsertElem->getOperand(0);
BuildVector.push_back(IE);		if (isa<UndefValue>(V))
BuildVectorOpds.push_back(IE->getOperand(1));		break;
		LastInsertElem = dyn_cast<InsertElementInst>(V);
if (IE->use_empty())		if (!LastInsertElem || !LastInsertElem->hasOneUse())
return false;		return false;
		} while (true);
InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back());		std::reverse(BuildVector.begin(), BuildVector.end());
if (!NextUse)		std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;		return true;

// If this isn't the final use, make sure the next insertelement is the only
// use. It's OK if the final constructed vector is used multiple times
if (!IE->hasOneUse())
return false;

IE = NextUse;
}

return false;
}		}

/// \brief Like findBuildVector, but looks backwards for construction of aggregate.		/// \brief Like findBuildVector, but looks for construction of aggregate.
///		///
/// \return true if it matches.		/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,		static bool findBuildAggregate(InsertValueInst *IV,
SmallVectorImpl<Value *> &BuildVector,		SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {		SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V;		Value *V;
do {		do {
BuildVector.push_back(IV);		BuildVector.push_back(IV);
▲ Show 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {		auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
return tryToVectorize(I, R);		return tryToVectorize(I, R);
};		};
return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,		return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
ExtraVectorization);		ExtraVectorization);
}		}

		bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
		BasicBlock *BB, BoUpSLP &R) {
		const DataLayout &DL = BB->getModule()->getDataLayout();
		if (!R.canMapToVector(IVI->getType(), DL))
		return false;

		SmallVector<Value *, 16> BuildVector;
		SmallVector<Value *, 16> BuildVectorOpds;
		if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
		return false;

		DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
		return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false);
		}

		bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
		BasicBlock *BB, BoUpSLP &R) {
		SmallVector<Value *, 16> BuildVector;
		SmallVector<Value *, 16> BuildVectorOpds;
		if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
		return false;

		// Vectorize starting with the build vector operands ignoring the BuildVector
		// instructions for the purpose of scheduling and user extraction.
		return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
		}

		bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
		BoUpSLP &R) {
		if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R))
		return true;

		bool OpsChanged = false;
		for (int Idx = 0; Idx < 2; ++Idx) {
		OpsChanged |=
		vectorizeRootInstruction(nullptr, CI->getOperand(Idx), BB, R, TTI);
		}
		return OpsChanged;
		}

		bool SLPVectorizerPass::vectorizeSimpleInstructions(
		SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
		bool OpsChanged = false;
		for (auto &VH : reverse(Instructions)) {
		auto *I = dyn_cast_or_null<Instruction>(VH);
		if (!I)
		continue;
		if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
		OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
		else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
		OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
		else if (auto *CI = dyn_cast<CmpInst>(I))
		OpsChanged |= vectorizeCmpInst(CI, BB, R);
		}
		Instructions.clear();
		return OpsChanged;
		}

bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {		bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;		bool Changed = false;
SmallVector<Value *, 4> Incoming;		SmallVector<Value *, 4> Incoming;
SmallSet<Value *, 16> VisitedInstrs;		SmallSet<Value *, 16> VisitedInstrs;

bool HaveVectorizedPhiNodes = true;		bool HaveVectorizedPhiNodes = true;
while (HaveVectorizedPhiNodes) {		while (HaveVectorizedPhiNodes) {
HaveVectorizedPhiNodes = false;		HaveVectorizedPhiNodes = false;
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),

// Start over at the next instruction of a different type (or the end).		// Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;		IncIt = SameTypeIt;
}		}
}		}

VisitedInstrs.clear();		VisitedInstrs.clear();

		SmallVector<WeakVH, 8> PostProcessInstructions;
		SmallDenseSet<Instruction *, 4> KeyNodes;
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {		for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.		// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second)		if (!VisitedInstrs.insert(&*it).second) {
		if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
		vectorizeSimpleInstructions(PostProcessInstructions, BB, R)) {
		// We would like to start over since some instructions were deleted
		// and the iterator may have become invalid.
		Changed = true;
		it = BB->begin();
		e = BB->end();
		}
continue;		continue;
		}

if (isa<DbgInfoIntrinsic>(it))		if (isa<DbgInfoIntrinsic>(it))
continue;		continue;

// Try to vectorize reductions that use PHINodes.		// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(it)) {		if (PHINode *P = dyn_cast<PHINode>(it)) {
// Check that the PHI is a reduction PHI.		// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() != 2)		if (P->getNumIncomingValues() != 2)
return Changed;		return Changed;

// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,		if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
TTI)) {		TTI)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}
continue;		continue;
}		}

if (ShouldStartVectorizeHorAtStore) {		// Ran into an instruction without users, like terminator, or function call
if (StoreInst *SI = dyn_cast<StoreInst>(it)) {		// with ignored return value, store. Ignore unused instructions (based on
		// instruction type, except for CallInst and InvokeInst).
		if (it->use_empty() && (it->getType()->isVoidTy() || isa<CallInst>(it) ||
		isa<InvokeInst>(it))) {
		KeyNodes.insert(&*it);
		bool OpsChanged = false;
		if (ShouldStartVectorizeHorAtStore \|\| !isa<StoreInst>(it)) {
		for (auto *V : it->operand_values()) {
// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,		OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
}
}

// Try to vectorize horizontal reductions feeding into a return.
if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
if (RI->getNumOperands() != 0) {
// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}		}
}		}
}		// Start vectorization of post-process list of instructions from the
		// top-tree instructions to try to vectorize as many instructions as
// Try to vectorize trees that start at compare instructions.		// possible.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {		OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R);
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {		if (OpsChanged) {
Changed = true;
// We would like to start over since some instructions are deleted		// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.		// and the iterator may become invalid value.
it = BB->begin();
e = BB->end();
continue;
}

for (int I = 0; I < 2; ++I) {
if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
Changed = true;		Changed = true;
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
break;
}
}
continue;
}

// Try to vectorize trees that start at insertelement instructions.
if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;		continue;

// Vectorize starting with the build vector operands ignoring the
// BuildVector instructions for the purpose of scheduling and user
// extraction.
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
Changed = true;
it = BB->begin();
e = BB->end();
}		}

continue;
}		}

// Try to vectorize trees that start at insertvalue instructions feeding into		if (isa<InsertElementInst>(it) || isa<CmpInst>(it) ||
// a store.		isa<InsertValueInst>(it))
if (StoreInst *SI = dyn_cast<StoreInst>(it)) {		PostProcessInstructions.push_back(&*it);
if (InsertValueInst *LastInsertValue = dyn_cast<InsertValueInst>(SI->getValueOperand())) {
const DataLayout &DL = BB->getModule()->getDataLayout();
if (R.canMapToVector(SI->getValueOperand()->getType(), DL)) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(LastInsertValue, BuildVector, BuildVectorOpds))
continue;

DEBUG(dbgs() << "SLP: store of array mappable to vector: " << *SI << "\n");
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false)) {
Changed = true;
it = BB->begin();
e = BB->end();
}
continue;
}
}
}
}		}
		assert(PostProcessInstructions.empty() &&
		"Not all instruction were processed.");

return Changed;		return Changed;
}		}

bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {		bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
auto Changed = false;		auto Changed = false;
for (auto &Entry : GEPs) {		for (auto &Entry : GEPs) {

▲ Show 20 Lines • Show All 118 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll

	Show All 25 Lines
	; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef			; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef
	; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])			; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
	; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[TMP17]]			; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[TMP17]]
	; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef			; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
	; DEFAULT-NEXT: br label [[FOR_BODY]]			; DEFAULT-NEXT: br label [[FOR_BODY]]
	;			;
	; GATHER-LABEL: @PR28330(			; GATHER-LABEL: @PR28330(
	; GATHER-NEXT: entry:			; GATHER-NEXT: entry:
	; GATHER-NEXT: [[TMP0:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1			; GATHER-NEXT: [[TMP0:%.]] = load <2 x i8>, <2 x i8> bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
	; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0			; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
	; GATHER-NEXT: [[TMP2:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
	; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
	; GATHER-NEXT: [[TMP4:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1			; GATHER-NEXT: [[TMP4:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
	; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0			; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
	; GATHER-NEXT: [[TMP6:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4			; GATHER-NEXT: [[TMP6:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
	; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0			; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
	; GATHER-NEXT: [[TMP8:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1			; GATHER-NEXT: [[TMP8:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
	; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0			; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
	; GATHER-NEXT: [[TMP10:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2			; GATHER-NEXT: [[TMP10:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
	; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0			; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
	; GATHER-NEXT: [[TMP12:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1			; GATHER-NEXT: [[TMP12:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
	; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0			; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
	; GATHER-NEXT: [[TMP14:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8			; GATHER-NEXT: [[TMP14:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
	; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0			; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
	; GATHER-NEXT: br label [[FOR_BODY:%.*]]			; GATHER-NEXT: br label [[FOR_BODY:%.*]]
	; GATHER: for.body:			; GATHER: for.body:
	; GATHER-NEXT: [[TMP17:%.]] = phi i32 [ [[BIN_EXTRA:%.]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]			; GATHER-NEXT: [[TMP17:%.]] = phi i32 [ [[BIN_EXTRA:%.]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
	; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
	; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP19]]			; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
	; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]]
	; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]			; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
				; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
	; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]			; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
	; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]			; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
	; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]			; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
	; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]			; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
	; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]			; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
	; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0			; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
	; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1			; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
	; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2			; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
	; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3			; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
	; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4			; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
	; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5			; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
	; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6			; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
	; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7			; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
	; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]])			; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
	; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], [[TMP17]]			; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]]
	; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]			; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
	; GATHER-NEXT: br label [[FOR_BODY]]			; GATHER-NEXT: br label [[FOR_BODY]]
	;			;
	; MAX-COST-LABEL: @PR28330(			; MAX-COST-LABEL: @PR28330(
	; MAX-COST-NEXT: entry:			; MAX-COST-NEXT: entry:
	; MAX-COST-NEXT: [[TMP0:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1			; MAX-COST-NEXT: [[TMP0:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
	; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0			; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
	; MAX-COST-NEXT: [[TMP2:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2			; MAX-COST-NEXT: [[TMP2:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
	▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines
	; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef			; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef
	; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])			; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
	; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5			; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5
	; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef			; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
	; DEFAULT-NEXT: br label [[FOR_BODY]]			; DEFAULT-NEXT: br label [[FOR_BODY]]
	;			;
	; GATHER-LABEL: @PR32038(			; GATHER-LABEL: @PR32038(
	; GATHER-NEXT: entry:			; GATHER-NEXT: entry:
	; GATHER-NEXT: [[TMP0:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1			; GATHER-NEXT: [[TMP0:%.]] = load <2 x i8>, <2 x i8> bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
	; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0			; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
	; GATHER-NEXT: [[TMP2:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
	; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
	; GATHER-NEXT: [[TMP4:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1			; GATHER-NEXT: [[TMP4:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
	; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0			; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
	; GATHER-NEXT: [[TMP6:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4			; GATHER-NEXT: [[TMP6:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
	; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0			; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
	; GATHER-NEXT: [[TMP8:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1			; GATHER-NEXT: [[TMP8:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
	; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0			; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
	; GATHER-NEXT: [[TMP10:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2			; GATHER-NEXT: [[TMP10:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
	; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0			; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
	; GATHER-NEXT: [[TMP12:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1			; GATHER-NEXT: [[TMP12:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
	; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0			; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
	; GATHER-NEXT: [[TMP14:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8			; GATHER-NEXT: [[TMP14:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
	; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0			; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
	; GATHER-NEXT: br label [[FOR_BODY:%.*]]			; GATHER-NEXT: br label [[FOR_BODY:%.*]]
	; GATHER: for.body:			; GATHER: for.body:
	; GATHER-NEXT: [[TMP17:%.]] = phi i32 [ [[BIN_EXTRA:%.]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]			; GATHER-NEXT: [[TMP17:%.]] = phi i32 [ [[BIN_EXTRA:%.]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
	; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
	; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]]			; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
	; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]]
	; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]			; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
				; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
	; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]			; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
	; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]			; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
	; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]			; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
	; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]			; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
	; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]			; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
	; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80			; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
	; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0			; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
	; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1			; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
	; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2			; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
	; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3			; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
	; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4			; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
	; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5			; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
	; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6			; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
	; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7			; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
	; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]])			; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
	; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5			; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5
	; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]			; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
	; GATHER-NEXT: br label [[FOR_BODY]]			; GATHER-NEXT: br label [[FOR_BODY]]
	;			;
	; MAX-COST-LABEL: @PR32038(			; MAX-COST-LABEL: @PR32038(
	; MAX-COST-NEXT: entry:			; MAX-COST-NEXT: entry:
	; MAX-COST-NEXT: [[TMP0:%.]] = load i8, i8 getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1			; MAX-COST-NEXT: [[TMP0:%.]] = load <2 x i8>, <2 x i8> bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
	; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0			; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
	; MAX-COST-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
	; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
	; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1			; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
	; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0			; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0
	; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4			; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
	; MAX-COST-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0			; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0
	; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1			; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
	; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0			; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
	; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2			; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
	; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0			; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
	; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1			; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
	; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0			; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
	; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8			; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
	; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0			; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
	; MAX-COST-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> undef, i1 [[TMP1]], i32 0
	; MAX-COST-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> [[TMP0]], i1 [[TMP3]], i32 1
	; MAX-COST-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[TMP5]], i32 2
	; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[TMP7]], i32 3
	; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]			; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
	; MAX-COST: for.body:			; MAX-COST: for.body:
	; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]			; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
	; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>			; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
				; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0
				; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
				; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1
				; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2
				; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3
				; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
	; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef			; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef
	; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef			; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
	; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef			; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef
	; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef			; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef
	; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80			; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
	; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]			; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
	; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80			; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
	; MAX-COST-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP4]])			; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]])
	; MAX-COST-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP27]]			; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]]
	; MAX-COST-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP29]]			; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]]
	; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP7]], -5			; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5
	; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]			; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
	; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80			; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
	; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]]			; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]]
	; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80			; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
	; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]]			; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]]
	; MAX-COST-NEXT: br label [[FOR_BODY]]			; MAX-COST-NEXT: br label [[FOR_BODY]]
	;			;
	entry:			entry:
	Show All 38 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll

Show First 20 Lines • Show All 811 Lines • ▼ Show 20 Lines	entry:
ret void		ret void
}		}

declare i32 @foobar(i32)		declare i32 @foobar(i32)

define void @i32_red_call(i32 %val) {		define void @i32_red_call(i32 %val) {
; CHECK-LABEL: @i32_red_call(		; CHECK-LABEL: @i32_red_call(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4		; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]		; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8		; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]		; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4		; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]		; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]		; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]		; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]		; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]		; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[ADD_6]])		; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16		%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4		%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
%add = add nsw i32 %1, %0		%add = add nsw i32 %1, %0
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8		%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
%add.1 = add nsw i32 %2, %add		%add.1 = add nsw i32 %2, %add
Show All 9 Lines	entry:
%add.6 = add nsw i32 %7, %add.5		%add.6 = add nsw i32 %7, %add.5
%res = call i32 @foobar(i32 %add.6)		%res = call i32 @foobar(i32 %add.6)
ret void		ret void
}		}

define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {		define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {
; CHECK-LABEL: @i32_red_invoke(		; CHECK-LABEL: @i32_red_invoke(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4		; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]		; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8		; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]		; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4		; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]		; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]		; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]		; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]		; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]		; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[ADD_6]])		; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]		; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
; CHECK: exception:		; CHECK: exception:
; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8		; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8
; CHECK-NEXT: cleanup		; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[NORMAL]]		; CHECK-NEXT: br label [[NORMAL]]
; CHECK: normal:		; CHECK: normal:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
Show All 25 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

	Show First 20 Lines • Show All 297 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0			; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
	; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1			; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
	; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2			; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
	; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3			; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
	; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0			; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
	; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1			; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
	; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2			; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
	; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3			; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
	; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0			; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
	; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0			; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
	; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
	; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1
	; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer			; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
	; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]			; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
	; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]			; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
	; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0			; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
	; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1			; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
	; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0			; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
	; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1			; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
	; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]			; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
	; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0			; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
	; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1			; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
	; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0			; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
	; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2			; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
	; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1			; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
	; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3			; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
				; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
				; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0
				; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
				; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
				; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
				; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2
				; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
				; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
	; CHECK-NEXT: ret <4 x float> [[RD]]			; CHECK-NEXT: ret <4 x float> [[RD]]
	;			;
	; ZEROTHRESH-LABEL: @simple_select_no_users(			; ZEROTHRESH-LABEL: @simple_select_no_users(
	; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0			; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
	; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1			; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
	; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2			; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
	; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3			; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
	; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0			; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
	▲ Show 20 Lines • Show All 417 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] General improvements of SLP vectorization process.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109983

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] General improvements of SLP vectorization process.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109983

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

[SLP] General improvements of SLP vectorization process.
ClosedPublic