Diff 79513

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines	private:

/// \brief Vectorize the store instructions collected in Stores.		/// \brief Vectorize the store instructions collected in Stores.
bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R);		bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R);

/// \brief Vectorize the index computations of the getelementptr instructions		/// \brief Vectorize the index computations of the getelementptr instructions
/// collected in GEPs.		/// collected in GEPs.
bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);		bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);

		/// Try to find horizontal reduction or otherwise vectorize a chain of binary
		/// operators.
		bool vectorizeRootInstruction(PHINode P, Value V, BasicBlock *BB,
		slpvectorizer::BoUpSLP &R,
		TargetTransformInfo *TTI);

/// \brief Scan the basic block and look for patterns that are likely to start		/// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.		/// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);		bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);

bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,		bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
unsigned VecRegSize);		unsigned VecRegSize);

bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);		bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
Show All 10 Lines

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 3,963 Lines • ▼ Show 20 Lines	bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,

return Changed;		return Changed;
}		}

bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {		bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
if (!V)		if (!V)
return false;		return false;

		Value *P = V->getParent();

		// Vectorize in current basic block only.
		auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
		auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
		if (!Op0 \|\| !Op1 \|\| Op0->getParent() != P \|\| Op1->getParent() != P)
		return false;

// Try to vectorize V.		// Try to vectorize V.
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))		if (tryToVectorizePair(Op0, Op1, R))
return true;		return true;

BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));		auto *A = dyn_cast<BinaryOperator>(Op0);
BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));		auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.		// Try to skip B.
if (B && B->hasOneUse()) {		if (B && B->hasOneUse()) {
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));		auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));		auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
if (tryToVectorizePair(A, B0, R)) {		if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
return true;		return true;
}		if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
if (tryToVectorizePair(A, B1, R)) {
return true;		return true;
}		}
}

// Try to skip A.		// Try to skip A.
if (A && A->hasOneUse()) {		if (A && A->hasOneUse()) {
BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));		auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));		auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
if (tryToVectorizePair(A0, B, R)) {		if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
return true;		return true;
}		if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
if (tryToVectorizePair(A1, B, R)) {
return true;		return true;
}		}
}		return false;
return 0;
}		}

/// \brief Generate a shuffle mask to be used in a reduction tree.		/// \brief Generate a shuffle mask to be used in a reduction tree.
///		///
/// \param VecLen The length of the vector to be reduced.		/// \param VecLen The length of the vector to be reduced.
/// \param NumEltsToRdx The number of elements that should be reduced in the		/// \param NumEltsToRdx The number of elements that should be reduced in the
/// vector.		/// vector.
/// \param IsPairwise Whether the reduction is a pairwise or splitting		/// \param IsPairwise Whether the reduction is a pairwise or splitting
▲ Show 20 Lines • Show All 434 Lines • ▼ Show 20 Lines	static Value getReductionValue(const DominatorTree DT, PHINode *P,
}		}

if (Rdx && DominatedReduxValue(Rdx))		if (Rdx && DominatedReduxValue(Rdx))
return Rdx;		return Rdx;

return nullptr;		return nullptr;
}		}

		namespace {
		/// Tracks an instruction and its children (operands).
		anemetUnsubmitted Not Done Reply Inline Actions Tracks for what? anemet: Tracks for what?
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Tracks if the instruction is deleted (replaced by undef value) / replaced by the new instruction (the vector version, as the result of the whole vectorization process) + tracks the processing of the instruction operands ABataev: Tracks if the instruction is deleted (replaced by undef value) / replaced by the new instruction…
		anemetUnsubmitted Not Done Reply Inline Actions Improve the comment then, please. anemet: Improve the comment then, please.
		class WeakVHWithLevel final : public CallbackVH {
		/// Operand index of the instruction currently being analyzed.
		unsigned Level = 0;
		anemetUnsubmitted Not Done Reply Inline Actions OperandIndexAnalyzed? anemet: OperandIndexAnalyzed?
		/// Is this the instruction that should be vectorized, or are we now
		/// processing children (i.e. operands of this instruction) for potential
		/// vectorization?
		bool IsInitial = true;
		anemetUnsubmitted Not Done Reply Inline Actions IsAnalyzingOperands? Or just merge these into a single state variable, Optional<unsigned> which if None means we're processing the instruction itself. anemet: IsAnalyzingOperands? Or just merge these into a single state variable, Optional<unsigned>…

		public:
		/// Default-constructs a handle with the operand index at 0 and the
		/// "initial" flag set (see the in-class member initializers).
		explicit WeakVHWithLevel() = default;
		/// Starts tracking \p V. This constructor is implicit; the traversal code
		/// in this file builds and pushes handles directly from instruction
		/// pointers.
		WeakVHWithLevel(Value *V) : CallbackVH(V){};
		/// Restart children analysis each time the tracked value is replaced by a
		/// new instruction: re-point the handle at \p New and reset both the
		/// operand index and the "initial" flag, so \p New is processed from
		/// scratch.
		void allUsesReplacedWith(Value *New) override {
		setValPtr(New);
		Level = 0;
		IsInitial = true;
		}
		/// Check that the instruction was not deleted during vectorization, i.e.
		/// that the handle still refers to a value.
		/// \returns true while the handle holds a non-null value pointer.
		bool isValid() const { return getValPtr() != nullptr; }
		/// Must the instruction itself still be vectorized (as opposed to its
		/// operands)?
		bool isInitial() const { return IsInitial; }
		/// Record that the instruction itself has been handled; from now on try to
		/// vectorize its children (operands).
		void clearInitial() { IsInitial = false; }
		/// Have all children (operands) of the tracked instruction been handed out
		/// already?
		/// Expects the handle to track a live instruction whose operand count is
		/// not smaller than the current operand index (checked by the assertion).
		bool isFinal() const {
		  auto *Tracked = getValPtr();
		  assert(Tracked &&
		         (isa<Instruction>(Tracked) &&
		          cast<Instruction>(Tracked)->getNumOperands() >= Level));
		  // Keep the null guard for builds where the assertion is compiled out.
		  if (!Tracked)
		    return false;
		  return cast<Instruction>(Tracked)->getNumOperands() == Level;
		}
		/// Return the next child (operand) of the tracked instruction and advance
		/// the operand index past it.
		/// Expects the handle to track a live instruction that still has an
		/// operand at the current index (checked by the assertion).
		Value *nextOperand() {
		  auto *Tracked = getValPtr();
		  assert(Tracked && isa<Instruction>(Tracked) &&
		         cast<Instruction>(Tracked)->getNumOperands() > Level);
		  Value *Operand = cast<Instruction>(Tracked)->getOperand(Level);
		  ++Level;
		  return Operand;
		}
		virtual ~WeakVHWithLevel() = default;
		};
		} // namespace

/// \brief Attempt to reduce a horizontal reduction.		/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding		/// If it is legal to match a horizontal reduction feeding
/// the phi node P with reduction operators BI, then check if it		/// the phi node P with reduction operators Root in a basic block BB, then check
/// can be done.		/// if it can be done.
/// \returns true if a horizontal reduction was matched and reduced.		/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.		/// \returns false if a horizontal reduction was not matched.
static bool canMatchHorizontalReduction(PHINode P, BinaryOperator BI,		static bool canBeVectorized(
		anemetUnsubmitted Not Done Reply Inline Actions The name of the function and the comment mismatch. What is this function supposed to do? anemet: The name of the function and the comment mismatch. What is this function supposed to do?
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions Yes, probably. This function checks if it is possible to vectorize the tree + performs the vectorization of horizontal reduction or, if the instruction is not the top instruction of the horizontal reduction and this is a binary operation, vectorizes the operands of this binary instruction. ABataev: Yes, probably. This function checks if it is possible to vectorize the tree + performs the…
		anemetUnsubmitted Not Done Reply Inline Actions Then improve the comment please. anemet: Then improve the comment please.
BoUpSLP &R, TargetTransformInfo *TTI,		PHINode P, Instruction Root, BasicBlock *BB, BoUpSLP &R,
unsigned MinRegSize) {		TargetTransformInfo *TTI,
		const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)		if (!ShouldVectorizeHor)
return false;		return false;

HorizontalReduction HorRdx(MinRegSize);		if (!Root)
if (!HorRdx.matchAssociativeReduction(P, BI))
return false;		return false;

		if (Root->getParent() != BB)
		return false;
		SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
		SmallSet<Value *, 8> VisitedInstrs;
		bool Res = false;
		while (!Stack.empty()) {
		Value *V = Stack.back();
		if (!V) {
		Stack.pop_back();
		continue;
		}
		auto *Inst = dyn_cast<Instruction>(V);
		if (!Inst \|\| isa<PHINode>(Inst)) {
		Stack.pop_back();
		continue;
		}
		if (Stack.back().isInitial()) {
		Stack.back().clearInitial();
		if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
		HorizontalReduction HorRdx(R.getMinVecRegSize());
		if (HorRdx.matchAssociativeReduction(P, BI)) {
// If there is a sufficient number of reduction values, reduce		// If there is a sufficient number of reduction values, reduce
// to a nearby power-of-2. Can safely generate oversized		// to a nearby power-of-2. Can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.		// vectors and rely on the backend to split them to legal sizes.
HorRdx.ReduxWidth =		HorRdx.ReduxWidth =
std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));		std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));

return HorRdx.tryToReduce(R, TTI);		if (HorRdx.tryToReduce(R, TTI)) {
		Res = true;
		P = nullptr;
		continue;
		}
		}
		if (P) {
		Inst = dyn_cast<Instruction>(BI->getOperand(0));
		if (Inst == P)
		Inst = dyn_cast<Instruction>(BI->getOperand(1));
		if (!Inst) {
		P = nullptr;
		continue;
		}
		}
		}
		P = nullptr;
		if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
		Res = true;
		continue;
		}
		}
		if (Stack.back().isFinal()) {
		Stack.pop_back();
		continue;
		}

		if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
		if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
		Stack.size() < RecursionMaxDepth)
		Stack.push_back(NextV);
		}
		anemetUnsubmitted Not Done Reply Inline Actions This needs a description of the algorithm. anemet: This needs a description of the algorithm.
		ABataevAuthorUnsubmitted Not Done Reply Inline Actions The algorithm is the same as before, just exit criteria become a bit weaker. ABataev: The algorithm is the same as before, just exit criteria become a bit weaker.
		anemetUnsubmitted Not Done Reply Inline Actions There is a non-trivial loop that wasn't there before!? Please explain what it does and what state the various new data structures represent. anemet: There is a non-trivial loop that wasn't there before!? Please explain what it does and what…
		return Res;
		}

		bool SLPVectorizerPass::vectorizeRootInstruction(PHINode P, Value V,
		BasicBlock *BB, BoUpSLP &R,
		TargetTransformInfo *TTI) {
		if (!V)
		return false;
		auto *I = dyn_cast<Instruction>(V);
		if (!I)
		return false;

		if (!isa<BinaryOperator>(I))
		P = nullptr;
		// Try to match and vectorize a horizontal reduction.
		return canBeVectorized(P, I, BB, R, TTI,
		[this](BinaryOperator *BI, BoUpSLP &R) -> bool {
		return tryToVectorize(BI, R);
		});
}		}

bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {		bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;		bool Changed = false;
SmallVector<Value *, 4> Incoming;		SmallVector<Value *, 4> Incoming;
SmallSet<Value *, 16> VisitedInstrs;		SmallSet<Value *, 16> VisitedInstrs;

bool HaveVectorizedPhiNodes = true;		bool HaveVectorizedPhiNodes = true;
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	if (isa<DbgInfoIntrinsic>(it))
continue;		continue;

// Try to vectorize reductions that use PHINodes.		// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(it)) {		if (PHINode *P = dyn_cast<PHINode>(it)) {
// Check that the PHI is a reduction PHI.		// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() != 2)		if (P->getNumIncomingValues() != 2)
return Changed;		return Changed;

Value *Rdx = getReductionValue(DT, P, BB, LI);

// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
continue;

// Try to match and vectorize a horizontal reduction.		// Try to match and vectorize a horizontal reduction.
if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {		if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
		TTI)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}

Value *Inst = BI->getOperand(0);
if (Inst == P)
Inst = BI->getOperand(1);

if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
Changed = true;
it = BB->begin();
e = BB->end();
continue;		continue;
}		}

continue;		if (ShouldStartVectorizeHorAtStore) {
}		if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
		// Try to match and vectorize a horizontal reduction.
if (ShouldStartVectorizeHorAtStore)		if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
if (StoreInst *SI = dyn_cast<StoreInst>(it))		TTI)) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
R.getMinVecRegSize()) \|\|
tryToVectorize(BinOp, R)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}
}		}
		}

// Try to vectorize horizontal reductions feeding into a return.		// Try to vectorize horizontal reductions feeding into a return.
if (ReturnInst *RI = dyn_cast<ReturnInst>(it))		if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
if (RI->getNumOperands() != 0)		if (RI->getNumOperands() != 0) {
if (BinaryOperator *BinOp =		// Try to match and vectorize a horizontal reduction.
dyn_cast<BinaryOperator>(RI->getOperand(0))) {		if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
R.getMinVecRegSize()) \|\|
tryToVectorizePair(BinOp->getOperand(0), BinOp->getOperand(1),
R)) {
Changed = true;		Changed = true;
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}
}		}
		}

// Try to vectorize trees that start at compare instructions.		// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {		if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {		if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
Changed = true;		Changed = true;
// We would like to start over since some instructions are deleted		// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.		// and the iterator may become invalid value.
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
continue;		continue;
}		}

for (int i = 0; i < 2; ++i) {		for (int I = 0; I < 2; ++I) {
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {		if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
Changed = true;		Changed = true;
// We would like to start over since some instructions are deleted		// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.		// and the iterator may become invalid value.
it = BB->begin();		it = BB->begin();
e = BB->end();		e = BB->end();
break;		break;
}		}
}		}
}
continue;		continue;
}		}

// Try to vectorize trees that start at insertelement instructions.		// Try to vectorize trees that start at insertelement instructions.
if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {		if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
SmallVector<Value *, 16> BuildVector;		SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;		SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))		if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
▲ Show 20 Lines • Show All 160 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 \| FileCheck %s		; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 \| FileCheck %s

@n = external local_unnamed_addr global i32, align 4		@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16		@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16		@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4		@res = external local_unnamed_addr global float, align 4

define float @baz() {		define float @baz() {
; CHECK-LABEL: @baz(		; CHECK-LABEL: @baz(
; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4		; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3		; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP1:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP2:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT: [[MUL4:%.*]] = fmul fast float [[TMP2]], [[TMP1]]		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL4]], [[CONV]]		; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP3:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[TMP4:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[MUL4_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD]]		; CHECK-NEXT: [[TMP6:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP5:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8		; CHECK-NEXT: [[TMP7:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP6:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8		; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], [[TMP5]]		; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0		; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP8]], [[ADD_1]]		; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1		; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP9]], [[ADD_2]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]		; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[MUL4]], [[ADD7]]		; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD19]]		; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP8]], [[ADD19_1]]		; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP9]], [[ADD19_2]]		; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4		; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT: ret float [[ADD19_3]]		; CHECK-NEXT: ret float [[ADD19_3]]
;		;
entry:		entry:
%0 = load i32, i32* @n, align 4		%0 = load i32, i32* @n, align 4
%mul = mul nsw i32 %0, 3		%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float		%conv = sitofp i32 %mul to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
Show All 21 Lines	entry:
ret float %add19.3		ret float %add19.3
}		}

define float @bazz() {		define float @bazz() {
; CHECK-LABEL: @bazz(		; CHECK-LABEL: @bazz(
; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4		; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3		; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP1:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP2:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT: [[MUL4:%.*]] = fmul fast float [[TMP2]], [[TMP1]]		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL4]], [[CONV]]		; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP3:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[TMP4:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[MUL4_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD]]		; CHECK-NEXT: [[TMP6:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP5:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8		; CHECK-NEXT: [[TMP7:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP6:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8		; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[MUL4_2:%.*]] = fmul fast float [[TMP6]], [[TMP5]]		; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[MUL4_2]], [[ADD_1]]		; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; CHECK-NEXT: [[TMP7:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4		; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP8:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4		; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; CHECK-NEXT: [[MUL4_3:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[MUL4_3]], [[ADD_2]]
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2		; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float		; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]		; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
; CHECK-NEXT: [[TMP9:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16		; CHECK-NEXT: [[TMP11:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4) to <2 x float>*), align 16
; CHECK-NEXT: [[TMP10:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16		; CHECK-NEXT: [[TMP12:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4) to <2 x float>*), align 16
; CHECK-NEXT: [[MUL18:%.*]] = fmul fast float [[TMP10]], [[TMP9]]		; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[MUL18]], [[ADD7]]		; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
; CHECK-NEXT: [[TMP11:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4		; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP14]], [[ADD7]]
; CHECK-NEXT: [[TMP12:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4		; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
; CHECK-NEXT: [[MUL18_1:%.*]] = fmul fast float [[TMP12]], [[TMP11]]		; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP15]], [[ADD19]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[MUL18_1]], [[ADD19]]		; CHECK-NEXT: [[TMP16:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP13:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6) to <2 x float>*), align 8		; CHECK-NEXT: [[TMP17:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP14:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6) to <2 x float>*), align 8		; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x float> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x float> [[TMP14]], [[TMP13]]		; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP18]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0		; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP19]], [[ADD19_1]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP16]], [[ADD19_1]]		; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP18]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1		; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP20]], [[ADD19_2]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP17]], [[ADD19_2]]
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4		; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT: ret float [[ADD19_3]]		; CHECK-NEXT: ret float [[ADD19_3]]
;		;
entry:		entry:
%0 = load i32, i32* @n, align 4		%0 = load i32, i32* @n, align 4
%mul = mul nsw i32 %0, 3		%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float		%conv = sitofp i32 %mul to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
Show All 34 Lines	entry:
store float %add19.3, float* @res, align 4		store float %add19.3, float* @res, align 4
ret float %add19.3		ret float %add19.3
}		}

define float @bazzz() {		define float @bazzz() {
; CHECK-LABEL: @bazzz(		; CHECK-LABEL: @bazzz(
; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4		; CHECK: [[TMP0:%.]] = load i32, i32 @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP1:%.]] = load <4 x float>, <4 x float> bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP2:%.]] = load <4 x float>, <4 x float> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP2]], [[TMP1]]		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; CHECK-NEXT: [[TMP4:%.]] = load float, float getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float [[MUL_1]], [[MUL]]		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[TMP7]], [[TMP6]]		; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast float [[MUL_2]], [[TMP5]]		; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4		; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4		; CHECK-NEXT: store float [[TMP8]], float* @res, align 4
; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[TMP10]], [[TMP9]]		; CHECK-NEXT: ret float [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[MUL_3]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[CONV]], [[TMP11]]
; CHECK-NEXT: store float [[TMP12]], float* @res, align 4
; CHECK-NEXT: ret float [[TMP12]]
;		;
entry:		entry:
%0 = load i32, i32* @n, align 4		%0 = load i32, i32* @n, align 4
%conv = sitofp i32 %0 to float		%conv = sitofp i32 %0 to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul = fmul fast float %2, %1		%mul = fmul fast float %2, %1
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4		%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
Show All 12 Lines	entry:
store float %12, float* @res, align 4		store float %12, float* @res, align 4
ret float %12		ret float %12
}		}

define i32 @foo() {		define i32 @foo() {
; CHECK-LABEL: @foo(		; CHECK-LABEL: @foo(
; CHECK: [[TMP0:%.*]] = load i32, i32* @n, align 4		; CHECK: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP2]], [[TMP1]]		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4		; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float [[MUL_1]], [[MUL]]		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[TMP7]], [[TMP6]]		; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast float [[MUL_2]], [[TMP5]]		; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4		; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4		; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[MUL_3]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[CONV]], [[TMP11]]
; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4		; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
; CHECK-NEXT: ret i32 [[CONV4]]		; CHECK-NEXT: ret i32 [[CONV4]]
;		;
entry:		entry:
%0 = load i32, i32* @n, align 4		%0 = load i32, i32* @n, align 4
%conv = sitofp i32 %0 to float		%conv = sitofp i32 %0 to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
Show All 13 Lines	entry:
%12 = fmul fast float %conv, %11		%12 = fmul fast float %conv, %11
%conv4 = fptosi float %12 to i32		%conv4 = fptosi float %12 to i32
store i32 %conv4, i32* @n, align 4		store i32 %conv4, i32* @n, align 4
ret i32 %conv4		ret i32 %conv4
}		}

define float @bar() {		define float @bar() {
; CHECK-LABEL: @bar(		; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:		; CHECK: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16		; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]		; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1		; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]		; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]		; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8		; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8		; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
Show All 34 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Improved support of partial tree vectorization.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 79513

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLPVectorizer] Improved support of partial tree vectorization.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 79513

llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

[SLPVectorizer] Improved support of partial tree vectorization.
ClosedPublic