Diff 86940

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Show First 20 Lines • Show All 4,180 Lines • ▼ Show 20 Lines
///  \  /		///  \  /
///   +		///   +
///   |		///   |
///  *p =		///  *p =
///		///
class HorizontalReduction {		class HorizontalReduction {
SmallVector<Value *, 16> ReductionOps;		SmallVector<Value *, 16> ReductionOps;
SmallVector<Value *, 32> ReducedVals;		SmallVector<Value *, 32> ReducedVals;
		// Use map vector to make stable output.
		MapVector<Value *, Value *> ExtraArgs;

BinaryOperator *ReductionRoot = nullptr;		BinaryOperator *ReductionRoot = nullptr;
// After successful horizontal reduction vectorization attempt for PHI node		// After successful horizontal reduction vectorization attempt for PHI node
// vectorizer tries to update root binary op by combining vectorized tree and		// vectorizer tries to update root binary op by combining vectorized tree and
// the ReductionPHI node. But during vectorization this ReductionPHI can be		// the ReductionPHI node. But during vectorization this ReductionPHI can be
// vectorized itself and replaced by the undef value, while the instruction		// vectorized itself and replaced by the undef value, while the instruction
// itself is marked for deletion. This 'marked for deletion' PHI node then can		// itself is marked for deletion. This 'marked for deletion' PHI node then can
// be used in new binary operation, causing "Use still stuck around after Def		// be used in new binary operation, causing "Use still stuck around after Def
// is destroyed" crash upon PHI node deletion.		// is destroyed" crash upon PHI node deletion.
WeakVH ReductionPHI;		WeakVH ReductionPHI;

/// The opcode of the reduction.		/// The opcode of the reduction.
Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;		Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;
/// The opcode of the values we perform a reduction on.		/// The opcode of the values we perform a reduction on.
unsigned ReducedValueOpcode = 0;		unsigned ReducedValueOpcode = 0;
/// Should we model this reduction as a pairwise reduction tree or a tree that		/// Should we model this reduction as a pairwise reduction tree or a tree that
/// splits the vector in halves and adds those halves.		/// splits the vector in halves and adds those halves.
bool IsPairwiseReduction = false;		bool IsPairwiseReduction = false;

		/// Registers \p ExtraArg as an operand of ParentStackElem.first that is not
		/// part of the reduction tree.
		///
		/// If the parent operation has no extra argument recorded yet, \p ExtraArg
		/// becomes its extra argument. If one is already recorded, the parent's
		/// entry is reset to nullptr, which marks the whole parent instruction as
		/// an extra argument, and the traversal of its remaining operands is
		/// stopped.
		void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
		                  Value *ExtraArg) {
		  Instruction *ParentOp = ParentStackElem.first;
		  auto Known = ExtraArgs.find(ParentOp);
		  if (Known == ExtraArgs.end()) {
		    // First extra argument seen for this operation, i.e. something like:
		    //   ParentOp += ... + ExtraArg + ...
		    ExtraArgs[ParentOp] = ExtraArg;
		    return;
		  }
		  // A second extra argument for the same operation, i.e. something like:
		  //   ParentOp = ExtraArgs[ParentOp] + ExtraArg.
		  // The operation as a whole has to be treated as an extra value, which is
		  // signalled by a null mapped value.
		  Known->second = nullptr;
		  // Skip the analysis of the remaining operands of ParentOp by moving the
		  // edge index past its last operand.
		  ParentStackElem.second = ParentOp->getNumOperands();
		}

public:		public:
HorizontalReduction() = default;		HorizontalReduction() = default;

/// \brief Try to find a reduction tree.		/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {		bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
assert((!Phi || is_contained(Phi->operands(), B)) &&		assert((!Phi || is_contained(Phi->operands(), B)) &&
"Thi phi needs to use the binary operator");		"Thi phi needs to use the binary operator");

Show All 36 Lines	while (!Stack.empty()) {
Instruction *TreeN = Stack.back().first;		Instruction *TreeN = Stack.back().first;
unsigned EdgeToVist = Stack.back().second++;		unsigned EdgeToVist = Stack.back().second++;
bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;		bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;

// Postorder visit.		// Postorder visit.
if (EdgeToVist == 2 || IsReducedValue) {		if (EdgeToVist == 2 || IsReducedValue) {
if (IsReducedValue)		if (IsReducedValue)
ReducedVals.push_back(TreeN);		ReducedVals.push_back(TreeN);
else		else {
		auto I = ExtraArgs.find(TreeN);
		if (I != ExtraArgs.end() && !I->second) {
		// Check if TreeN is an extra argument of its parent operation.
		if (Stack.size() <= 1) {
		// TreeN can't be an extra argument as it is a root reduction
		// operation.
		return false;
		}
		// Yes, TreeN is an extra argument, do not add it to a list of
		// reduction operations.
		// Stack[Stack.size() - 2] always points to the parent operation.
		markExtraArg(Stack[Stack.size() - 2], TreeN);
		ExtraArgs.erase(TreeN);
		} else
ReductionOps.push_back(TreeN);		ReductionOps.push_back(TreeN);
		}
// Retract.		// Retract.
Stack.pop_back();		Stack.pop_back();
continue;		continue;
}		}

// Visit left or right.		// Visit left or right.
Value *NextV = TreeN->getOperand(EdgeToVist);		Value *NextV = TreeN->getOperand(EdgeToVist);
if (NextV != Phi) {		if (NextV != Phi) {
auto *I = dyn_cast<Instruction>(NextV);		auto *I = dyn_cast<Instruction>(NextV);
// Continue analysis if the next operand is a reduction operation or		// Continue analysis if the next operand is a reduction operation or
// (possibly) a reduced value. If the reduced value opcode is not set,		// (possibly) a reduced value. If the reduced value opcode is not set,
// the first met operation != reduction operation is considered as the		// the first met operation != reduction operation is considered as the
// reduced value class.		// reduced value class.
if (I && (!ReducedValueOpcode || I->getOpcode() == ReducedValueOpcode ||		if (I && (!ReducedValueOpcode || I->getOpcode() == ReducedValueOpcode ||
I->getOpcode() == ReductionOpcode)) {		I->getOpcode() == ReductionOpcode)) {
// Only handle trees in the current basic block.		// Only handle trees in the current basic block.
if (I->getParent() != B->getParent())		if (I->getParent() != B->getParent()) {
return false;		// I is an extra argument for TreeN (its parent operation).
		markExtraArg(Stack.back(), I);
		continue;
		}

// Each tree node needs to have one user except for the ultimate		// Each tree node needs to have one user except for the ultimate
// reduction.		// reduction.
if (!I->hasOneUse() && I != B)		if (!I->hasOneUse() && I != B) {
return false;		// I is an extra argument for TreeN (its parent operation).
		markExtraArg(Stack.back(), I);
		continue;
		}

if (I->getOpcode() == ReductionOpcode) {		if (I->getOpcode() == ReductionOpcode) {
// We need to be able to reassociate the reduction operations.		// We need to be able to reassociate the reduction operations.
if (!I->isAssociative())		if (!I->isAssociative()) {
return false;		// I is an extra argument for TreeN (its parent operation).
		markExtraArg(Stack.back(), I);
		continue;
		}
} else if (ReducedValueOpcode &&		} else if (ReducedValueOpcode &&
ReducedValueOpcode != I->getOpcode()) {		ReducedValueOpcode != I->getOpcode()) {
// Make sure that the opcodes of the operations that we are going to		// Make sure that the opcodes of the operations that we are going to
// reduce match.		// reduce match.
return false;		// I is an extra argument for TreeN (its parent operation).
		markExtraArg(Stack.back(), I);
		continue;
} else if (!ReducedValueOpcode)		} else if (!ReducedValueOpcode)
ReducedValueOpcode = I->getOpcode();		ReducedValueOpcode = I->getOpcode();

Stack.push_back(std::make_pair(I, 0));		Stack.push_back(std::make_pair(I, 0));
continue;		continue;
}		}
return false;		// NextV is an extra argument for TreeN (its parent operation).
		markExtraArg(Stack.back(), NextV);
}		}
}		}
return true;		return true;
}		}

/// \brief Attempt to vectorize the tree found by		/// \brief Attempt to vectorize the tree found by
/// matchAssociativeReduction.		/// matchAssociativeReduction.
bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {		bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
VectorizedTree = ReducedSubTree;		VectorizedTree = ReducedSubTree;
i += ReduxWidth;		i += ReduxWidth;
ReduxWidth = PowerOf2Floor(NumReducedVals - i);		ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}		}

if (VectorizedTree) {		if (VectorizedTree) {
// Finish the reduction.		// Finish the reduction.
for (; i < NumReducedVals; ++i) {		for (; i < NumReducedVals; ++i) {
		auto *I = cast<Instruction>(ReducedVals[i]);
		Builder.SetCurrentDebugLocation(I->getDebugLoc());
		VectorizedTree =
		Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
		}
		for (auto &Pair : ExtraArgs) {
Builder.SetCurrentDebugLocation(		Builder.SetCurrentDebugLocation(
cast<Instruction>(ReducedVals[i])->getDebugLoc());		cast<Instruction>(Pair.first)->getDebugLoc());
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,		VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
ReducedVals[i]);		Pair.second, "bin.extra");
}		}
// Update users.		// Update users.
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {		if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
assert(ReductionRoot && "Need a reduction operation");		assert(ReductionRoot && "Need a reduction operation");
ReductionRoot->setOperand(0, VectorizedTree);		ReductionRoot->setOperand(0, VectorizedTree);
ReductionRoot->setOperand(1, ReductionPHI);		ReductionRoot->setOperand(1, ReductionPHI);
} else		} else
ReductionRoot->replaceAllUsesWith(VectorizedTree);		ReductionRoot->replaceAllUsesWith(VectorizedTree);
▲ Show 20 Lines • Show All 609 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines
}		}

define float @bazz() {		define float @bazz() {
; CHECK-LABEL: @bazz(		; CHECK-LABEL: @bazz(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.]] = load i32, i32 @n, align 4		; CHECK-NEXT: [[TMP0:%.]] = load i32, i32 @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3		; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr to <2 x float>*), align 16		; CHECK-NEXT: [[TMP1:%.]] = load <8 x float>, <8 x float> bitcast ([20 x float]* @arr to <8 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.]] = load <2 x float>, <2 x float> bitcast ([20 x float]* @arr1 to <2 x float>*), align 16		; CHECK-NEXT: [[TMP2:%.]] = load <8 x float>, <8 x float> bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]		; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1		; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]		; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
; CHECK-NEXT: [[TMP6:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP7:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2		; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float		; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]		; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
; CHECK-NEXT: [[TMP11:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4) to <2 x float>*), align 16		; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
; CHECK-NEXT: [[TMP12:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4) to <2 x float>*), align 16		; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[TMP12]], [[TMP11]]		; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP13]], i32 0		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP14]], [[ADD7]]		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP13]], i32 1		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP15]], [[ADD19]]		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP16:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6) to <2 x float>*), align 8		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP17:%.]] = load <2 x float>, <2 x float> bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6) to <2 x float>*), align 8		; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x float> [[TMP17]], [[TMP16]]		; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP18]], i32 0		; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP19]], [[ADD19_1]]		; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV6]]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP18]], i32 1		; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP20]], [[ADD19_2]]		; CHECK-NEXT: store float [[BIN_EXTRA5]], float* @res, align 4
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4		; CHECK-NEXT: ret float [[BIN_EXTRA5]]
; CHECK-NEXT: ret float [[ADD19_3]]
;		;
entry:		entry:
%0 = load i32, i32* @n, align 4		%0 = load i32, i32* @n, align 4
%mul = mul nsw i32 %0, 3		%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float		%conv = sitofp i32 %mul to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16		%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16		%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul4 = fmul fast float %2, %1		%mul4 = fmul fast float %2, %1
▲ Show 20 Lines • Show All 437 Lines • ▼ Show 20 Lines	;
ret float %add.47		ret float %add.47
}		}

define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {		define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @f1(		; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[REM:%.]] = srem i32 [[A:%.]], [[B:%.*]]		; CHECK-NEXT: [[REM:%.]] = srem i32 [[A:%.]], [[B:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
; CHECK-NEXT: [[TMP0:%.]] = load float, float [[X:%.*]], align 4		; CHECK-NEXT: [[ARRAYIDX_1:%.]] = getelementptr inbounds float, float [[X:%.*]], i64 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX_1:%.]] = getelementptr inbounds float, float [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.]] = load float, float [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[ADD]]
; CHECK-NEXT: [[ARRAYIDX_2:%.]] = getelementptr inbounds float, float [[X]], i64 2		; CHECK-NEXT: [[ARRAYIDX_2:%.]] = getelementptr inbounds float, float [[X]], i64 2
; CHECK-NEXT: [[TMP2:%.]] = load float, float [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP2]], [[ADD_1]]
; CHECK-NEXT: [[ARRAYIDX_3:%.]] = getelementptr inbounds float, float [[X]], i64 3		; CHECK-NEXT: [[ARRAYIDX_3:%.]] = getelementptr inbounds float, float [[X]], i64 3
; CHECK-NEXT: [[TMP3:%.]] = load float, float [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP3]], [[ADD_2]]
; CHECK-NEXT: [[ARRAYIDX_4:%.]] = getelementptr inbounds float, float [[X]], i64 4		; CHECK-NEXT: [[ARRAYIDX_4:%.]] = getelementptr inbounds float, float [[X]], i64 4
; CHECK-NEXT: [[TMP4:%.]] = load float, float [[ARRAYIDX_4]], align 4
; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP4]], [[ADD_3]]
; CHECK-NEXT: [[ARRAYIDX_5:%.]] = getelementptr inbounds float, float [[X]], i64 5		; CHECK-NEXT: [[ARRAYIDX_5:%.]] = getelementptr inbounds float, float [[X]], i64 5
; CHECK-NEXT: [[TMP5:%.]] = load float, float [[ARRAYIDX_5]], align 4
; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP5]], [[ADD_4]]
; CHECK-NEXT: [[ARRAYIDX_6:%.]] = getelementptr inbounds float, float [[X]], i64 6		; CHECK-NEXT: [[ARRAYIDX_6:%.]] = getelementptr inbounds float, float [[X]], i64 6
; CHECK-NEXT: [[TMP6:%.]] = load float, float [[ARRAYIDX_6]], align 4
; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP6]], [[ADD_5]]
; CHECK-NEXT: [[ARRAYIDX_7:%.]] = getelementptr inbounds float, float [[X]], i64 7		; CHECK-NEXT: [[ARRAYIDX_7:%.]] = getelementptr inbounds float, float [[X]], i64 7
; CHECK-NEXT: [[TMP7:%.]] = load float, float [[ARRAYIDX_7]], align 4
; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP7]], [[ADD_6]]
; CHECK-NEXT: [[ARRAYIDX_8:%.]] = getelementptr inbounds float, float [[X]], i64 8		; CHECK-NEXT: [[ARRAYIDX_8:%.]] = getelementptr inbounds float, float [[X]], i64 8
; CHECK-NEXT: [[TMP8:%.]] = load float, float [[ARRAYIDX_8]], align 4
; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP8]], [[ADD_7]]
; CHECK-NEXT: [[ARRAYIDX_9:%.]] = getelementptr inbounds float, float [[X]], i64 9		; CHECK-NEXT: [[ARRAYIDX_9:%.]] = getelementptr inbounds float, float [[X]], i64 9
; CHECK-NEXT: [[TMP9:%.]] = load float, float [[ARRAYIDX_9]], align 4
; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP9]], [[ADD_8]]
; CHECK-NEXT: [[ARRAYIDX_10:%.]] = getelementptr inbounds float, float [[X]], i64 10		; CHECK-NEXT: [[ARRAYIDX_10:%.]] = getelementptr inbounds float, float [[X]], i64 10
; CHECK-NEXT: [[TMP10:%.]] = load float, float [[ARRAYIDX_10]], align 4
; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP10]], [[ADD_9]]
; CHECK-NEXT: [[ARRAYIDX_11:%.]] = getelementptr inbounds float, float [[X]], i64 11		; CHECK-NEXT: [[ARRAYIDX_11:%.]] = getelementptr inbounds float, float [[X]], i64 11
; CHECK-NEXT: [[TMP11:%.]] = load float, float [[ARRAYIDX_11]], align 4
; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP11]], [[ADD_10]]
; CHECK-NEXT: [[ARRAYIDX_12:%.]] = getelementptr inbounds float, float [[X]], i64 12		; CHECK-NEXT: [[ARRAYIDX_12:%.]] = getelementptr inbounds float, float [[X]], i64 12
; CHECK-NEXT: [[TMP12:%.]] = load float, float [[ARRAYIDX_12]], align 4
; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP12]], [[ADD_11]]
; CHECK-NEXT: [[ARRAYIDX_13:%.]] = getelementptr inbounds float, float [[X]], i64 13		; CHECK-NEXT: [[ARRAYIDX_13:%.]] = getelementptr inbounds float, float [[X]], i64 13
; CHECK-NEXT: [[TMP13:%.]] = load float, float [[ARRAYIDX_13]], align 4
; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP13]], [[ADD_12]]
; CHECK-NEXT: [[ARRAYIDX_14:%.]] = getelementptr inbounds float, float [[X]], i64 14		; CHECK-NEXT: [[ARRAYIDX_14:%.]] = getelementptr inbounds float, float [[X]], i64 14
; CHECK-NEXT: [[TMP14:%.]] = load float, float [[ARRAYIDX_14]], align 4
; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP14]], [[ADD_13]]
; CHECK-NEXT: [[ARRAYIDX_15:%.]] = getelementptr inbounds float, float [[X]], i64 15		; CHECK-NEXT: [[ARRAYIDX_15:%.]] = getelementptr inbounds float, float [[X]], i64 15
; CHECK-NEXT: [[TMP15:%.]] = load float, float [[ARRAYIDX_15]], align 4
; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float [[TMP15]], [[ADD_14]]
; CHECK-NEXT: [[ARRAYIDX_16:%.]] = getelementptr inbounds float, float [[X]], i64 16		; CHECK-NEXT: [[ARRAYIDX_16:%.]] = getelementptr inbounds float, float [[X]], i64 16
; CHECK-NEXT: [[TMP16:%.]] = load float, float [[ARRAYIDX_16]], align 4
; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float [[TMP16]], [[ADD_15]]
; CHECK-NEXT: [[ARRAYIDX_17:%.]] = getelementptr inbounds float, float [[X]], i64 17		; CHECK-NEXT: [[ARRAYIDX_17:%.]] = getelementptr inbounds float, float [[X]], i64 17
; CHECK-NEXT: [[TMP17:%.]] = load float, float [[ARRAYIDX_17]], align 4
; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float [[TMP17]], [[ADD_16]]
; CHECK-NEXT: [[ARRAYIDX_18:%.]] = getelementptr inbounds float, float [[X]], i64 18		; CHECK-NEXT: [[ARRAYIDX_18:%.]] = getelementptr inbounds float, float [[X]], i64 18
; CHECK-NEXT: [[TMP18:%.]] = load float, float [[ARRAYIDX_18]], align 4
; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float [[TMP18]], [[ADD_17]]
; CHECK-NEXT: [[ARRAYIDX_19:%.]] = getelementptr inbounds float, float [[X]], i64 19		; CHECK-NEXT: [[ARRAYIDX_19:%.]] = getelementptr inbounds float, float [[X]], i64 19
; CHECK-NEXT: [[TMP19:%.]] = load float, float [[ARRAYIDX_19]], align 4
; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float [[TMP19]], [[ADD_18]]
; CHECK-NEXT: [[ARRAYIDX_20:%.]] = getelementptr inbounds float, float [[X]], i64 20		; CHECK-NEXT: [[ARRAYIDX_20:%.]] = getelementptr inbounds float, float [[X]], i64 20
; CHECK-NEXT: [[TMP20:%.]] = load float, float [[ARRAYIDX_20]], align 4
; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float [[TMP20]], [[ADD_19]]
; CHECK-NEXT: [[ARRAYIDX_21:%.]] = getelementptr inbounds float, float [[X]], i64 21		; CHECK-NEXT: [[ARRAYIDX_21:%.]] = getelementptr inbounds float, float [[X]], i64 21
; CHECK-NEXT: [[TMP21:%.]] = load float, float [[ARRAYIDX_21]], align 4
; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float [[TMP21]], [[ADD_20]]
; CHECK-NEXT: [[ARRAYIDX_22:%.]] = getelementptr inbounds float, float [[X]], i64 22		; CHECK-NEXT: [[ARRAYIDX_22:%.]] = getelementptr inbounds float, float [[X]], i64 22
; CHECK-NEXT: [[TMP22:%.]] = load float, float [[ARRAYIDX_22]], align 4
; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float [[TMP22]], [[ADD_21]]
; CHECK-NEXT: [[ARRAYIDX_23:%.]] = getelementptr inbounds float, float [[X]], i64 23		; CHECK-NEXT: [[ARRAYIDX_23:%.]] = getelementptr inbounds float, float [[X]], i64 23
; CHECK-NEXT: [[TMP23:%.]] = load float, float [[ARRAYIDX_23]], align 4
; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float [[TMP23]], [[ADD_22]]
; CHECK-NEXT: [[ARRAYIDX_24:%.]] = getelementptr inbounds float, float [[X]], i64 24		; CHECK-NEXT: [[ARRAYIDX_24:%.]] = getelementptr inbounds float, float [[X]], i64 24
; CHECK-NEXT: [[TMP24:%.]] = load float, float [[ARRAYIDX_24]], align 4
; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float [[TMP24]], [[ADD_23]]
; CHECK-NEXT: [[ARRAYIDX_25:%.]] = getelementptr inbounds float, float [[X]], i64 25		; CHECK-NEXT: [[ARRAYIDX_25:%.]] = getelementptr inbounds float, float [[X]], i64 25
; CHECK-NEXT: [[TMP25:%.]] = load float, float [[ARRAYIDX_25]], align 4
; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float [[TMP25]], [[ADD_24]]
; CHECK-NEXT: [[ARRAYIDX_26:%.]] = getelementptr inbounds float, float [[X]], i64 26		; CHECK-NEXT: [[ARRAYIDX_26:%.]] = getelementptr inbounds float, float [[X]], i64 26
; CHECK-NEXT: [[TMP26:%.]] = load float, float [[ARRAYIDX_26]], align 4
; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float [[TMP26]], [[ADD_25]]
; CHECK-NEXT: [[ARRAYIDX_27:%.]] = getelementptr inbounds float, float [[X]], i64 27		; CHECK-NEXT: [[ARRAYIDX_27:%.]] = getelementptr inbounds float, float [[X]], i64 27
; CHECK-NEXT: [[TMP27:%.]] = load float, float [[ARRAYIDX_27]], align 4
; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float [[TMP27]], [[ADD_26]]
; CHECK-NEXT: [[ARRAYIDX_28:%.]] = getelementptr inbounds float, float [[X]], i64 28		; CHECK-NEXT: [[ARRAYIDX_28:%.]] = getelementptr inbounds float, float [[X]], i64 28
; CHECK-NEXT: [[TMP28:%.]] = load float, float [[ARRAYIDX_28]], align 4
; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float [[TMP28]], [[ADD_27]]
; CHECK-NEXT: [[ARRAYIDX_29:%.]] = getelementptr inbounds float, float [[X]], i64 29		; CHECK-NEXT: [[ARRAYIDX_29:%.]] = getelementptr inbounds float, float [[X]], i64 29
; CHECK-NEXT: [[TMP29:%.]] = load float, float [[ARRAYIDX_29]], align 4
; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float [[TMP29]], [[ADD_28]]
; CHECK-NEXT: [[ARRAYIDX_30:%.]] = getelementptr inbounds float, float [[X]], i64 30		; CHECK-NEXT: [[ARRAYIDX_30:%.]] = getelementptr inbounds float, float [[X]], i64 30
; CHECK-NEXT: [[TMP30:%.]] = load float, float [[ARRAYIDX_30]], align 4
; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float [[TMP30]], [[ADD_29]]
; CHECK-NEXT: [[ARRAYIDX_31:%.]] = getelementptr inbounds float, float [[X]], i64 31		; CHECK-NEXT: [[ARRAYIDX_31:%.]] = getelementptr inbounds float, float [[X]], i64 31
; CHECK-NEXT: [[TMP31:%.]] = load float, float [[ARRAYIDX_31]], align 4		; CHECK-NEXT: [[TMP0:%.]] = bitcast float [[X]] to <32 x float>*
; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float [[TMP31]], [[ADD_30]]		; CHECK-NEXT: [[TMP1:%.]] = load <32 x float>, <32 x float> [[TMP0]], align 4
; CHECK-NEXT: ret float [[ADD_31]]		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
		; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
		; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
		; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
		; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
		; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
		; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
		; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
		; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
		; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
		; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
		; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
		; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
		; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
		; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
		; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
		; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
		; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
		; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
		; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
		; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
		; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
		; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
		; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
		; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
		; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
		; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
		; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
		; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
		; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
		; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
		; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
		; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
		; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
		; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
		; CHECK-NEXT: ret float [[BIN_EXTRA]]
;		;
entry:		entry:
%rem = srem i32 %a, %b		%rem = srem i32 %a, %b
%conv = sitofp i32 %rem to float		%conv = sitofp i32 %rem to float
%0 = load float, float* %x, align 4		%0 = load float, float* %x, align 4
%add = fadd fast float %0, %conv		%add = fadd fast float %0, %conv
%arrayidx.1 = getelementptr inbounds float, float* %x, i64 1		%arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
%1 = load float, float* %arrayidx.1, align 4		%1 = load float, float* %arrayidx.1, align 4
▲ Show 20 Lines • Show All 281 Lines • ▼ Show 20 Lines	;
ret float %add.29		ret float %add.29
}		}

define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {		define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(		; CHECK-LABEL: @extra_args(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]		; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[X:%.*]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[TMP0]], [[ADD]]		; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[ADD1]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2		; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float [[TMP2]], [[ADD5]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3		; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[TMP3]], [[ADD4_1]]
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4		; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4
; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float [[TMP4]], [[ADD4_2]]
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5		; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX3_4]], align 4
; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float [[TMP5]], [[ADD4_3]]
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6		; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX3_5]], align 4
; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float [[TMP6]], [[ADD4_4]]
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7		; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX3_6]], align 4		; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float [[TMP7]], [[ADD4_5]]		; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT: ret float [[ADD4_6]]		; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
		; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
		; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
		; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
		; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
		; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
		; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
		; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
		; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
		; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]]
		; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
		; CHECK-NEXT: ret float [[BIN_EXTRA5]]
;		;
entry:		entry:
%mul = mul nsw i32 %b, %a		%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float		%conv = sitofp i32 %mul to float
%0 = load float, float* %x, align 4		%0 = load float, float* %x, align 4
%add = fadd fast float %conv, 3.000000e+00		%add = fadd fast float %conv, 3.000000e+00
%add1 = fadd fast float %0, %add		%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, float* %x, i64 1		%arrayidx3 = getelementptr inbounds float, float* %x, i64 1
Show All 21 Lines	;
ret float %add4.6		ret float %add4.6
}		}

define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {		define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(		; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]		; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float		; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[X:%.*]], align 4
; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float		; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00		; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[TMP0]], [[ADD]]		; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[ADD1]]
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2		; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float [[TMP2]], [[ADD4]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3		; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[TMP3]], [[ADD4_1]]
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4		; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4
; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float [[TMP4]], [[ADD4_2]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5		; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX3_4]], align 4
; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float [[TMP5]], [[ADD5]]
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6		; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX3_5]], align 4
; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float [[TMP6]], [[ADD4_4]]
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7		; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX3_6]], align 4		; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float [[TMP7]], [[ADD4_5]]		; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT: ret float [[ADD4_6]]		; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
		; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
		; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
		; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
		; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
		; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
		; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
		; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
		; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
		; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
		; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
		; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
		; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
		; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]]
		; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
		; CHECK-NEXT: ret float [[BIN_EXTRA5]]
;		;
entry:		entry:
%mul = mul nsw i32 %b, %a		%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float		%conv = sitofp i32 %mul to float
%0 = load float, float* %x, align 4		%0 = load float, float* %x, align 4
%convc = sitofp i32 %c to float		%convc = sitofp i32 %c to float
%addc = fadd fast float %convc, 3.000000e+00		%addc = fadd fast float %convc, 3.000000e+00
%add = fadd fast float %conv, %addc		%add = fadd fast float %conv, %addc
Show All 26 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Fix for PR31690: Allow using of extra values in horizontal reductions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 86940

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Fix for PR31690: Allow using of extra values in horizontal reductions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 86940

llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

[SLP] Fix for PR31690: Allow using of extra values in horizontal reductions.
ClosedPublic