This is an archive of the discontinued LLVM Phabricator instance.

Differential D32093

[InstCombine] PR32078: convert scalar operations to vector.
AbandonedPublic

Authored by ABataev on Apr 14 2017, 11:57 AM.

Download Raw Diff

Details

Reviewers

spatel
sanjoy
hfinkel
efriedma

Summary

If we have code like:

%xn = extractelement <2 x i32> %x, i32 %n
%yn = extractelement <2 x i32> %y, i32 %n
%cmpn = icmp eq i32 %xn, %yn

we can convert it to something like this:

%cmp = icmp eq <2 x i32> %x, %y
%cmpn = extractelement <2 x i1> %cmp, i32 %n

Diff Detail

Build Status

Buildable 5565
Build 5565: arc lint + arc unit

Event Timeline

ABataev created this revision. Apr 14 2017, 11:57 AM

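This transform is not safe as written. The following program, compiled with -O1, crashes (raises a divide exception) after this patch but not before it: the scalar code only computes x[1] / y[1] = 1 / 1, whereas the widened vector division also divides lane 0, which is 0 / 0: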

typedef int v4si __attribute__((__vector_size__(16)));

v4si divs(v4si x, v4si y) {
  int eltX = x[1];
  int eltY = y[1];
  x[1] = eltX / eltY;
  return x;
}

int main() {
  v4si x = (v4si){0, 1, 2, 3};
  x = divs(x, x);
  return x[0];
}

The division example shows that we're operating on elements that the original program does not touch, so any FP op would also be a concern. If some element in a vector is a denormal, that could cause a severe performance penalty that doesn't exist in the original program. Besides that, I'm not sure that we can actually do this transform for any op in a target-independent pass: we're replacing scalar ops with potentially more expensive vector ops.

This revision now requires changes to proceed. Apr 17 2017, 7:51 AM

This kind of transformation needs a cost analysis, so I am moving it back to the SLP vectorizer.

spatel mentioned this in D30686: [SLP] PR32078: convert scalar operations to vector. May 18 2017, 8:35 AM

Revision Contents

Path

Size

lib/

Transforms/

InstCombine/

InstCombineAddSub.cpp

12 lines

InstCombineAndOrXor.cpp

9 lines

InstCombineCompares.cpp

7 lines

InstCombineInternal.h

5 lines

InstCombineMulDivRem.cpp

24 lines

InstCombinePHI.cpp

3 lines

InstCombineShifts.cpp

10 lines

InstructionCombining.cpp

60 lines

test/

Transforms/

InstCombine/

7 lines

7 lines

6 lines

204 lines

Diff 95331

lib/Transforms/InstCombine/InstCombineAddSub.cpp

Show First 20 Lines • Show All 1,341 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}		}
if (!I.hasNoUnsignedWrap() &&		if (!I.hasNoUnsignedWrap() &&
computeOverflowForUnsignedAdd(LHS, RHS, &I) ==		computeOverflowForUnsignedAdd(LHS, RHS, &I) ==
OverflowResult::NeverOverflows) {		OverflowResult::NeverOverflows) {
Changed = true;		Changed = true;
I.setHasNoUnsignedWrap(true);		I.setHasNoUnsignedWrap(true);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {		Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);		bool Changed = SimplifyAssociativeOrCommutative(I);
Value LHS = I.getOperand(0), RHS = I.getOperand(1);		Value LHS = I.getOperand(0), RHS = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines	// select C, 0, B + select C, A, 0 -> select C, A, B
}		}
}		}

if (I.hasUnsafeAlgebra()) {		if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))		if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

/// Optimize pointer differences into the same array into a size. Consider:		/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer		/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.		/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///		///
Value InstCombiner::OptimizePointerDifference(Value LHS, Value *RHS,		Value InstCombiner::OptimizePointerDifference(Value LHS, Value *RHS,
▲ Show 20 Lines • Show All 255 Lines • ▼ Show 20 Lines	if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;		Changed = true;
I.setHasNoSignedWrap(true);		I.setHasNoSignedWrap(true);
}		}
if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {		if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;		Changed = true;
I.setHasNoUnsignedWrap(true);		I.setHasNoUnsignedWrap(true);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

Instruction *InstCombiner::visitFSub(BinaryOperator &I) {		Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);
Show All 38 Lines	if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
}		}
}		}

if (I.hasUnsafeAlgebra()) {		if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))		if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Show First 20 Lines • Show All 1,421 Lines • ▼ Show 20 Lines	if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
return replaceInstUsesWith(I, Res);		return replaceInstUsesWith(I, Res);

if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))		if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;		return CastedAnd;

if (Instruction *Select = foldBoolSextMaskToSelect(I))		if (Instruction *Select = foldBoolSextMaskToSelect(I))
return Select;		return Select;

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

/// Given an OR instruction, check to see if this is a bswap idiom. If so,		/// Given an OR instruction, check to see if this is a bswap idiom. If so,
/// insert the new intrinsic and return it.		/// insert the new intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {		Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

▲ Show 20 Lines • Show All 899 Lines • ▼ Show 20 Lines	if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&		match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {		match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
Value *orTrue = Builder->CreateOr(A, C);		Value *orTrue = Builder->CreateOr(A, C);
Value *orFalse = Builder->CreateOr(B, D);		Value *orFalse = Builder->CreateOr(B, D);
return SelectInst::Create(X, orTrue, orFalse);		return SelectInst::Create(X, orTrue, orFalse);
}		}
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches		// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some		// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.		// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitXor(BinaryOperator &I) {		Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);		bool Changed = SimplifyAssociativeOrCommutative(I);
▲ Show 20 Lines • Show All 279 Lines • ▼ Show 20 Lines	if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
getNewICmpValue(isSigned, Code, Op0, Op1,		getNewICmpValue(isSigned, Code, Op0, Op1,
Builder));		Builder));
}		}
}		}

if (Instruction *CastedXor = foldCastedBitwiseLogic(I))		if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;		return CastedXor;

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

lib/Transforms/InstCombine/InstCombineCompares.cpp

Show First 20 Lines • Show All 4,522 Lines • ▼ Show 20 Lines	if (I.getPredicate() == ICmpInst::ICMP_EQ)
// icmp X+Cst, X		// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)		if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
return foldICmpAddOpConst(I, X, Cst, I.getPredicate());		return foldICmpAddOpConst(I, X, Cst, I.getPredicate());

// icmp X, X+Cst		// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)		if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
return foldICmpAddOpConst(I, X, Cst, I.getSwappedPredicate());		return foldICmpAddOpConst(I, X, Cst, I.getSwappedPredicate());
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

/// Fold fcmp ([us]itofp x, cst) if possible.		/// Fold fcmp ([us]itofp x, cst) if possible.
Instruction InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction LHSI,		Instruction InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction LHSI,
Constant *RHSC) {		Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return nullptr;		if (!isa<ConstantFP>(RHSC)) return nullptr;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();		const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
▲ Show 20 Lines • Show All 404 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {

// fcmp (fpext x), (fpext y) -> fcmp x, y		// fcmp (fpext x), (fpext y) -> fcmp x, y
if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))		if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))		if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
if (LHSExt->getSrcTy() == RHSExt->getSrcTy())		if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),		return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
RHSExt->getOperand(0));		RHSExt->getOperand(0));

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

lib/Transforms/InstCombine/InstCombineInternal.h

Show First 20 Lines • Show All 559 Lines • ▼ Show 20 Lines	private:
bool SimplifyDemandedInstructionBits(Instruction &Inst);		bool SimplifyDemandedInstructionBits(Instruction &Inst);

Value SimplifyDemandedVectorElts(Value V, APInt DemandedElts,		Value SimplifyDemandedVectorElts(Value V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);		APInt &UndefElts, unsigned Depth = 0);

Value *SimplifyVectorOp(BinaryOperator &Inst);		Value *SimplifyVectorOp(BinaryOperator &Inst);
Value *SimplifyBSwap(BinaryOperator &Inst);		Value *SimplifyBSwap(BinaryOperator &Inst);

		/// Try to combine instructions with all ExtractElement operands only
		/// that extract from vector operands of the same type, the same vector
		/// size at the same index, into a vector form with single resulting
		/// ExtractElement instruction only.
		Value *WidenScalarOp(Instruction &Inst);

/// Given a binary operator, cast instruction, or select which has a PHI node		/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is		/// as operand #0, see if we can fold the instruction into the PHI (which is
/// only possible if all operands to the PHI are constants).		/// only possible if all operands to the PHI are constants).
Instruction *FoldOpIntoPhi(Instruction &I);		Instruction *FoldOpIntoPhi(Instruction &I);

/// Given an instruction with a select as one operand and a constant as the		/// Given an instruction with a select as one operand and a constant as the
/// other operand, try to fold the binary operator into the select arguments.		/// other operand, try to fold the binary operator into the select arguments.
▲ Show 20 Lines • Show All 110 Lines • Show Last 20 Lines

lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Show First 20 Lines • Show All 457 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitMul(BinaryOperator &I) {

if (!I.hasNoUnsignedWrap() &&		if (!I.hasNoUnsignedWrap() &&
computeOverflowForUnsignedMul(Op0, Op1, &I) ==		computeOverflowForUnsignedMul(Op0, Op1, &I) ==
OverflowResult::NeverOverflows) {		OverflowResult::NeverOverflows) {
Changed = true;		Changed = true;
I.setHasNoUnsignedWrap(true);		I.setHasNoUnsignedWrap(true);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

/// Detect pattern log2(Y * 0.5) with corresponding fast math flags.		/// Detect pattern log2(Y * 0.5) with corresponding fast math flags.
static void detectLog2OfHalf(Value &Op, Value &Y, IntrinsicInst *&Log2) {		static void detectLog2OfHalf(Value &Op, Value &Y, IntrinsicInst *&Log2) {
if (!Op->hasOneUse())		if (!Op->hasOneUse())
return;		return;

▲ Show 20 Lines • Show All 299 Lines • ▼ Show 20 Lines	for (int i = 0; i < 2; i++) {
}		}

if (!isa<Constant>(Op1))		if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);		std::swap(Opnd0, Opnd1);
else		else
break;		break;
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return Changed ? &I : nullptr;		return Changed ? &I : nullptr;
}		}

/// Try to fold a divide or remainder of a select instruction.		/// Try to fold a divide or remainder of a select instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {		bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));		SelectInst *SI = cast<SelectInst>(I.getOperand(1));

// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y		// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
▲ Show 20 Lines • Show All 382 Lines • ▼ Show 20 Lines	for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) {
// use it as part of a joining action (i.e., a SelectInst).		// use it as part of a joining action (i.e., a SelectInst).
if (e - i != 1) {		if (e - i != 1) {
Inst->insertBefore(&I);		Inst->insertBefore(&I);
UDivActions[i].FoldResult = Inst;		UDivActions[i].FoldResult = Inst;
} else		} else
return Inst;		return Inst;
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {		Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
// the sign bit set.		// the sign bit set.
auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());		auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
BO->setIsExact(I.isExact());		BO->setIsExact(I.isExact());
return BO;		return BO;
}		}
}		}
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special		/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
/// FP value and:		/// FP value and:
/// 1) 1/C is exact, or		/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.		/// 2) reciprocal is allowed.
/// If the conversion was successful, the simplified expression "X * 1/C" is		/// If the conversion was successful, the simplified expression "X * 1/C" is
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {

// -x / -y -> x / y		// -x / -y -> x / y
if (match(Op0, m_FNeg(m_Value(LHS))) && match(Op1, m_FNeg(m_Value(RHS)))) {		if (match(Op0, m_FNeg(m_Value(LHS))) && match(Op1, m_FNeg(m_Value(RHS)))) {
I.setOperand(0, LHS);		I.setOperand(0, LHS);
I.setOperand(1, RHS);		I.setOperand(1, RHS);
return &I;		return &I;
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

/// This function implements the transforms common to both integer remainder		/// This function implements the transforms common to both integer remainder
/// instructions (urem and srem). It is called by the visitors to those integer		/// instructions (urem and srem). It is called by the visitors to those integer
/// remainder instructions.		/// remainder instructions.
/// @brief Common integer remainder transforms		/// @brief Common integer remainder transforms
Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {		Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// X urem C -> X < C ? X : X - C, where C >= signbit.		// X urem C -> X < C ? X : X - C, where C >= signbit.
const APInt *DivisorC;		const APInt *DivisorC;
if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {		if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {
Value *Cmp = Builder->CreateICmpULT(Op0, Op1);		Value *Cmp = Builder->CreateICmpULT(Op0, Op1);
Value *Sub = Builder->CreateSub(Op0, Op1);		Value *Sub = Builder->CreateSub(Op0, Op1);
return SelectInst::Create(Cmp, Op0, Sub);		return SelectInst::Create(Cmp, Op0, Sub);
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

Instruction *InstCombiner::visitSRem(BinaryOperator &I) {		Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	if (hasNegative && !hasMissing) {
if (NewRHSV != C) { // Don't loop on -MININT		if (NewRHSV != C) { // Don't loop on -MININT
Worklist.AddValue(I.getOperand(1));		Worklist.AddValue(I.getOperand(1));
I.setOperand(1, NewRHSV);		I.setOperand(1, NewRHSV);
return &I;		return &I;
}		}
}		}
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

/// Visitor for frem instructions (rendered here as a two-column diff: old
/// revision on the left, patched revision on the right). Folds are tried in
/// order: SimplifyVectorOp, SimplifyFRemInst, SimplifyDivRemOfSelect and, in
/// the patched column only, WidenScalarOp. Returns nullptr when none applies.
Instruction *InstCombiner::visitFRem(BinaryOperator &I) {		Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);

if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),		if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
DL, &TLI, &DT, &AC))		DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);

// Handle cases involving: rem X, (select Cond, Y, Z)		// Handle cases involving: rem X, (select Cond, Y, Z)
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))		if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;		return &I;

// Added by this patch (right column only): last-resort attempt to turn an
// frem of two same-lane extractelement operands into a vector frem.
// NOTE(review): the review discussion flags FP widening as a concern because
// the vector op also evaluates lanes the scalar code never used.
		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

lib/Transforms/InstCombine/InstCombinePHI.cpp

Show First 20 Lines • Show All 1,008 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the		// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is		// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.		// introduced when SROA promotes an aggregate to a single large integer type.
if (PN.getType()->isIntegerTy() &&		if (PN.getType()->isIntegerTy() &&
!DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))		!DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))		if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;		return Res;

		if (Value *V = WidenScalarOp(PN))
		return replaceInstUsesWith(PN, V);

return nullptr;		return nullptr;
}		}

lib/Transforms/InstCombine/InstCombineShifts.cpp

Show First 20 Lines • Show All 604 Lines • ▼ Show 20 Lines	if (match(Op1, m_Constant(C1))) {
if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X)))))		if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X)))))
return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X);		return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X);

// (X * C2) << C1 --> X * (C2 << C1)		// (X * C2) << C1 --> X * (C2 << C1)
if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))		if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));		return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

Instruction *InstCombiner::visitLShr(BinaryOperator &I) {		Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);

Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	if (match(Op1, m_APInt(ShAmtAPInt))) {

// If the shifted-out value is known-zero, then this is an exact shift.		// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&		if (!I.isExact() &&
MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {		MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
I.setIsExact();		I.setIsExact();
return &I;		return &I;
}		}
}		}

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

Instruction *InstCombiner::visitAShr(BinaryOperator &I) {		Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);		return replaceInstUsesWith(I, V);

Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	if (!I.isExact() &&
return &I;		return &I;
}		}
}		}

// See if we can turn a signed shr into an unsigned shr.		// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I))		if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);		return BinaryOperator::CreateLShr(Op0, Op1);

		if (Value *V = WidenScalarOp(I))
		return replaceInstUsesWith(I, V);

return nullptr;		return nullptr;
}		}

lib/Transforms/InstCombine/InstructionCombining.cpp

Show First 20 Lines • Show All 1,391 Lines • ▼ Show 20 Lines	if (MayChange) {
return Builder->CreateShuffleVector(NewBO,		return Builder->CreateShuffleVector(NewBO,
UndefValue::get(Inst.getType()), Shuffle->getMask());		UndefValue::get(Inst.getType()), Shuffle->getMask());
}		}
}		}

return nullptr;		return nullptr;
}		}

		/// Try to replace a scalar operation whose operands are all extractelement
		/// instructions reading the same lane of equally sized vectors with a single
		/// vector operation followed by one extractelement:
		///
		///   %xn = extractelement <2 x i32> %x, i32 %n
		///   %yn = extractelement <2 x i32> %y, i32 %n
		///   %r  = add i32 %xn, %yn
		/// becomes
		///   %widen.vect    = add <2 x i32> %x, %y
		///   %widen.extract = extractelement <2 x i32> %widen.vect, i32 %n
		///
		/// \param Inst the scalar instruction to widen.
		/// \returns the new extractelement that should replace \p Inst, or nullptr
		/// when the pattern does not match or widening is not known to be safe.
		///
		/// NOTE(review): no target cost model is consulted, and a widened FP
		/// operation still evaluates lanes the scalar code never touched (e.g.
		/// denormal inputs). The review of this patch asked for a cost analysis
		/// before enabling this kind of transform.
		Value *InstCombiner::WidenScalarOp(Instruction &Inst) {
		  if (Inst.getNumOperands() == 0 || Inst.getType()->isVectorTy() ||
		      !VectorType::isValidElementType(Inst.getType()) ||
		      Inst.mayHaveSideEffects())
		    return nullptr;

		  // Integer division and remainder are undefined for a zero divisor (and
		  // for signed overflow). Widening would execute them on lanes the
		  // original program never divided, so a well-defined scalar program
		  // could start trapping. Never widen these opcodes.
		  switch (Inst.getOpcode()) {
		  case Instruction::UDiv:
		  case Instruction::SDiv:
		  case Instruction::URem:
		  case Instruction::SRem:
		    return nullptr;
		  default:
		    break;
		  }

		  auto *FirstExtract = dyn_cast<ExtractElementInst>(Inst.getOperand(0));
		  if (!FirstExtract)
		    return nullptr;
		  const unsigned NumElts =
		      FirstExtract->getVectorOperandType()->getNumElements();
		  Value *const Lane = FirstExtract->getIndexOperand();

		  // Check that all operands of the user instruction are extractelement
		  // from the vectors of the same size and from the same lanes and the vector
		  // operand is not an insertelement instruction (this sequence is handled
		  // differently).
		  if (!std::all_of(Inst.op_begin(), Inst.op_end(),
		                   [NumElts, Lane](const Value *V) {
		                     auto *EEI = dyn_cast<ExtractElementInst>(V);
		                     return EEI &&
		                            !isa<InsertElementInst>(EEI->getVectorOperand()) &&
		                            EEI->getVectorOperandType()->getNumElements() ==
		                                NumElts &&
		                            EEI->getIndexOperand() == Lane;
		                   }))
		    return nullptr;

		  // Count the distinct extractelement operands that become dead once Inst
		  // is replaced, i.e. those whose every use is in Inst itself.
		  int NumVectorizedExtracts = 0;
		  SmallSet<ExtractElementInst *, 4> CountedOperands;
		  for (Value *Op : Inst.operand_values()) {
		    auto *EEOp = cast<ExtractElementInst>(Op);
		    if (EEOp->hasOneUse() ||
		        (std::all_of(EEOp->user_begin(), EEOp->user_end(),
		                     [&Inst](User *U) { return U == &Inst; }) &&
		         CountedOperands.insert(EEOp).second))
		      ++NumVectorizedExtracts;
		  }
		  // If the number of extractelement instructions to be removed does not exceed
		  // 1, do not widen this instruction sequence.
		  if (NumVectorizedExtracts <= 1)
		    return nullptr;

		  // The resulting extractelement must not be placed between PHI nodes, so
		  // for a PHI it goes to the first insertion point of the block instead of
		  // directly before Inst. Decide this before any IR is created so that
		  // bailing out leaves the function untouched.
		  Instruction *ExtractInsertPt = &Inst;
		  if (isa<PHINode>(Inst)) {
		    BasicBlock *BB = Inst.getParent();
		    auto FirstNonPHI = BB->getFirstInsertionPt();
		    if (FirstNonPHI == BB->end())
		      return nullptr;
		    ExtractInsertPt = &*FirstNonPHI;
		  }

		  // Generate vector code instead of the scalar one.
		  Instruction *NewI = Inst.clone();
		  NewI->setName("widen.vect");
		  NewI->mutateType(VectorType::get(Inst.getType(), NumElts));
		  for (unsigned OpIdx = 0, NumOps = NewI->getNumOperands(); OpIdx != NumOps;
		       ++OpIdx) {
		    auto *EE = cast<ExtractElementInst>(NewI->getOperand(OpIdx));
		    NewI->setOperand(OpIdx, EE->getVectorOperand());
		  }
		  // The operand extracts are deliberately left in place: one that still
		  // has users other than Inst must keep its value, and the others become
		  // trivially dead once the caller replaces Inst.
		  InsertNewInstWith(NewI, Inst);

		  // %widen.extract = extractelement <ty x n> %widen.vect, i32 Idx
		  // Replace uses of the scalar instruction by the %widen.extract
		  // instruction.
		  Instruction *Extract =
		      ExtractElementInst::Create(NewI, Lane, "widen.extract");
		  InsertNewInstWith(Extract, *ExtractInsertPt);
		  // Keep the debug location of the scalar instruction being replaced even
		  // when the extract was inserted at a different position.
		  Extract->setDebugLoc(Inst.getDebugLoc());
		  return Extract;
		}

Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {		Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());		SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());

if (Value *V =		if (Value *V =
SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC))		SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(GEP, V);		return replaceInstUsesWith(GEP, V);

Value *PtrOp = GEP.getOperand(0);		Value *PtrOp = GEP.getOperand(0);
▲ Show 20 Lines • Show All 1,839 Lines • Show Last 20 Lines

test/Transforms/InstCombine/bitcast-bigendian.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -instcombine -S \| FileCheck %s			; RUN: opt < %s -instcombine -S \| FileCheck %s

	target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"			target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
	target triple = "powerpc64-unknown-linux-gnu"			target triple = "powerpc64-unknown-linux-gnu"

	; These tests are extracted from bitcast.ll.			; These tests are extracted from bitcast.ll.
	; Verify that they also work correctly on big-endian targets.			; Verify that they also work correctly on big-endian targets.

	define float @test2(<2 x float> %A, <2 x i32> %B) {			define float @test2(<2 x float> %A, <2 x i32> %B) {
	; CHECK-LABEL: @test2(			; CHECK-LABEL: @test2(
	; CHECK-NEXT: [[TMP24:%.]] = extractelement <2 x float> [[A:%.]], i32 1
	; CHECK-NEXT: [[BC:%.]] = bitcast <2 x i32> [[B:%.]] to <2 x float>			; CHECK-NEXT: [[BC:%.]] = bitcast <2 x i32> [[B:%.]] to <2 x float>
	; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 1			; CHECK-NEXT: [[WIDEN_VECT:%.]] = fadd <2 x float> [[BC]], [[A:%.]]
	; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]			; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x float> [[WIDEN_VECT]], i32 1
	; CHECK-NEXT: ret float [[ADD]]			; CHECK-NEXT: ret float [[WIDEN_EXTRACT]]
	;			;
	%tmp28 = bitcast <2 x float> %A to i64			%tmp28 = bitcast <2 x float> %A to i64
	%tmp23 = trunc i64 %tmp28 to i32			%tmp23 = trunc i64 %tmp28 to i32
	%tmp24 = bitcast i32 %tmp23 to float			%tmp24 = bitcast i32 %tmp23 to float

	%tmp = bitcast <2 x i32> %B to i64			%tmp = bitcast <2 x i32> %B to i64
	%tmp2 = trunc i64 %tmp to i32			%tmp2 = trunc i64 %tmp to i32
	%tmp4 = bitcast i32 %tmp2 to float			%tmp4 = bitcast i32 %tmp2 to float
	▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

test/Transforms/InstCombine/bitcast.ll

Show First 20 Lines • Show All 238 Lines • ▼ Show 20 Lines	;
%t7 = bitcast <8 x i8> %t6 to <2 x float>		%t7 = bitcast <8 x i8> %t6 to <2 x float>
ret <2 x float> %t7		ret <2 x float> %t7
}		}

; Optimize bitcasts that are extracting low element of vector. This happens because of SRoA.		; Optimize bitcasts that are extracting low element of vector. This happens because of SRoA.
; rdar://7892780		; rdar://7892780
define float @test2(<2 x float> %A, <2 x i32> %B) {		define float @test2(<2 x float> %A, <2 x i32> %B) {
; CHECK-LABEL: @test2(		; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP24:%.]] = extractelement <2 x float> [[A:%.]], i32 0
; CHECK-NEXT: [[BC:%.]] = bitcast <2 x i32> [[B:%.]] to <2 x float>		; CHECK-NEXT: [[BC:%.]] = bitcast <2 x i32> [[B:%.]] to <2 x float>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 0		; CHECK-NEXT: [[WIDEN_VECT:%.]] = fadd <2 x float> [[BC]], [[A:%.]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x float> [[WIDEN_VECT]], i32 0
; CHECK-NEXT: ret float [[ADD]]		; CHECK-NEXT: ret float [[WIDEN_EXTRACT]]
;		;
%tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]		%tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
%tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]		%tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
%tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]		%tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]

%tmp = bitcast <2 x i32> %B to i64		%tmp = bitcast <2 x i32> %B to i64
%tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]		%tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
%tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]		%tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
▲ Show 20 Lines • Show All 259 Lines • Show Last 20 Lines

test/Transforms/InstCombine/type_pun.ll

	Show First 20 Lines • Show All 112 Lines • ▼ Show 20 Lines
	; Type punning two i32 values, with control flow.			; Type punning two i32 values, with control flow.
	; Verify that the bitcast is shared and dominates usage.			; Verify that the bitcast is shared and dominates usage.
	define i32 @type_pun_i32_ctrl(<16 x i8> %in) {			define i32 @type_pun_i32_ctrl(<16 x i8> %in) {
	; CHECK-LABEL: @type_pun_i32_ctrl(			; CHECK-LABEL: @type_pun_i32_ctrl(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>			; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
	; CHECK-NEXT: br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]			; CHECK-NEXT: br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]
	; CHECK: left:			; CHECK: left:
	; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
	; CHECK-NEXT: br label [[TAIL:%.*]]			; CHECK-NEXT: br label [[TAIL:%.*]]
	; CHECK: right:			; CHECK: right:
	; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
	; CHECK-NEXT: br label [[TAIL]]			; CHECK-NEXT: br label [[TAIL]]
	; CHECK: tail:			; CHECK: tail:
	; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[SROA_EXTRACT1]], [[LEFT]] ], [ [[SROA_EXTRACT]], [[RIGHT]] ]			; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
	; CHECK-NEXT: ret i32 [[I]]			; CHECK-NEXT: ret i32 [[WIDEN_EXTRACT]]
	;			;
	entry:			entry:
	%sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			%sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	br i1 undef, label %left, label %right			br i1 undef, label %left, label %right
	left:			left:
	%lhs = bitcast <4 x i8> %sroa to i32			%lhs = bitcast <4 x i8> %sroa to i32
	br label %tail			br label %tail
	right:			right:
	Show All 19 Lines

test/Transforms/InstCombine/x86-avx512.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s		; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"		target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)		declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss(		; CHECK-LABEL: @test_add_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fadd <4 x float> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[WIDEN_VECT]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss_round(		; CHECK-LABEL: @test_add_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]		; CHECK-NEXT: ret <4 x float> [[TMP1]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {		define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_add_ss_mask(		; CHECK-LABEL: @test_add_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <4 x float> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], float [[WIDEN_EXTRACT]], float [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP5]]
; CHECK-NEXT: ret <4 x float> [[TMP8]]
;		;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

Show All 25 Lines	;
%10 = extractelement <4 x float> %9, i32 1		%10 = extractelement <4 x float> %9, i32 1
ret float %10		ret float %10
}		}

declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)		declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_add_sd(		; CHECK-LABEL: @test_add_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fadd <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[WIDEN_VECT]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_add_sd_round(		; CHECK-LABEL: @test_add_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_add_sd_mask(		; CHECK-LABEL: @test_add_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fadd <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x double> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[WIDEN_EXTRACT]], double [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0		; CHECK-NEXT: ret <2 x double> [[TMP5]]
; CHECK-NEXT: ret <2 x double> [[TMP8]]
;		;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_add_sd_mask_round(		; CHECK-LABEL: @test_add_sd_mask_round(
Show All 17 Lines	;
%6 = extractelement <2 x double> %5, i32 1		%6 = extractelement <2 x double> %5, i32 1
ret double %6		ret double %6
}		}

declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)		declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss(		; CHECK-LABEL: @test_sub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fsub <4 x float> [[A:%.*]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[WIDEN_VECT]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss_round(		; CHECK-LABEL: @test_sub_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]		; CHECK-NEXT: ret <4 x float> [[TMP1]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {		define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_ss_mask(		; CHECK-LABEL: @test_sub_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <4 x float> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], float [[WIDEN_EXTRACT]], float [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP5]]
; CHECK-NEXT: ret <4 x float> [[TMP8]]
;		;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

Show All 25 Lines	;
%10 = extractelement <4 x float> %9, i32 1		%10 = extractelement <4 x float> %9, i32 1
ret float %10		ret float %10
}		}

declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)		declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_sub_sd(		; CHECK-LABEL: @test_sub_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[WIDEN_VECT]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_sub_sd_round(		; CHECK-LABEL: @test_sub_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_sd_mask(		; CHECK-LABEL: @test_sub_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x double> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[WIDEN_EXTRACT]], double [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0		; CHECK-NEXT: ret <2 x double> [[TMP5]]
; CHECK-NEXT: ret <2 x double> [[TMP8]]
;		;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_sd_mask_round(		; CHECK-LABEL: @test_sub_sd_mask_round(
Show All 17 Lines	;
%6 = extractelement <2 x double> %5, i32 1		%6 = extractelement <2 x double> %5, i32 1
ret double %6		ret double %6
}		}

declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)		declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss(		; CHECK-LABEL: @test_mul_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fmul <4 x float> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[WIDEN_VECT]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss_round(		; CHECK-LABEL: @test_mul_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]		; CHECK-NEXT: ret <4 x float> [[TMP1]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {		define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_ss_mask(		; CHECK-LABEL: @test_mul_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <4 x float> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], float [[WIDEN_EXTRACT]], float [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP5]]
; CHECK-NEXT: ret <4 x float> [[TMP8]]
;		;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

Show All 25 Lines	;
%10 = extractelement <4 x float> %9, i32 1		%10 = extractelement <4 x float> %9, i32 1
ret float %10		ret float %10
}		}

declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)		declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_mul_sd(		; CHECK-LABEL: @test_mul_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fmul <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[WIDEN_VECT]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_mul_sd_round(		; CHECK-LABEL: @test_mul_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_sd_mask(		; CHECK-LABEL: @test_mul_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fmul <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x double> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[WIDEN_EXTRACT]], double [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0		; CHECK-NEXT: ret <2 x double> [[TMP5]]
; CHECK-NEXT: ret <2 x double> [[TMP8]]
;		;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_sd_mask_round(		; CHECK-LABEL: @test_mul_sd_mask_round(
Show All 17 Lines	;
%6 = extractelement <2 x double> %5, i32 1		%6 = extractelement <2 x double> %5, i32 1
ret double %6		ret double %6
}		}

declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)		declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss(		; CHECK-LABEL: @test_div_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fdiv <4 x float> [[A:%.*]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[WIDEN_VECT]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss_round(		; CHECK-LABEL: @test_div_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]		; CHECK-NEXT: ret <4 x float> [[TMP1]]
;		;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
ret <4 x float> %4		ret <4 x float> %4
}		}

define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {		define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_div_ss_mask(		; CHECK-LABEL: @test_div_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fdiv <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <4 x float> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], float [[WIDEN_EXTRACT]], float [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0		; CHECK-NEXT: ret <4 x float> [[TMP5]]
; CHECK-NEXT: ret <4 x float> [[TMP8]]
;		;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1		%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2		%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3		%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)		%4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
ret <4 x float> %4		ret <4 x float> %4
}		}

Show All 25 Lines	;
%10 = extractelement <4 x float> %9, i32 1		%10 = extractelement <4 x float> %9, i32 1
ret float %10		ret float %10
}		}

declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)		declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_div_sd(		; CHECK-LABEL: @test_div_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[WIDEN_VECT]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_div_sd_round(		; CHECK-LABEL: @test_div_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)		; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]		; CHECK-NEXT: ret <2 x double> [[TMP1]]
;		;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_div_sd_mask(		; CHECK-LABEL: @test_div_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_VECT:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0		; CHECK-NEXT: [[WIDEN_EXTRACT:%.*]] = extractelement <2 x double> [[WIDEN_VECT]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0		; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[WIDEN_EXTRACT]], double [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]		; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0		; CHECK-NEXT: ret <2 x double> [[TMP5]]
; CHECK-NEXT: ret <2 x double> [[TMP8]]
;		;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1		%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)		%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {		define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_div_sd_mask_round(		; CHECK-LABEL: @test_div_sd_mask_round(
▲ Show 20 Lines • Show All 2,242 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] PR32078: convert scalar operations to vector. (Abandoned, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 95331

lib/Transforms/InstCombine/InstCombineAddSub.cpp

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

lib/Transforms/InstCombine/InstCombineCompares.cpp

lib/Transforms/InstCombine/InstCombineInternal.h

lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

lib/Transforms/InstCombine/InstCombinePHI.cpp

lib/Transforms/InstCombine/InstCombineShifts.cpp

lib/Transforms/InstCombine/InstructionCombining.cpp

test/Transforms/InstCombine/bitcast-bigendian.ll

test/Transforms/InstCombine/bitcast.ll

test/Transforms/InstCombine/type_pun.ll

test/Transforms/InstCombine/x86-avx512.ll

[InstCombine] PR32078: convert scalar operations to vector.
AbandonedPublic