Diff 37474

lib/Target/X86/InstPrinter/X86InstComments.cpp

Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines	static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
case X86::VPMOVZXDQYrm:		case X86::VPMOVZXDQYrm:
case X86::VPMOVZXDQYrr:		case X86::VPMOVZXDQYrr:
SrcVT = MVT::v4i32;		SrcVT = MVT::v4i32;
DstVT = MVT::v4i64;		DstVT = MVT::v4i64;
break;		break;
}		}
}		}

		// Expands to the case labels of both the VSHUFF and VSHUFI flavours of
		// <Inst> for one vector-length suffix and one second-source kind (src2 is
		// `r` or `m`), covering the plain, `k` and `kz` opcode variants of each.
		#define CASE_VSHUF_COMMON(Inst, Suffix, src2)        \
		  case X86::VSHUFF##Inst##Suffix##r##src2##i:        \
		  case X86::VSHUFI##Inst##Suffix##r##src2##i:        \
		  case X86::VSHUFF##Inst##Suffix##r##src2##ik:       \
		  case X86::VSHUFI##Inst##Suffix##r##src2##ik:       \
		  case X86::VSHUFF##Inst##Suffix##r##src2##ikz:      \
		  case X86::VSHUFI##Inst##Suffix##r##src2##ikz:

		// Expands to the case labels of every register and memory form of the
		// VSHUFF/VSHUFI <Inst> opcodes for the Z and Z256 suffixes.
		// The last line deliberately has no trailing backslash, so the macro does
		// not swallow whatever line happens to follow it.
		#define CASE_VSHUF(Inst)                 \
		  CASE_VSHUF_COMMON(Inst, Z, r)          \
		  CASE_VSHUF_COMMON(Inst, Z, m)          \
		  CASE_VSHUF_COMMON(Inst, Z256, r)       \
		  CASE_VSHUF_COMMON(Inst, Z256, m)

		/// \brief Reports the vector type of a SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2
		/// instruction and whether it is one of the memory (`m`) forms.
		///
		/// \param MI       Instruction to classify. Its opcode must be one of the
		///                 opcodes produced by CASE_VSHUF_COMMON for 64X2 or 32X4
		///                 with suffix Z or Z256; anything else is unreachable.
		/// \param VT       [out] v8i64 / v4i64 for the 64X2 Z / Z256 opcodes,
		///                 v16i32 / v8i32 for the 32X4 Z / Z256 opcodes.
		/// \param HasMemOp [out] true for the `m` opcodes, false for the `r` ones.
		static void getVSHUF64x2FamilyInfo(const MCInst *MI, MVT &VT, bool &HasMemOp) {
		  HasMemOp = false;
		  switch (MI->getOpcode()) {
		  default:
		    // No break needed: control never continues past llvm_unreachable.
		    llvm_unreachable("Unknown VSHUF64x2 family instructions.");
		  // Each memory form only sets the flag and then shares the type
		  // assignment of the register form that follows it.
		  CASE_VSHUF_COMMON(64X2, Z, m)
		    HasMemOp = true; // FALL THROUGH.
		  CASE_VSHUF_COMMON(64X2, Z, r)
		    VT = MVT::v8i64;
		    break;
		  CASE_VSHUF_COMMON(64X2, Z256, m)
		    HasMemOp = true; // FALL THROUGH.
		  CASE_VSHUF_COMMON(64X2, Z256, r)
		    VT = MVT::v4i64;
		    break;
		  CASE_VSHUF_COMMON(32X4, Z, m)
		    HasMemOp = true; // FALL THROUGH.
		  CASE_VSHUF_COMMON(32X4, Z, r)
		    VT = MVT::v16i32;
		    break;
		  CASE_VSHUF_COMMON(32X4, Z256, m)
		    HasMemOp = true; // FALL THROUGH.
		  CASE_VSHUF_COMMON(32X4, Z256, r)
		    VT = MVT::v8i32;
		    break;
		  }
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Top Level Entrypoint		// Top Level Entrypoint
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// EmitAnyX86InstComments - This function decodes x86 instructions and prints		/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
/// newline terminated strings to the specified string if desired. This		/// newline terminated strings to the specified string if desired. This
/// information is shown in disassembly dumps when verbose assembly is enabled.		/// information is shown in disassembly dumps when verbose assembly is enabled.
bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,		bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
▲ Show 20 Lines • Show All 603 Lines • ▼ Show 20 Lines	bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VSHUFPSYrmi:		case X86::VSHUFPSYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())		if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(MVT::v8f32,		DecodeSHUFPMask(MVT::v8f32,
MI->getOperand(MI->getNumOperands() - 1).getImm(),		MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);		ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());		Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());		DestName = getRegName(MI->getOperand(0).getReg());
break;		break;
		CASE_VSHUF(64X2)
		CASE_VSHUF(32X4) {
		MVT VT;
		bool HasMemOp;
		unsigned NumOp = MI->getNumOperands();
		getVSHUF64x2FamilyInfo(MI, VT, HasMemOp);
		decodeVSHUF64x2FamilyMask(VT, MI->getOperand(NumOp - 1).getImm(),
		ShuffleMask);
		DestName = getRegName(MI->getOperand(0).getReg());
		if (HasMemOp) {
		assert((NumOp >= 8) && "Expected at least 8 operands!");
		Src1Name = getRegName(MI->getOperand(NumOp - 7).getReg());
		} else {
		assert((NumOp >= 4) && "Expected at least 4 operands!");
		Src2Name = getRegName(MI->getOperand(NumOp - 2).getReg());
		Src1Name = getRegName(MI->getOperand(NumOp - 3).getReg());
		}
		break;
		}
		delena (Done): You can use a macro to hide the bunch of "cases" and handle the 256-bit form of these instructions. You can also distinguish between the memory and register forms with one boolean variable.
		delena (Done): case_VSHUF(64x2) case_VSHUF(32x4)
case X86::UNPCKLPDrr:		case X86::UNPCKLPDrr:
case X86::VUNPCKLPDrr:		case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());		Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.		// FALL THROUGH.
case X86::UNPCKLPDrm:		case X86::UNPCKLPDrm:
case X86::VUNPCKLPDrm:		case X86::VUNPCKLPDrm:
DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);		DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());		Src1Name = getRegName(MI->getOperand(1).getReg());
▲ Show 20 Lines • Show All 328 Lines • Show Last 20 Lines

lib/Target/X86/Utils/X86ShuffleDecode.h

Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);		SmallVectorImpl<int> &ShuffleMask);

/// \brief Decode a BLEND immediate mask into a shuffle mask.		/// \brief Decode a BLEND immediate mask into a shuffle mask.
void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);		void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,		void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);		SmallVectorImpl<int> &ShuffleMask);

		/// \brief Decode a shuffle packed values at 128-bit granularity
		/// immediate mask into a shuffle mask.
		void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
		RKSimon (Done): Shouldn't this be called DecodeVSHUF64x2Mask?
		igorb (Author, Not Done): This function decodes the mask for the SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2 instructions.
		RKSimon (Done): What I meant was the Decode functions in this file tend to be named after the x86 instruction itself and not the shuffle / ISD type. AFAICT all of these use the VSHUFF64x2 / VSHUFI64x2 instruction - that's why I suggested calling it DecodeVSHUF64x2Mask.
		SmallVectorImpl<int> &ShuffleMask);

/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.		/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
/// No VT provided since it only works on 256-bit, 4 element vectors.		/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);		void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.		/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);		void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);

/// \brief Decode a zero extension instruction as a shuffle mask.		/// \brief Decode a zero extension instruction as a shuffle mask.
Show All 36 Lines

lib/Target/X86/Utils/X86ShuffleDecode.cpp

Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines	void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {		for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {		for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
ShuffleMask.push_back(i); // Reads from dest/src1		ShuffleMask.push_back(i); // Reads from dest/src1
ShuffleMask.push_back(i + NumElts); // Reads from src/src2		ShuffleMask.push_back(i + NumElts); // Reads from src/src2
}		}
}		}
}		}

		/// \brief Expand the immediate of a 128-bit-granularity shuffle
		/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) into an element-level
		/// shuffle mask.
		///
		/// One selector field per 128-bit result lane is read from \p Imm, lowest
		/// lane first. Result lanes in the upper half are offset by the lane count,
		/// so their indices refer to the second source. The element indices of each
		/// chosen lane are appended to \p ShuffleMask.
		void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
		                               SmallVectorImpl<int> &ShuffleMask) {
		  const unsigned LaneCount = VT.getSizeInBits() / 128;
		  const unsigned EltsPerLane = 128 / VT.getScalarSizeInBits();
		  const unsigned SelectorMask = LaneCount - 1;
		  const unsigned HalfLaneCount = LaneCount / 2;
		  // The width of one selector field equals half the lane count.
		  const unsigned BitsPerSelector = HalfLaneCount;

		  unsigned Selectors = Imm;
		  for (unsigned Lane = 0; Lane < LaneCount;
		       ++Lane, Selectors >>= BitsPerSelector) {
		    unsigned SrcLane = Selectors & SelectorMask;
		    // Upper-half result lanes read from the other source.
		    if (Lane >= HalfLaneCount)
		      SrcLane += LaneCount;

		    const unsigned FirstElt = SrcLane * EltsPerLane;
		    for (unsigned Elt = 0; Elt < EltsPerLane; ++Elt)
		      ShuffleMask.push_back(FirstElt + Elt);
		  }
		}

void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,		void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {		SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements() / 2;		unsigned HalfSize = VT.getVectorNumElements() / 2;

for (unsigned l = 0; l != 2; ++l) {		for (unsigned l = 0; l != 2; ++l) {
unsigned HalfMask = Imm >> (l * 4);		unsigned HalfMask = Imm >> (l * 4);
unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;		unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)		for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
▲ Show 20 Lines • Show All 313 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 10,707 Lines • ▼ Show 20 Lines	static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
case MVT::v32i8:		case MVT::v32i8:
return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG);		return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG);

default:		default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");		llvm_unreachable("Not a valid 256-bit x86 vector type!");
}		}
}		}

		/// \brief Try to lower a 512-bit vector shuffle as a single shuffle of
		/// 128-bit lanes (X86ISD::SHUF128).
		///
		/// The node takes its two lower result lanes from its first operand and its
		/// two upper result lanes from its second operand. The lowering therefore
		/// only succeeds when each half of the widened mask reads from a single
		/// input; that input becomes the corresponding operand.
		///
		/// \param DL   Debug location for the new node.
		/// \param VT   Shuffle type; must be 512 bits wide with 64-bit elements.
		/// \param Mask Element-level shuffle mask.
		/// \param V1   First shuffle input.
		/// \param V2   Second shuffle input.
		/// \param DAG  DAG used to create nodes.
		/// \returns the SHUF128 node, or an empty SDValue when the mask cannot be
		/// widened to 128-bit lanes, contains a zeroed lane, or mixes V1 and V2
		/// within one half of the result.
		static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
		                                        ArrayRef<int> Mask,
		                                        SDValue V1, SDValue V2,
		                                        SelectionDAG &DAG) {
		  assert(VT.getScalarSizeInBits() == 64 &&
		         "Unexpected element type size for 128bit shuffle.");

		  // To handle 256 bit vector requires VLX and most probably
		  // function lowerV2X128VectorShuffle() is better solution. 256-bit
		  // shuffles are not canonicalized away before reaching this point, so the
		  // restriction is checked here.
		  assert(VT.getSizeInBits() == 512 &&
		         "Unexpected vector size for 128bit shuffle.");

		  SmallVector<int, 4> WidenedMask;
		  if (!canWidenShuffleElements(Mask, WidenedMask))
		    return SDValue();

		  // Form a 128-bit permutation.
		  // Convert the 64-bit shuffle mask selection values into 128-bit selection
		  // bits defined by a vshuf64x2 instruction's immediate control byte.
		  const int NumLanes = WidenedMask.size();
		  const unsigned ControlBitsNum = NumLanes / 2;
		  unsigned PermMask = 0;

		  // Input feeding the lower (index 0) and upper (index 1) half of the
		  // result; a null value means "not chosen yet".
		  SDValue Ops[2];

		  for (int i = 0; i < NumLanes; ++i) {
		    const int Lane = WidenedMask[i];
		    if (Lane == SM_SentinelZero)
		      return SDValue();

		    // An undef lane may read anything: leave its selector bits at zero
		    // (first lane) and do not let it constrain the operand choice.
		    if (Lane == SM_SentinelUndef)
		      continue;

		    // Widened indices [0, NumLanes) refer to V1, the rest to V2.
		    SDValue Src = Lane < NumLanes ? V1 : V2;
		    SDValue &HalfSrc = Ops[i < NumLanes / 2 ? 0 : 1];
		    if (!HalfSrc.getNode())
		      HalfSrc = Src;
		    else if (HalfSrc != Src)
		      return SDValue(); // One half cannot read from two different inputs.

		    PermMask |= static_cast<unsigned>(Lane % NumLanes) << (i * ControlBitsNum);
		  }

		  // A half made only of undef lanes needs no particular input.
		  for (SDValue &Op : Ops)
		    if (!Op.getNode())
		      Op = DAG.getUNDEF(VT);

		  return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
		                     DAG.getConstant(PermMask, DL, MVT::i8));
		}

static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,		static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,		ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {		SDValue V2, SelectionDAG &DAG) {

assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");		assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");

MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());		MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());		MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
Show All 16 Lines	static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
SDLoc DL(Op);		SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");		assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");		assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();		ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");		assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

		if (SDValue Shuf128 =
		lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
		return Shuf128;

if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;		return Unpck;

return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.		/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
Show All 20 Lines	static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
SDLoc DL(Op);		SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");		assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");		assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();		ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");		assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

		if (SDValue Shuf128 =
		lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
		return Shuf128;

if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;		return Unpck;

return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit integer shuffles.		/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
▲ Show 20 Lines • Show All 16,438 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-intrinsics.ll

	Show First 20 Lines • Show All 4,156 Lines • ▼ Show 20 Lines

	declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)			declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)

	define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {			define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
	; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:			; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: kmovw %edi, %k1			; CHECK-NEXT: kmovw %edi, %k1
	; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}			; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
				; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
	; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm0			; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm0
				; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
	; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0			; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)			%res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
	%res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)			%res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
	%res2 = fadd <16 x float> %res, %res1			%res2 = fadd <16 x float> %res, %res1
	ret <16 x float> %res2			ret <16 x float> %res2
	}			}

	declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)			declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)

	define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {			define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
	; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:			; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: movzbl %dil, %eax			; CHECK-NEXT: movzbl %dil, %eax
	; CHECK-NEXT: kmovw %eax, %k1			; CHECK-NEXT: kmovw %eax, %k1
	; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}			; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
				; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
	; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 {%k1} {z}			; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
				; CHECK-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
	; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0			; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0
				; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
	; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0			; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
	; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0			; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
				RKSimon (Done): Why isn't this CHECK-NEXT any more?
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
				RKSimon (Done): Why isn't this CHECK-NEXT any more?
	%res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)			%res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
	%res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)			%res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
	%res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)			%res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)

	%res3 = fadd <8 x double> %res, %res1			%res3 = fadd <8 x double> %res, %res1
	%res4 = fadd <8 x double> %res3, %res2			%res4 = fadd <8 x double> %res3, %res2
	ret <8 x double> %res4			ret <8 x double> %res4
	}			}

	declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)			declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

	define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {			define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
	; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:			; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: kmovw %edi, %k1			; CHECK-NEXT: kmovw %edi, %k1
	; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}			; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
				; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
	; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm0			; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm0
				; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
	; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0			; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)			%res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
	%res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)			%res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
	%res2 = add <16 x i32> %res, %res1			%res2 = add <16 x i32> %res, %res1
	ret <16 x i32> %res2			ret <16 x i32> %res2
	}			}

	declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)			declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

	define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {			define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
	; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:			; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: movzbl %dil, %eax			; CHECK-NEXT: movzbl %dil, %eax
	; CHECK-NEXT: kmovw %eax, %k1			; CHECK-NEXT: kmovw %eax, %k1
	; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}			; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
				; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
	; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0			; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0
				; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
	; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0			; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)			%res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
				RKSimon (Done): Why isn't this CHECK-NEXT any more?
	%res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)			%res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
	%res2 = add <8 x i64> %res, %res1			%res2 = add <8 x i64> %res, %res1
	ret <8 x i64> %res2			ret <8 x i64> %res2
	}			}

	declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)			declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

	define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {			define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
	▲ Show 20 Lines • Show All 312 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512vl-intrinsics.ll

Show First 20 Lines • Show All 1,861 Lines • ▼ Show 20 Lines	define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0		%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer		%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)		%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {		define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128		;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]		;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
%q = load i32, i32* %ptr_b		%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0		%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer		%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)		%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
▲ Show 20 Lines • Show All 415 Lines • ▼ Show 20 Lines	define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_ps_256		;CHECK-LABEL: test_mm512_maskz_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}		;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)		%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
ret <8 x float> %res		ret <8 x float> %res
}		}

define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {		define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_256		;CHECK-LABEL: test_mm512_mask_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}		;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)		%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res		ret <8 x float> %res
}		}

define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {		define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_add_ps_256		;CHECK-LABEL: test_mm512_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm0		;CHECK: vaddps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)		%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
ret <8 x float> %res		ret <8 x float> %res
}		}
declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {		define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_ps_128		;CHECK-LABEL: test_mm512_maskz_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}		;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)		%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
ret <4 x float> %res		ret <4 x float> %res
}		}

define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {		define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_128		;CHECK-LABEL: test_mm512_mask_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}		;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)		%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res		ret <4 x float> %res
}		}

define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {		define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_add_ps_128		;CHECK-LABEL: test_mm512_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm0		;CHECK: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)		%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
ret <4 x float> %res		ret <4 x float> %res
}		}
declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {		define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_sub_ps_256		;CHECK-LABEL: test_mm512_maskz_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}		;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)		%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
ret <8 x float> %res		ret <8 x float> %res
}		}

; Merge-masked 256-bit packed-float subtract: calls the masked sub intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vsubps writing %ymm2 and annotated with {%k1}.
define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
  %diff = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %diff
}

; 256-bit packed-float subtract with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vsubps on ymm registers.
define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm0
  %diff = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %diff
}
declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Zero-masked 128-bit packed-float subtract: calls the masked sub intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vsubps on xmm registers annotated with {%k1} {z}.
define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z}
  %diff = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %diff
}

; Merge-masked 128-bit packed-float subtract: calls the masked sub intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vsubps writing %xmm2 and annotated with {%k1}.
define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
  %diff = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %diff
}

; 128-bit packed-float subtract with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vsubps on xmm registers.
define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm0
  %diff = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %diff
}
declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Zero-masked 256-bit packed-float multiply: calls the masked mul intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vmulps on ymm registers annotated with {%k1} {z}.
define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z}
  %prod = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %prod
}

; Merge-masked 256-bit packed-float multiply: calls the masked mul intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vmulps writing %ymm2 and annotated with {%k1}.
define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
  %prod = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %prod
}

; 256-bit packed-float multiply with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vmulps on ymm registers.
define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm0
  %prod = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %prod
}
declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Zero-masked 128-bit packed-float multiply: calls the masked mul intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vmulps on xmm registers annotated with {%k1} {z}.
define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z}
  %prod = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %prod
}

; Merge-masked 128-bit packed-float multiply: calls the masked mul intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vmulps writing %xmm2 and annotated with {%k1}.
define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
  %prod = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %prod
}

; 128-bit packed-float multiply with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vmulps on xmm registers.
define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm0
  %prod = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %prod
}
declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Zero-masked 256-bit packed-float divide: calls the masked div intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vdivps on ymm registers annotated with {%k1} {z}.
define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z}
  %quot = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %quot
}

; Merge-masked 256-bit packed-float divide: calls the masked div intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vdivps writing %ymm2 and annotated with {%k1}.
define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
  %quot = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %quot
}

; 256-bit packed-float divide with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vdivps on ymm registers.
define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm0
  %quot = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %quot
}
declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Zero-masked 128-bit packed-float divide: calls the masked div intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vdivps on xmm registers annotated with {%k1} {z}.
define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z}
  %quot = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %quot
}

; Merge-masked 128-bit packed-float divide: calls the masked div intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vdivps writing %xmm2 and annotated with {%k1}.
define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
  %quot = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %quot
}

; 128-bit packed-float divide with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vdivps on xmm registers.
define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm0
  %quot = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %quot
}
declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Zero-masked 256-bit packed-float max: calls the masked max intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vmaxps on ymm registers annotated with {%k1} {z}.
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z}
  %maxv = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %maxv
}

; Merge-masked 256-bit packed-float max: calls the masked max intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vmaxps writing %ymm2 and annotated with {%k1}.
define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
  %maxv = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %maxv
}

; 256-bit packed-float max with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vmaxps on ymm registers.
define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm0
  %maxv = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %maxv
}
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Zero-masked 128-bit packed-float max: calls the masked max intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vmaxps on xmm registers annotated with {%k1} {z}.
define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z}
  %maxv = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %maxv
}

; Merge-masked 128-bit packed-float max: calls the masked max intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vmaxps writing %xmm2 and annotated with {%k1}.
define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
  %maxv = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %maxv
}

; 128-bit packed-float max with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vmaxps on xmm registers.
define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm0
  %maxv = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %maxv
}
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Zero-masked 256-bit packed-float min: calls the masked min intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vminps on ymm registers annotated with {%k1} {z}.
define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z}
  %minv = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %minv
}

; Merge-masked 256-bit packed-float min: calls the masked min intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vminps writing %ymm2 and annotated with {%k1}.
define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
  %minv = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %minv
}

; 256-bit packed-float min with a constant all-ones mask (i8 -1) and a
; zeroinitializer passthrough; the %mask argument is never referenced. The
; expected instruction is vminps on ymm registers.
define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm0
  %minv = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %minv
}
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Zero-masked 128-bit packed-float min: calls the masked min intrinsic with
; a zeroinitializer passthrough and the caller-supplied %mask. The expected
; instruction is vminps on xmm registers annotated with {%k1} {z}.
define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z}
  %minv = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %minv
}

; Merge-masked 128-bit packed-float min: calls the masked min intrinsic
; with %src as the passthrough operand and %mask as the mask. The expected
; instruction is vminps writing %xmm2 and annotated with {%k1}.
define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
  %minv = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %minv
}

define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {		define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_min_ps_128		;CHECK-LABEL: test_mm512_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm0		;CHECK: vminps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)		%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
Show All 33 Lines	define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
%res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)		%res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res		ret <8 x float> %res
}		}
declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone		declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; Calls @llvm.x86.avx512.mask.pmaxs.d.128 twice under %mask -- once with %x2 as
; the passthrough and once with a zeroinitializer passthrough -- and returns the
; element-wise sum of both results. The FileCheck patterns below require that
; no `call` text precedes a vpmaxsd on xmm registers, followed by a {%k1}
; annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128
; CHECK-NOT: call
; CHECK: vpmaxsd %xmm
; CHECK: {%k1}
define <4 x i32> @test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
  %merged = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %zeroed = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %combined = add <4 x i32> %merged, %zeroed
  ret <4 x i32> %combined
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; Calls @llvm.x86.avx512.mask.pmaxs.d.256 twice with %x2 as the passthrough --
; once under the %x3 mask and once under a constant all-ones mask (i8 -1) --
; and returns the element-wise sum of both results. The FileCheck patterns
; below require that no `call` text precedes a vpmaxsd on ymm registers,
; followed by a {%k1} annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256
; CHECK-NOT: call
; CHECK: vpmaxsd %ymm
; CHECK: {%k1}
define <8 x i32> @test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
  %merged = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %full = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %combined = add <8 x i32> %merged, %full
  ret <8 x i32> %combined
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; Calls @llvm.x86.avx512.mask.pmaxs.q.128 twice with %x2 as the passthrough --
; once under the %x3 mask and once under a constant all-ones mask (i8 -1) --
; and returns the element-wise sum of both results. The FileCheck patterns
; below require that no `call` text precedes a vpmaxsq on xmm registers,
; followed by a {%k1} annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128
; CHECK-NOT: call
; CHECK: vpmaxsq %xmm
; CHECK: {%k1}
define <2 x i64> @test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
  %merged = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %full = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %combined = add <2 x i64> %merged, %full
  ret <2 x i64> %combined
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; Calls @llvm.x86.avx512.mask.pmaxs.q.256 twice under %mask -- once with %x2 as
; the passthrough and once with a zeroinitializer passthrough -- and returns the
; element-wise sum of both results. The FileCheck patterns below require that
; no `call` text precedes a vpmaxsq on ymm registers, followed by a {%k1}
; annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256
; CHECK-NOT: call
; CHECK: vpmaxsq %ymm
; CHECK: {%k1}
define <4 x i64> @test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
  %merged = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %zeroed = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %combined = add <4 x i64> %merged, %zeroed
  ret <4 x i64> %combined
}

declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; Calls @llvm.x86.avx512.mask.pmaxu.d.128 twice under %mask -- once with %x2 as
; the passthrough and once with a zeroinitializer passthrough -- and returns the
; element-wise sum of both results. The FileCheck patterns below require that
; no `call` text precedes a vpmaxud on xmm registers, followed by a {%k1}
; annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128
; CHECK-NOT: call
; CHECK: vpmaxud %xmm
; CHECK: {%k1}
define <4 x i32> @test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
  %merged = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %zeroed = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %combined = add <4 x i32> %merged, %zeroed
  ret <4 x i32> %combined
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; Calls @llvm.x86.avx512.mask.pmaxu.d.256 twice with %x2 as the passthrough --
; once under the %x3 mask and once under a constant all-ones mask (i8 -1) --
; and returns the element-wise sum of both results. The FileCheck patterns
; below require that no `call` text precedes a vpmaxud on ymm registers,
; followed by a {%k1} annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256
; CHECK-NOT: call
; CHECK: vpmaxud %ymm
; CHECK: {%k1}
define <8 x i32> @test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
  %merged = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %full = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %combined = add <8 x i32> %merged, %full
  ret <8 x i32> %combined
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; Calls @llvm.x86.avx512.mask.pmaxu.q.128 twice with %x2 as the passthrough --
; once under the %x3 mask and once under a constant all-ones mask (i8 -1) --
; and returns the element-wise sum of both results. The FileCheck patterns
; below require that no `call` text precedes a vpmaxuq on xmm registers,
; followed by a {%k1} annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128
; CHECK-NOT: call
; CHECK: vpmaxuq %xmm
; CHECK: {%k1}
define <2 x i64> @test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
  %merged = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %full = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %combined = add <2 x i64> %merged, %full
  ret <2 x i64> %combined
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; Calls @llvm.x86.avx512.mask.pmaxu.q.256 twice under %mask -- once with %x2 as
; the passthrough and once with a zeroinitializer passthrough -- and returns the
; element-wise sum of both results. The FileCheck patterns below require that
; no `call` text precedes a vpmaxuq on ymm registers, followed by a {%k1}
; annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256
; CHECK-NOT: call
; CHECK: vpmaxuq %ymm
; CHECK: {%k1}
define <4 x i64> @test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
  %merged = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %zeroed = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %combined = add <4 x i64> %merged, %zeroed
  ret <4 x i64> %combined
}

declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; Calls @llvm.x86.avx512.mask.pmins.d.128 twice under %mask -- once with %x2 as
; the passthrough and once with a zeroinitializer passthrough -- and returns the
; element-wise sum of both results. The FileCheck patterns below require that
; no `call` text precedes a vpminsd on xmm registers, followed by a {%k1}
; annotation.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128
; CHECK-NOT: call
; CHECK: vpminsd %xmm
; CHECK: {%k1}
define <4 x i32> @test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
  %merged = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %zeroed = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %combined = add <4 x i32> %merged, %zeroed
  ret <4 x i32> %combined
}

declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminsd %ymm		; CHECK: vpminsd %ymm
; CHECK: {%k1}		; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {		define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)		%res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}

declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128		; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminsq %xmm		; CHECK: vpminsq %xmm
; CHECK: {%k1}		; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {		define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)		%res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)		%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1		%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2		ret <2 x i64> %res2
}		}

declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminsq %ymm		; CHECK: vpminsq %ymm
; CHECK: {%k1}		; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {		define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)		%res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)		%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1		%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2		ret <4 x i64> %res2
}		}

declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128		; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminud %xmm		; CHECK: vpminud %xmm
; CHECK: {%k1}		; CHECK: {%k1}
define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {		define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)		%res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)		%res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
%res2 = add <4 x i32> %res, %res1		%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2		ret <4 x i32> %res2
}		}

declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminud %ymm		; CHECK: vpminud %ymm
; CHECK: {%k1}		; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {		define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)		%res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}

declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128		; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminuq %xmm		; CHECK: vpminuq %xmm
; CHECK: {%k1}		; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {		define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)		%res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)		%res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1		%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2		ret <2 x i64> %res2
}		}

declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: vpminuq %ymm		; CHECK: vpminuq %ymm
; CHECK: {%k1}		; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {		define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)		%res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)		%res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1		%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2		ret <4 x i64> %res2
}		}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermt2d %xmm{{.*}}{%k1}		; CHECK: vpermt2d %xmm{{.*}}{%k1}
; CHECK-NOT: {z}		; CHECK-NOT: {z}
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {		define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)		%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)		%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1		%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2		ret <4 x i32> %res2
}		}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128		; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}		; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {		define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)		%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)		%res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1		%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2		ret <4 x i32> %res2
}		}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermt2d %ymm{{.*}}{%k1}		; CHECK: vpermt2d %ymm{{.*}}{%k1}
; CHECK-NOT: {z}		; CHECK-NOT: {z}
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {		define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)		%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256		; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermt2d {{.*}}{%k1} {z}		; CHECK: vpermt2d {{.*}}{%k1} {z}
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {		define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)		%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}

declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)		declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermi2pd %xmm{{.*}}{%k1}		; CHECK: vpermi2pd %xmm{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {		define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)		%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)		%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1		%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2		ret <2 x double> %res2
}		}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)		declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermi2pd %ymm{{.*}}{%k1}		; CHECK: vpermi2pd %ymm{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {		define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)		%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)		%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1		%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2		ret <4 x double> %res2
}		}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermi2ps %xmm{{.*}}{%k1}		; CHECK: vpermi2ps %xmm{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {		define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)		%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)		%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1		%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2		ret <4 x float> %res2
}		}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256		; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpermi2ps %ymm{{.*}}{%k1}		; CHECK: vpermi2ps %ymm{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {		define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)		%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2		ret <8 x float> %res2
}		}

declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128		; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpabsq{{.*}}{%k1}		; CHECK: vpabsq{{.*}}{%k1}
define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {		define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)		%res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)		%res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
%res2 = add <2 x i64> %res, %res1		%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2		ret <2 x i64> %res2
}		}

declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpabsq{{.*}}{%k1}		; CHECK: vpabsq{{.*}}{%k1}
define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {		define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)		%res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)		%res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
%res2 = add <4 x i64> %res, %res1		%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2		ret <4 x i64> %res2
}		}

declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)		declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128		; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpabsd{{.*}}{%k1}		; CHECK: vpabsd{{.*}}{%k1}
define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {		define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)		%res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)		%res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1		%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2		ret <4 x i32> %res2
}		}

declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256		; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vpabsd{{.*}}{%k1}		; CHECK: vpabsd{{.*}}{%k1}
define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {		define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)		%res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}


declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)		declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128		; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vscalefpd{{.*}}{%k1}		; CHECK: vscalefpd{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {		define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)		%res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)		%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1		%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2		ret <2 x double> %res2
}		}

declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)		declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256		; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vscalefpd{{.*}}{%k1}		; CHECK: vscalefpd{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {		define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)		%res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)		%res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1		%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2		ret <4 x double> %res2
}		}

declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128		; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vscalefps{{.*}}{%k1}		; CHECK: vscalefps{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {		define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)		%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)		%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1		%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2		ret <4 x float> %res2
}		}

declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256		; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vscalefps{{.*}}{%k1}		; CHECK: vscalefps{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {		define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)		%res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2		ret <8 x float> %res2
}		}

declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)		declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {		define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:		; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}		; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]		; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]
%res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)		%res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)		%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1		%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2		ret <2 x double> %res2
}		}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)		declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
Show All 20 Lines	; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1]
ret <4 x float> %res2		ret <4 x float> %res2
}		}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {		define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:		; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}		; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]		; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]
%res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)		%res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2		ret <8 x float> %res2
}		}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)		declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
▲ Show 20 Lines • Show All 1,394 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)		%res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2		ret <8 x float> %res2
}		}

declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)		declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128		; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vrndscalepd {{.*}}{%k1}		; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd		; CHECK: vrndscalepd
define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {		define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)		%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)		%res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1		%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2		ret <2 x double> %res2
}		}

declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)		declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256		; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vrndscalepd {{.*}}{%k1}		; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd		; CHECK: vrndscalepd
define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {		define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)		%res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)		%res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1		%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2		ret <4 x double> %res2
}		}

declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)		declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128		; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vrndscaleps {{.*}}{%k1}		; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps		; CHECK: vrndscaleps
define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {		define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)		%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)		%res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1		%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2		ret <4 x float> %res2
}		}

declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256		; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
; CHECK-NOT: call		; CHECK-NOT: call
; CHECK: kmov		; CHECK: kmov
; CHECK: vrndscaleps {{.*}}{%k1}		; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps		; CHECK: vrndscaleps
define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {		define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)		%res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2		ret <8 x float> %res2
}		}

declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)		declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {		define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:		; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax		; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1		; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}		; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
		; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
		; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
		; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0		; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0
		; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0		; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
		; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)		%res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)		%res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
%res2 = fadd <8 x float> %res, %res1		%res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
ret <8 x float> %res2		%res3 = fadd <8 x float> %res, %res1
		%res4 = fadd <8 x float> %res2, %res3
		ret <8 x float> %res4
}		}

declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)		declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {		define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:		; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax		; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1		; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}		; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
		; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
		; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
		; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0		; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0
		; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0		; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
		; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)		%res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)		%res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
%res2 = fadd <4 x double> %res, %res1		%res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
ret <4 x double> %res2		%res3 = fadd <4 x double> %res, %res1
		%res4 = fadd <4 x double> %res2, %res3
		ret <4 x double> %res4
}		}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)		declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {		define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:		; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax		; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1		; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}		; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
		; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0		; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0
		; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0		; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)		%res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)		%res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
%res2 = add <8 x i32> %res, %res1		%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2		ret <8 x i32> %res2
}		}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {		define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:		; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax		; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1		; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}		; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
		; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0		; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0
		; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0		; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)		%res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)		%res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
%res2 = add <4 x i64> %res, %res1		%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2		ret <4 x i64> %res2
}		}

▲ Show 20 Lines • Show All 446 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-512-v8.ll

	Show First 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
	ret <8 x double> %shuffle			ret <8 x double> %shuffle
	}			}

	define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {			define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
	; ALL-LABEL: shuffle_v8f64_01014545:			; ALL-LABEL: shuffle_v8f64_01014545:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5]			; ALL-NEXT: vshuff64x2 $160, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,0,1,4,5,4,5]
	; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>			%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
	ret <8 x double> %shuffle			ret <8 x double> %shuffle
	}			}

	define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {			define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
	; ALL-LABEL: shuffle_v8f64_00112233:			; ALL-LABEL: shuffle_v8f64_00112233:
	; ALL: # BB#0:			; ALL: # BB#0:
	▲ Show 20 Lines • Show All 561 Lines • ▼ Show 20 Lines
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
	ret <8 x i64> %shuffle			ret <8 x i64> %shuffle
	}			}

	define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {			define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
	; ALL-LABEL: shuffle_v8i64_01014545:			; ALL-LABEL: shuffle_v8i64_01014545:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5]			; ALL-NEXT: vshufi64x2 $160, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,0,1,4,5,4,5]
	; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>			%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
	ret <8 x i64> %shuffle			ret <8 x i64> %shuffle
	}			}

	define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {			define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
	; ALL-LABEL: shuffle_v8i64_00112233:			; ALL-LABEL: shuffle_v8i64_00112233:
	; ALL: # BB#0:			; ALL: # BB#0:
	▲ Show 20 Lines • Show All 501 Lines • ▼ Show 20 Lines
	define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {			define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
	; ALL-LABEL: shuffle_v8i64_193b5d7f:			; ALL-LABEL: shuffle_v8i64_193b5d7f:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]			; ALL-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>			%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
	ret <8 x i64> %shuffle			ret <8 x i64> %shuffle
	}			}

				define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
				; ALL-LABEL: test_vshuff64x2_512:
				; ALL: # BB#0:
				; ALL-NEXT: vshuff64x2 $24, %zmm1, %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
				; ALL-NEXT: retq
				%res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				ret <8 x double> %res
				}

				define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
				; ALL-LABEL: test_vshuff64x2_512_maskz:
				; ALL: # BB#0:
				; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
				; ALL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
				; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
				; ALL-NEXT: vshuff64x2 $24, %zmm1, %zmm0, %zmm0 {%k1} {z} # zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
				; ALL-NEXT: retq
				%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				%res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
				ret <8 x double> %res
				}

				define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
				; ALL-LABEL: test_vshufi64x2_512_mask:
				; ALL: # BB#0:
				; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
				; ALL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
				; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
				; ALL-NEXT: vshufi64x2 $24, %zmm1, %zmm0, %zmm0 {%k1} # zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
				; ALL-NEXT: retq
				%y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				%res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
				ret <8 x i64> %res
				}

				define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
				; ALL-LABEL: test_vshuff64x2_512_mem:
				; ALL: # BB#0:
				; ALL-NEXT: vshuff64x2 $24, (%rdi), %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
				; ALL-NEXT: retq
				%x1 = load <8 x double>,<8 x double> *%ptr,align 1
				%res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				ret <8 x double> %res
				}

				define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
				; ALL-LABEL: test_vshuff64x2_512_mem_mask:
				; ALL: # BB#0:
				; ALL-NEXT: vpmovsxwq %xmm1, %zmm1
				; ALL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
				RKSimonUnsubmitted Not Done Reply Inline Actions Nothing to do with this patch, but it's a shame that this is ANDing with a 512-bit constant mask instead of pre-ANDing with a 128-bit constant mask and then calling vpmovzxwq. RKSimon: Nothing to do with this patch, but it's a shame that this is ANDing with a 512-bit constant mask…
				igorbAuthorUnsubmitted Not Done Reply Inline Actions This is an AND with a 512-bit vector broadcast from a 64-bit memory location: vpandq LCPI0_0(%rip){1to8}, %zmm1, %zmm1 igorb: This is an AND with a 512-bit vector broadcast from a 64-bit memory location.
				; ALL-NEXT: vptestmq %zmm1, %zmm1, %k1
				; ALL-NEXT: vshuff64x2 $24, (%rdi), %zmm0, %zmm0 {%k1} # zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
				; ALL-NEXT: retq
				%x1 = load <8 x double>,<8 x double> *%ptr,align 1
				%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				%res = select <8 x i1> %mask, <8 x double> %y, <8 x double> %x
				ret <8 x double> %res
				}

				define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
				; ALL-LABEL: test_vshuff64x2_512_mem_maskz:
				; ALL: # BB#0:
				; ALL-NEXT: vpmovsxwq %xmm1, %zmm1
				; ALL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
				; ALL-NEXT: vptestmq %zmm1, %zmm1, %k1
				; ALL-NEXT: vshuff64x2 $24, (%rdi), %zmm0, %zmm0 {%k1} {z} # zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
				; ALL-NEXT: retq
				%x1 = load <8 x double>,<8 x double> *%ptr,align 1
				%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
				%res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
				ret <8 x double> %res
				}

				define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
				; ALL-LABEL: test_vshuff32x4_512:
				; ALL: # BB#0:
				; ALL-NEXT: vshuff64x2 $20, %zmm1, %zmm0, %zmm0 # zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
				; ALL-NEXT: retq
				%res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
				ret <16 x float> %res
				}
				No newline at end of file

test/CodeGen/X86/vector-shuffle-v1.ll

	Show First 20 Lines • Show All 207 Lines • ▼ Show 20 Lines
	}			}

	define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {			define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
	; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:			; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
	; AVX512F: # BB#0:			; AVX512F: # BB#0:
	; AVX512F-NEXT: movzbl %dil, %eax			; AVX512F-NEXT: movzbl %dil, %eax
	; AVX512F-NEXT: kmovw %eax, %k1			; AVX512F-NEXT: kmovw %eax, %k1
	; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}			; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
	; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,1,4,5,u,u,u,u>			; AVX512F-NEXT: vshufi64x2 $8, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5,0,1,0,1]
	; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
	; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0			; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
	; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0			; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
	; AVX512F-NEXT: kmovw %k0, %eax			; AVX512F-NEXT: kmovw %k0, %eax
	; AVX512F-NEXT: retq			; AVX512F-NEXT: retq
	;			;
	; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:			; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
	; VL_BW_DQ: # BB#0:			; VL_BW_DQ: # BB#0:
	; VL_BW_DQ-NEXT: kmovb %edi, %k0			; VL_BW_DQ-NEXT: kmovb %edi, %k0
	; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0			; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
	; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,1,4,5,u,u,u,u>			; VL_BW_DQ-NEXT: vshufi64x2 $8, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5,0,1,0,1]
	; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
	; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0			; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
	; VL_BW_DQ-NEXT: kmovb %k0, %eax			; VL_BW_DQ-NEXT: kmovb %k0, %eax
	; VL_BW_DQ-NEXT: retq			; VL_BW_DQ-NEXT: retq
	%b = bitcast i8 %a to <8 x i1>			%b = bitcast i8 %a to <8 x i1>
	%c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>			%c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
	%d = bitcast <8 x i1> %c to i8			%d = bitcast <8 x i1> %c to i8
	ret i8 %d			ret i8 %d
	}			}
	▲ Show 20 Lines • Show All 165 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AVX512: shuff64x2 DAG lowering
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 37474

lib/Target/X86/InstPrinter/X86InstComments.cpp

lib/Target/X86/Utils/X86ShuffleDecode.h

lib/Target/X86/Utils/X86ShuffleDecode.cpp

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/avx512-intrinsics.ll

test/CodeGen/X86/avx512vl-intrinsics.ll

test/CodeGen/X86/vector-shuffle-512-v8.ll

test/CodeGen/X86/vector-shuffle-v1.ll

This is an archive of the discontinued LLVM Phabricator instance.

AVX512: shuff64x2 DAG lowering (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 37474

lib/Target/X86/InstPrinter/X86InstComments.cpp

lib/Target/X86/Utils/X86ShuffleDecode.h

lib/Target/X86/Utils/X86ShuffleDecode.cpp

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/avx512-intrinsics.ll

test/CodeGen/X86/avx512vl-intrinsics.ll

test/CodeGen/X86/vector-shuffle-512-v8.ll

test/CodeGen/X86/vector-shuffle-v1.ll

AVX512: shuff64x2 DAG lowering
ClosedPublic