This is an archive of the discontinued LLVM Phabricator instance.

Differential D20049

[X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions
ClosedPublic

Authored by RKSimon on May 7 2016, 8:53 AM.

Download Raw Diff

Details

Reviewers

spatel
andreadb
craig.topper

Commits

rGe85506b6e0de: [X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions
rL271633: [X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions

Summary

This patch begins adding support for lowering to the XOP VPERMIL2PD/VPERMIL2PS shuffle instructions - adding the X86ISD::VPERMIL2 opcode and cleaning up the usage.

Mask decoding/target shuffle support will be added in future patches - this patch has to do some initial cleanup, as the internal LLVM intrinsics were assuming the shuffle mask operand was the same type as the float/double input operands (I guess to simplify the intrinsic definitions in X86InstrXOP.td to a single value type). These needed changing to integer types - this matches the clang builtin and the AMD intrinsics definitions. As it's just the LLVM intrinsic IR, I don't think I need to provide an upgrade path, but I can if anybody thinks it necessary.

Diff Detail

Repository: rL LLVM

Event Timeline

RKSimon updated this revision to Diff 56499. May 7 2016, 8:53 AM

RKSimon retitled this revision to [X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions.

RKSimon updated this object.

RKSimon added reviewers: craig.topper, spatel, andreadb.

RKSimon set the repository for this revision to rL LLVM.

RKSimon added a subscriber: llvm-commits.

ping?

Added auto-upgrade for old vpermil2 intrinsics still using a float/double vector for their shuffle mask.

LGTM.

This revision is now accepted and ready to land. Jun 2 2016, 8:50 AM

Closed by commit rL271633: [X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions (authored by RKSimon). · Explain Why · Jun 3 2016, 1:12 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

include/

llvm/

IR/

IntrinsicsX86.td

8 lines

lib/

IR/

AutoUpgrade.cpp

35 lines

Target/

X86/

X86ISelLowering.h

2 lines

X86ISelLowering.cpp

1 line

X86InstrFragmentsSIMD.td

7 lines

X86InstrXOP.td

42 lines

X86IntrinsicsInfo.h

4 lines

test/

CodeGen/

X86/

stack-folding-xop.ll

40 lines

vector-shuffle-combining-xop.ll

40 lines

xop-intrinsics-x86_64-upgrade.ll

76 lines

xop-intrinsics-x86_64.ll

44 lines

Diff 59495

llvm/trunk/include/llvm/IR/IntrinsicsX86.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,867 Lines • ▼ Show 20 Lines	def int_x86_avx512_maskz_vpmadd52l_uq_512 :
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;		llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// XOP		// XOP

def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,		def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty],		llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;		[IntrNoMem]>;

def int_x86_xop_vpermil2pd_256 :		def int_x86_xop_vpermil2pd_256 :
GCCBuiltin<"__builtin_ia32_vpermil2pd256">,		GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,		Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty],		llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;		[IntrNoMem]>;

def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,		def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty],		llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_x86_xop_vpermil2ps_256 :		def int_x86_xop_vpermil2ps_256 :
GCCBuiltin<"__builtin_ia32_vpermil2ps256">,		GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,		Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty],		llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;		[IntrNoMem]>;

def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,		def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,		def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;		Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,		def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;		Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
▲ Show 20 Lines • Show All 4,448 Lines • Show Last 20 Lines

llvm/trunk/lib/IR/AutoUpgrade.cpp

Show First 20 Lines • Show All 276 Lines • ▼ Show 20 Lines	if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
return true;		return true;
}		}
// Fix the FMA4 intrinsics to remove the 4		// Fix the FMA4 intrinsics to remove the 4
if (Name.startswith("x86.fma4.")) {		if (Name.startswith("x86.fma4.")) {
F->setName("llvm.x86.fma" + Name.substr(8));		F->setName("llvm.x86.fma" + Name.substr(8));
NewFn = F;		NewFn = F;
return true;		return true;
}		}
		// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
		if (Name.startswith("x86.xop.vpermil2")) {
		auto Params = F->getFunctionType()->params();
		auto Idx = Params[2];
		if (Idx->getScalarType()->isFloatingPointTy()) {
		F->setName(Name + ".old");
		unsigned IdxSize = Idx->getPrimitiveSizeInBits();
		unsigned EltSize = Idx->getScalarSizeInBits();
		Intrinsic::ID Permil2ID;
		if (EltSize == 64 && IdxSize == 128)
		Permil2ID = Intrinsic::x86_xop_vpermil2pd;
		else if (EltSize == 32 && IdxSize == 128)
		Permil2ID = Intrinsic::x86_xop_vpermil2ps;
		else if (EltSize == 64 && IdxSize == 256)
		Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
		else
		Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
		NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
		return true;
		}
		}
break;		break;
}		}
}		}

// This may not belong here. This function is effectively being overloaded		// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the		// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate		// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.		// functions for this.
▲ Show 20 Lines • Show All 613 Lines • ▼ Show 20 Lines	void llvm::UpgradeIntrinsicCall(CallInst CI, Function NewFn) {

case Intrinsic::x86_xop_vfrcz_ss:		case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:		case Intrinsic::x86_xop_vfrcz_sd:
CI->replaceAllUsesWith(		CI->replaceAllUsesWith(
Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));		Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
CI->eraseFromParent();		CI->eraseFromParent();
return;		return;

		case Intrinsic::x86_xop_vpermil2pd:
		case Intrinsic::x86_xop_vpermil2ps:
		case Intrinsic::x86_xop_vpermil2pd_256:
		case Intrinsic::x86_xop_vpermil2ps_256: {
		SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
		CI->arg_operands().end());
		VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
		VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
		Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
		CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
		CI->eraseFromParent();
		return;
		}

case Intrinsic::x86_sse41_ptestc:		case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:		case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {		case Intrinsic::x86_sse41_ptestnzc: {
// The arguments for these intrinsics used to be v4f32, and changed		// The arguments for these intrinsics used to be v4f32, and changed
// to v2i64. This is purely a nop, since those are bitwise intrinsics.		// to v2i64. This is purely a nop, since those are bitwise intrinsics.
// So, the only thing required is a bitcast for both arguments.		// So, the only thing required is a bitcast for both arguments.
// First, check the arguments have the old type.		// First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0);		Value *Arg0 = CI->getArgOperand(0);
▲ Show 20 Lines • Show All 236 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 445 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
// XOP variable/immediate rotations.		// XOP variable/immediate rotations.
VPROT, VPROTI,		VPROT, VPROTI,
// XOP arithmetic/logical shifts.		// XOP arithmetic/logical shifts.
VPSHA, VPSHL,		VPSHA, VPSHL,
// XOP signed/unsigned integer comparisons.		// XOP signed/unsigned integer comparisons.
VPCOM, VPCOMU,		VPCOM, VPCOMU,
// XOP packed permute bytes.		// XOP packed permute bytes.
VPPERM,		VPPERM,
		// XOP two source permutation.
		VPERMIL2,

// Vector multiply packed unsigned doubleword integers.		// Vector multiply packed unsigned doubleword integers.
PMULUDQ,		PMULUDQ,
// Vector multiply packed signed doubleword integers.		// Vector multiply packed signed doubleword integers.
PMULDQ,		PMULDQ,
// Vector Multiply Packed UnsignedIntegers with Round and Scale.		// Vector Multiply Packed UnsignedIntegers with Round and Scale.
MULHRS,		MULHRS,

▲ Show 20 Lines • Show All 772 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 21,941 Lines • ▼ Show 20 Lines	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";		case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";		case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::VPROT: return "X86ISD::VPROT";		case X86ISD::VPROT: return "X86ISD::VPROT";
case X86ISD::VPROTI: return "X86ISD::VPROTI";		case X86ISD::VPROTI: return "X86ISD::VPROTI";
case X86ISD::VPSHA: return "X86ISD::VPSHA";		case X86ISD::VPSHA: return "X86ISD::VPSHA";
case X86ISD::VPSHL: return "X86ISD::VPSHL";		case X86ISD::VPSHL: return "X86ISD::VPSHL";
case X86ISD::VPCOM: return "X86ISD::VPCOM";		case X86ISD::VPCOM: return "X86ISD::VPCOM";
case X86ISD::VPCOMU: return "X86ISD::VPCOMU";		case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
		case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
case X86ISD::FMADD: return "X86ISD::FMADD";		case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";		case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";		case X86ISD::FNMADD: return "X86ISD::FNMADD";
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";		case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";		case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";		case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";		case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";
case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";		case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";
▲ Show 20 Lines • Show All 9,054 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td

	Show First 20 Lines • Show All 239 Lines • ▼ Show 20 Lines
	def X86vpcom : SDNode<"X86ISD::VPCOM",			def X86vpcom : SDNode<"X86ISD::VPCOM",
	SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,			SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>,			SDTCisSameAs<0,2>,
	SDTCisVT<3, i8>]>>;			SDTCisVT<3, i8>]>>;
	def X86vpcomu : SDNode<"X86ISD::VPCOMU",			def X86vpcomu : SDNode<"X86ISD::VPCOMU",
	SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,			SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>,			SDTCisSameAs<0,2>,
	SDTCisVT<3, i8>]>>;			SDTCisVT<3, i8>]>>;
				def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
				SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
				SDTCisSameAs<0,2>,
				SDTCisSameSizeAs<0,3>,
				SDTCisSameNumEltsAs<0, 3>,
				SDTCisVT<4, i8>]>>;
	def X86vpperm : SDNode<"X86ISD::VPPERM",			def X86vpperm : SDNode<"X86ISD::VPPERM",
	SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,			SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>]>>;			SDTCisSameAs<0,2>]>>;

	def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,			def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
	SDTCisVec<1>,			SDTCisVec<1>,
	SDTCisSameAs<2, 1>]>;			SDTCisSameAs<2, 1>]>;

	▲ Show 20 Lines • Show All 769 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrXOP.td

Show First 20 Lines • Show All 336 Lines • ▼ Show 20 Lines	def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
(X86andnp VR128:$src3, VR128:$src2))),		(X86andnp VR128:$src3, VR128:$src2))),
(VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;		(VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;

def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),		def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
(X86andnp VR256:$src3, VR256:$src2))),		(X86andnp VR256:$src3, VR256:$src2))),
(VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;		(VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
}		}

multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,		multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {		ValueType vt128, ValueType vt256,
		ValueType id128, ValueType id256,
		PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),		def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),		(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,		[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;		(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
		(id128 VR128:$src3), (i8 imm:$src4))))]>;
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),		def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3, u8imm:$src4),		(ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,		[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,		(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
		(id128 (bitconvert (loadv2i64 addr:$src3))),
		(i8 imm:$src4))))]>,
VEX_W, MemOp4;		VEX_W, MemOp4;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),		def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),		(ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,		[(set VR128:$dst,
(Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;		(vt128 (OpNode (vt128 VR128:$src1),
		(vt128 (bitconvert (ld_128 addr:$src2))),
		(id128 VR128:$src3), (i8 imm:$src4))))]>;
// For disassembler		// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in		let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),		def rr_REV : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),		(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, MemOp4;		[]>, VEX_W, MemOp4;

def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),		def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),		(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,		[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;		(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
		(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),		def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3, u8imm:$src4),		(ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,		[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,		(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
VEX_W, MemOp4, VEX_L;		(id256 (bitconvert (loadv4i64 addr:$src3))),
		(i8 imm:$src4))))]>, VEX_W, MemOp4, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),		def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),		(ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,		[(set VR256:$dst,
(Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,		(vt256 (OpNode (vt256 VR256:$src1),
VEX_L;		(vt256 (bitconvert (ld_256 addr:$src2))),
		(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
// For disassembler		// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in		let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrY_REV : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),		def rrY_REV : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),		(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,		!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),		"\t{$src4, $src3, $src2, $src1, $dst\|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, MemOp4, VEX_L;		[]>, VEX_W, MemOp4, VEX_L;
}		}

let ExeDomain = SSEPackedDouble in		let ExeDomain = SSEPackedDouble in
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,		defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;		v2i64, v4i64, loadv2f64, loadv4f64>;

let ExeDomain = SSEPackedSingle in		let ExeDomain = SSEPackedSingle in
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,		defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;		v4i32, v8i32, loadv4f32, loadv8f32>;

llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h

	Show First 20 Lines • Show All 2,228 Lines • ▼ Show 20 Lines
	X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),			X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
	X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),			X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
	X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),			X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
	X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),			X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
	X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),			X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
	X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),			X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
	X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),			X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
	X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),			X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
				X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
				X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
				X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
				X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
	X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),			X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
	X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),			X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
	X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),			X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
	X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),			X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
	X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),			X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
	X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, X86ISD::VPROT, 0),			X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, X86ISD::VPROT, 0),
	X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),			X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
	X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, X86ISD::VPROT, 0),			X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, X86ISD::VPROT, 0),
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/stack-folding-xop.ll

Show First 20 Lines • Show All 160 Lines • ▼ Show 20 Lines	define <8 x i16> @stack_fold_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_vpcomw		;CHECK-LABEL: stack_fold_vpcomw
;CHECK: vpcomltw {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.#+}} 16-byte Folded Reload		;CHECK: vpcomltw {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0)		%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0)
ret <8 x i16> %2		ret <8 x i16> %2
}		}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone		declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {		define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
;CHECK-LABEL: stack_fold_vpermil2pd_rm		;CHECK-LABEL: stack_fold_vpermil2pd_rm
;CHECK: vpermil2pd $0, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload		;CHECK: vpermil2pd $0, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 0)		%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
ret <2 x double> %2		ret <2 x double> %2
}		}
define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {		define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x i64> %a1, <2 x double> %a2) {
;CHECK-LABEL: stack_fold_vpermil2pd_mr		;CHECK-LABEL: stack_fold_vpermil2pd_mr
;CHECK: vpermil2pd $0, {{%xmm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload		;CHECK: vpermil2pd $0, {{%xmm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x double> %a1, i8 0)		%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x i64> %a1, i8 0)
ret <2 x double> %2		ret <2 x double> %2
}		}
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone		declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {		define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
;CHECK-LABEL: stack_fold_vpermil2pd_rm		;CHECK-LABEL: stack_fold_vpermil2pd_rm
;CHECK: vpermil2pd $0, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload		;CHECK: vpermil2pd $0, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 0)		%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
ret <4 x double> %2		ret <4 x double> %2
}		}
define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {		define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x double> %a2) {
;CHECK-LABEL: stack_fold_vpermil2pd_mr		;CHECK-LABEL: stack_fold_vpermil2pd_mr
;CHECK: vpermil2pd $0, {{%ymm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload		;CHECK: vpermil2pd $0, {{%ymm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x double> %a1, i8 0)		%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x i64> %a1, i8 0)
ret <4 x double> %2		ret <4 x double> %2
}		}
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone		declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {		define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
;CHECK-LABEL: stack_fold_vpermil2ps_rm		;CHECK-LABEL: stack_fold_vpermil2ps_rm
;CHECK: vpermil2ps $0, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload		;CHECK: vpermil2ps $0, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 0)		%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 0)
ret <4 x float> %2		ret <4 x float> %2
}		}
define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {		define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x i32> %a1, <4 x float> %a2) {
;CHECK-LABEL: stack_fold_vpermil2ps_mr		;CHECK-LABEL: stack_fold_vpermil2ps_mr
;CHECK: vpermil2ps $0, {{%xmm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload		;CHECK: vpermil2ps $0, {{%xmm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}}, {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x float> %a1, i8 0)		%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x i32> %a1, i8 0)
ret <4 x float> %2		ret <4 x float> %2
}		}
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone		declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone

define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {		define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
;CHECK-LABEL: stack_fold_vpermil2ps_rm		;CHECK-LABEL: stack_fold_vpermil2ps_rm
;CHECK: vpermil2ps $0, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload		;CHECK: vpermil2ps $0, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 0)		%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 0)
ret <8 x float> %2		ret <8 x float> %2
}		}
define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {		define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x float> %a2) {
;CHECK-LABEL: stack_fold_vpermil2ps_mr		;CHECK-LABEL: stack_fold_vpermil2ps_mr
;CHECK: vpermil2ps $0, {{%ymm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload		;CHECK: vpermil2ps $0, {{%ymm[0-9][0-9]}}, {{-?[0-9]}}(%rsp), {{%ymm[0-9][0-9]}}, {{%ymm[0-9][0-9]}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x float> %a1, i8 0)		%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x i32> %a1, i8 0)
ret <8 x float> %2		ret <8 x float> %2
}		}
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone		declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

define <4 x i32> @stack_fold_vphaddbd(<16 x i8> %a0) {		define <4 x i32> @stack_fold_vphaddbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_vphaddbd		;CHECK-LABEL: stack_fold_vphaddbd
;CHECK: vphaddbd {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload		;CHECK: vphaddbd {{-?[0-9]}}(%rsp), {{%xmm[0-9][0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()		%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)		%2 = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
ret <4 x i32> %2		ret <4 x i32> %2
}		}
▲ Show 20 Lines • Show All 479 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s

	declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone			declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
	declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone			declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

	declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone			declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
	declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone			declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

	declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone			declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

	define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {			define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
	; CHECK-LABEL: combine_vpermil2pd_identity:			; CHECK-LABEL: combine_vpermil2pd_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero			; CHECK-NEXT: movl $2, %eax
				; CHECK-NEXT: vmovq %rax, %xmm2
	; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0			; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
	; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%mask = bitcast <2 x i64> <i64 2, i64 0> to <2 x double>			%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x i64> <i64 2, i64 0>, i8 0)
	%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x double> %mask, i8 0)			%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x i64> <i64 2, i64 0>, i8 0)
	%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x double> %mask, i8 0)
	ret <2 x double> %res1			ret <2 x double> %res1
	}			}

	define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {			define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {
	; CHECK-LABEL: combine_vpermil2pd256_identity:			; CHECK-LABEL: combine_vpermil2pd256_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [9.881313e-324,0.000000e+00,9.881313e-324,0.000000e+00]			; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,2,0]
	; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0			; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0
	; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%mask = bitcast <4 x i64> <i64 2, i64 0, i64 2, i64 0> to <4 x double>			%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
	%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x double> %mask, i8 0)			%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
	%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x double> %mask, i8 0)
	ret <4 x double> %res1			ret <4 x double> %res1
	}			}

	define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {			define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
	; CHECK-LABEL: combine_vpermil2ps_identity:			; CHECK-LABEL: combine_vpermil2ps_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [4.203895e-45,2.802597e-45,1.401298e-45,0.000000e+00]			; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,2,1,0]
	; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0			; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0
	; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%mask = bitcast <4 x i32> <i32 3, i32 2, i32 1, i32 0> to <4 x float>			%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
	%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x float> %mask, i8 0)			%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
	%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x float> %mask, i8 0)
	ret <4 x float> %res1			ret <4 x float> %res1
	}			}

	define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {			define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
	; CHECK-LABEL: combine_vpermil2ps256_identity:			; CHECK-LABEL: combine_vpermil2ps256_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4.203895e-45,2.802597e-45,1.401298e-45,0.000000e+00,1.401298e-45,0.000000e+00,4.203895e-45,2.802597e-45]			; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,1,0,3,2]
	; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0			; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0
	; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%mask = bitcast <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2> to <8 x float>			%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
	%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x float> %mask, i8 0)			%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
	%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x float> %mask, i8 0)
	ret <8 x float> %res1			ret <8 x float> %res1
	}			}

	define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {			define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
	; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:			; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2ps $2, {{.*}}(%rip), %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpermil2ps $2, {{.*}}(%rip), %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%mask = bitcast <4 x i32> <i32 8, i32 1, i32 2, i32 3> to <4 x float>			%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
	%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %mask, i8 2)
	ret <4 x float> %res0			ret <4 x float> %res0
	}			}

	define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {			define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
	; CHECK-LABEL: combine_vpperm_identity:			; CHECK-LABEL: combine_vpperm_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vmovaps %xmm1, %xmm0			; CHECK-NEXT: vmovaps %xmm1, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s

				define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: retq
				%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1]
				ret <2 x double> %res
				}
				define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
				; CHECK-NEXT: retq
				%vec = load <2 x double>, <2 x double>* %a1
				%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
				ret <2 x double> %res
				}
				define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: retq
				%vec = load <2 x double>, <2 x double>* %a2
				%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
				ret <2 x double> %res
				}
				declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

				define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
				; CHECK-NEXT: retq
				%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
				ret <4 x double> %res
				}
				define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
				; CHECK-NEXT: retq
				%vec = load <4 x double>, <4 x double>* %a1
				%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
				ret <4 x double> %res
				}
				define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
				; CHECK-NEXT: retq
				%vec = load <4 x double>, <4 x double>* %a2
				%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
				ret <4 x double> %res
				}
				declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

				define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: retq
				%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
				ret <4 x float> %res
				}
				declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

				define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
				; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
				; CHECK-NEXT: retq
				%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
				ret <8 x float> %res
				}
				declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

	define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {			define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
	; CHECK-LABEL: test_int_x86_xop_vpcomeqb:			; CHECK-LABEL: test_int_x86_xop_vpcomeqb:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpcomeqb %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpcomeqb %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;			%res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
	ret <16 x i8> %res			ret <16 x i8> %res
	}			}
	▲ Show 20 Lines • Show All 640 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s

	define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {			define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1]			%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 1) ; [#uses=1]
	ret <2 x double> %res			ret <2 x double> %res
	}			}
	define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {			define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x i64> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0			; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%vec = load <2 x double>, <2 x double>* %a1			%vec = load <2 x double>, <2 x double>* %a1
	%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]			%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x i64> %a2, i8 1) ; [#uses=1]
	ret <2 x double> %res			ret <2 x double> %res
	}			}
	define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {			define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64>* %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%vec = load <2 x double>, <2 x double>* %a2			%vec = load <2 x i64>, <2 x i64>* %a2
	%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]			%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %vec, i8 1) ; [#uses=1]
	ret <2 x double> %res			ret <2 x double> %res
	}			}
	declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone			declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

	define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {			define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0			; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;			%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 2) ;
	ret <4 x double> %res			ret <4 x double> %res
	}			}
	define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {			define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x i64> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0			; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%vec = load <4 x double>, <4 x double>* %a1			%vec = load <4 x double>, <4 x double>* %a1
	%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;			%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x i64> %a2, i8 2) ;
	ret <4 x double> %res			ret <4 x double> %res
	}			}
	define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {			define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x i64>* %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:			; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0			; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%vec = load <4 x double>, <4 x double>* %a2			%vec = load <4 x i64>, <4 x i64>* %a2
	%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;			%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %vec, i8 2) ;
	ret <4 x double> %res			ret <4 x double> %res
	}			}
	declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone			declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

	define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {			define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2ps:			; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;			%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 3) ;
	ret <4 x float> %res			ret <4 x float> %res
	}			}
	declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone			declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone

	define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {			define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:			; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0			; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;			%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 4) ;
	ret <8 x float> %res			ret <8 x float> %res
	}			}
	declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone			declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

	define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {			define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
	; CHECK-LABEL: test_int_x86_xop_vpcmov:			; CHECK-LABEL: test_int_x86_xop_vpcmov:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0			; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;			%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
	ret <2 x i64> %res			ret <2 x i64> %res
	▲ Show 20 Lines • Show All 730 Lines • Show Last 20 Lines