Diff 74799

lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 296 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
VTRUNCUS, VTRUNCS,		VTRUNCUS, VTRUNCS,

// Vector FP extend.		// Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND,		VFPEXT, VFPEXT_RND, VFPEXTS_RND,

// Vector FP round.		// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,		VFPROUND, VFPROUND_RND, VFPROUNDS_RND,

		// Vector double to signed integer (truncated).
		CVTTPD2DQ,

// Vector signed/unsigned integer to double.		// Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD,		CVTDQ2PD, CVTUDQ2PD,

// Convert a vector to mask, set bits based on MSB.		// Convert a vector to mask, set bits based on MSB.
CVT2MASK,		CVT2MASK,

// 128-bit vector logical left / right shift		// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,		VSHLDQ, VSRLDQ,
▲ Show 20 Lines • Show All 972 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 849 Lines • ▼ Show 20 Lines	for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);		setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
}		}

// Custom lower v2i64 and v2f64 selects.		// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);		setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);		setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);		setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);		setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);

		setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);		setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);

setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);		setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);		setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.		// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);		setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

▲ Show 20 Lines • Show All 21,364 Lines • ▼ Show 20 Lines	case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);		SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);		Results.push_back(V);
return;		return;
}		}
case ISD::FP_TO_SINT:		case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {		case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;		bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;

		if (IsSigned && N->getValueType(0) == MVT::v2i32) {
		delenaUnsubmitted Not Done Reply Inline Actions Why you do not call Lower_FP_TO_SINT ? delena: Why you do not call Lower_FP_TO_SINT ?
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions As the result type is the illegal v2i32 type we need to use ReplaceNodeResults - we can't just go direct to Lower_FP_TO_SINT as type legalization will get in the way. RKSimon: As the result type is the illegal v2i32 type we need to use ReplaceNodeResults - we can't just…
		delenaUnsubmitted Not Done Reply Inline Actions If you specify setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom) - specify source type, because type legalizer looks at it first. it takes you directly from type legalizer to LowerFP_TO_SINT(). Please look at LowerSINT_TO_FP(), we do the same there. Additionally, you do setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom) - it will call LowerFP_TO_SINT() for v2f64 -> v2i32 delena: If you specify setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom) - specify…
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions I think the difference is that we are returning a illegal type (v2i32) while your SINT_TO_FP examples are taking it as an input. RKSimon: I think the difference is that we are returning a illegal type (v2i32) while your SINT_TO_FP…
		assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
		SDValue Src = N->getOperand(0);
		delenaUnsubmitted Done Reply Inline Actions It is source, not Res. Please change variable name. delena: It is source, not Res. Please change variable name.
		if (Src.getValueType() == MVT::v2f64) {
		SDValue Idx = DAG.getIntPtrConstant(0, dl);
		SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src);
		delenaUnsubmitted Not Done Reply Inline Actions Can you use FP_TO_SINT here? delena: Can you use FP_TO_SINT here?
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions See above. RKSimon: See above.
		delenaUnsubmitted Not Done Reply Inline Actions I meant using ISD::FP_TO_SINT instead of X86ISD::CVTTPD2DQ delena: I meant using ISD::FP_TO_SINT instead of X86ISD::CVTTPD2DQ
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions This is similar to the need for X86ISD::CVTDQ2PD - can't use (v2f32 SINT_TO_FP (v2i32)) would return an illegal type. RKSimon: This is similar to the need for X86ISD::CVTDQ2PD - can't use (v2f32 SINT_TO_FP (v2i32)) would…
		Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
		Results.push_back(Res);
		return;
		}
		if (Src.getValueType() == MVT::v2f32) {
		SDValue Idx = DAG.getIntPtrConstant(0, dl);
		SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
		DAG.getUNDEF(MVT::v2f32));
		Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res);
		Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
		Results.push_back(Res);
		return;
		}

		craig.topperUnsubmitted Not Done Reply Inline Actions What types does the FIST code below handle? Should there be a return or llvm_unreachable after the MVT::v2f32 if. I don't think the FIST code was intended for a v2i32 result type so we shouldn't fallthrough into it. craig.topper: What types does the FIST code below handle? Should there be a return or llvm_unreachable after…
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions FP_TO_INTHelper only handles f32/f64/f80 scalar inputs, returning null for other inputs. rL283485 added tests to check this. If you like I can add an early return at the end of the MVT::v2i32 block to make it clearer? RKSimon: FP_TO_INTHelper only handles f32/f64/f80 scalar inputs, returning null for other inputs.
		craig.topperUnsubmitted Not Done Reply Inline Actions Yeah that's probably a little clearer. craig.topper: Yeah that's probably a little clearer.
		// The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
		// so early out here.
		return;
		}

std::pair<SDValue,SDValue> Vals =		std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);		FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;		SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {		if (FIST.getNode()) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
// Return a load from the stack slot.		// Return a load from the stack slot.
if (StackSlot.getNode())		if (StackSlot.getNode())
Results.push_back(		Results.push_back(
▲ Show 20 Lines • Show All 302 Lines • ▼ Show 20 Lines	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";		case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";		case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";		case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";		case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";		case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";		case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";		case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";		case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
		case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";		case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";		case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";		case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";		case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";		case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";		case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";		case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::VSRA: return "X86ISD::VSRA";		case X86ISD::VSRA: return "X86ISD::VSRA";
▲ Show 20 Lines • Show All 10,260 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrAVX512.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,256 Lines • ▼ Show 20 Lines	def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
VR128X:$src1, sub_xmm)))), sub_xmm)>;		VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),		def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr		(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),		(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;		VR128X:$src1, sub_xmm)))), sub_ymm)>;
}		}

		let Predicates = [HasAVX512, HasVLX] in {
		def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
		(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))))),
		(VCVTTPD2DQZ128rr VR128:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src))),
		(VCVTTPD2DQZ128rr VR128X:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
		(VCVTTPD2DQZ128rm addr:$src)>;
		}

let Predicates = [HasAVX512] in {		let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),		def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;		(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),		def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;		(VCVTPS2PDZrm addr:$src)>;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Half precision conversion instructions		// Half precision conversion instructions
		craig.topperUnsubmitted Not Done Reply Inline Actions Why is this block under HasAVX512, but the AVX patterns in X86InstrSSE.td are only disabled if VLX is enabled? craig.topper: Why is this block under HasAVX512, but the AVX patterns in X86InstrSSE.td are only…
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,		multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag> {		X86MemOperand x86memop, PatFrag ld_frag> {
defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),		defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "$src", "$src",		"vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT _src.RC:$src),		(X86cvtph2ps (_src.VT _src.RC:$src),
(i32 FROUND_CURRENT))>, T8PD;		(i32 FROUND_CURRENT))>, T8PD;
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),		defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
▲ Show 20 Lines • Show All 2,474 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrFragmentsSIMD.td

	Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;			def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;
	def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;			def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
	def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;			def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
	def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;			def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
	def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;			def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
	def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;			def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
	def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;			def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
	def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;			def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
				def X86cvttpd2dq: SDNode<"X86ISD::CVTTPD2DQ",
				SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
				SDTCisVT<1, v2f64>]>>;
	def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",			def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
	SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,			SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
	SDTCisVT<1, v4i32>]>>;			SDTCisVT<1, v4i32>]>>;
	def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",			def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
	SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,			SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
	SDTCisVT<1, v4i32>]>>;			SDTCisVT<1, v4i32>]>>;
	def X86pshufb : SDNode<"X86ISD::PSHUFB",			def X86pshufb : SDNode<"X86ISD::PSHUFB",
	SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,			SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
	▲ Show 20 Lines • Show All 943 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrSSE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,105 Lines • ▼ Show 20 Lines	def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",		"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst,		[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],		(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;		IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst\|$dst, $src}",		def : InstAlias<"vcvttpd2dq\t{$src, $dst\|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;		(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {		let Predicates = [HasAVX, NoVLX] in {
		def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
		(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
		(VCVTTPD2DQrr VR128:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
		(VCVTTPD2DQrr VR128:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
		(VCVTTPD2DQXrm addr:$src)>;

def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),		def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;		(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),		def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;		(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]		} // Predicates = [HasAVX, NoVLX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),		def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst\|$dst, $src}",		"cvttpd2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],		[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;		IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),		def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst\|$dst, $src}",		"cvttpd2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq		[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],		(memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>,		IIC_SSE_CVT_PD_RM>,
Sched<[WriteCvtF2ILd]>;		Sched<[WriteCvtF2ILd]>;

		let Predicates = [UseSSE2] in {
		def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
		(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
		(CVTTPD2DQrr VR128:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
		(CVTTPD2DQrr VR128:$src)>;
		def : Pat<(v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))),
		(CVTTPD2DQrm addr:$src)>;
		} // Predicates = [UseSSE2]

// Convert packed single to packed double		// Convert packed single to packed double
let Predicates = [HasAVX] in {		let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix		// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),		def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst\|$dst, $src}",		"vcvtps2pd\t{$src, $dst\|$dst, $src}",
[], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;		[], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),		def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst\|$dst, $src}",		"vcvtps2pd\t{$src, $dst\|$dst, $src}",
▲ Show 20 Lines • Show All 6,699 Lines • Show Last 20 Lines

lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 799 Lines • ▼ Show 20 Lines	static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },		{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },		{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },		{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },		{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },		{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },		{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },		{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },

		{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },

{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },		{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },		{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },		{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },		{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },		{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },		{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
▲ Show 20 Lines • Show All 893 Lines • Show Last 20 Lines

test/Analysis/CostModel/X86/fptosi.ll

	Show All 38 Lines
	; CHECK-LABEL: 'fptosi_double_i32'			; CHECK-LABEL: 'fptosi_double_i32'
	define i32 @fptosi_double_i32(i32 %arg) {			define i32 @fptosi_double_i32(i32 %arg) {
	; SSE2: cost of 1 {{.*}} %I32 = fptosi			; SSE2: cost of 1 {{.*}} %I32 = fptosi
	; SSE42: cost of 1 {{.*}} %I32 = fptosi			; SSE42: cost of 1 {{.*}} %I32 = fptosi
	; AVX1: cost of 1 {{.*}} %I32 = fptosi			; AVX1: cost of 1 {{.*}} %I32 = fptosi
	; AVX2: cost of 1 {{.*}} %I32 = fptosi			; AVX2: cost of 1 {{.*}} %I32 = fptosi
	; AVX512: cost of 1 {{.*}} %I32 = fptosi			; AVX512: cost of 1 {{.*}} %I32 = fptosi
	%I32 = fptosi double undef to i32			%I32 = fptosi double undef to i32
	; SSE2: cost of 6 {{.*}} %V2I32 = fptosi			; SSE2: cost of 3 {{.*}} %V2I32 = fptosi
	; SSE42: cost of 6 {{.*}} %V2I32 = fptosi			; SSE42: cost of 3 {{.*}} %V2I32 = fptosi
	; AVX1: cost of 6 {{.*}} %V2I32 = fptosi			; AVX1: cost of 3 {{.*}} %V2I32 = fptosi
	; AVX2: cost of 6 {{.*}} %V2I32 = fptosi			; AVX2: cost of 3 {{.*}} %V2I32 = fptosi
	; AVX512: cost of 6 {{.*}} %V2I32 = fptosi			; AVX512: cost of 3 {{.*}} %V2I32 = fptosi
	%V2I32 = fptosi <2 x double> undef to <2 x i32>			%V2I32 = fptosi <2 x double> undef to <2 x i32>
	; SSE2: cost of 13 {{.*}} %V4I32 = fptosi			; SSE2: cost of 7 {{.*}} %V4I32 = fptosi
	; SSE42: cost of 13 {{.*}} %V4I32 = fptosi			; SSE42: cost of 7 {{.*}} %V4I32 = fptosi
	; AVX1: cost of 1 {{.*}} %V4I32 = fptosi			; AVX1: cost of 1 {{.*}} %V4I32 = fptosi
	; AVX2: cost of 1 {{.*}} %V4I32 = fptosi			; AVX2: cost of 1 {{.*}} %V4I32 = fptosi
	; AVX512: cost of 1 {{.*}} %V4I32 = fptosi			; AVX512: cost of 1 {{.*}} %V4I32 = fptosi
	%V4I32 = fptosi <4 x double> undef to <4 x i32>			%V4I32 = fptosi <4 x double> undef to <4 x i32>
	; SSE2: cost of 27 {{.*}} %V8I32 = fptosi			; SSE2: cost of 15 {{.*}} %V8I32 = fptosi
	; SSE42: cost of 27 {{.*}} %V8I32 = fptosi			; SSE42: cost of 15 {{.*}} %V8I32 = fptosi
	; AVX1: cost of 3 {{.*}} %V8I32 = fptosi			; AVX1: cost of 3 {{.*}} %V8I32 = fptosi
	; AVX2: cost of 3 {{.*}} %V8I32 = fptosi			; AVX2: cost of 3 {{.*}} %V8I32 = fptosi
	; AVX512: cost of 1 {{.*}} %V8I32 = fptosi			; AVX512: cost of 1 {{.*}} %V8I32 = fptosi
	%V8I32 = fptosi <8 x double> undef to <8 x i32>			%V8I32 = fptosi <8 x double> undef to <8 x i32>

	ret i32 undef			ret i32 undef
	}			}

	▲ Show 20 Lines • Show All 181 Lines • Show Last 20 Lines

test/CodeGen/X86/vec_fp_to_int.ll

	Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; AVX512DQ-NEXT: retq			; AVX512DQ-NEXT: retq
	%cvt = fptosi <2 x double> %a to <2 x i64>			%cvt = fptosi <2 x double> %a to <2 x i64>
	ret <2 x i64> %cvt			ret <2 x i64> %cvt
	}			}

	define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {			define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
	; SSE-LABEL: fptosi_2f64_to_4i32:			; SSE-LABEL: fptosi_2f64_to_4i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: cvttsd2si %xmm0, %rax			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
	; SSE-NEXT: movd %rax, %xmm1
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm0
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,2]
	; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: fptosi_2f64_to_4i32:			; AVX-LABEL: fptosi_2f64_to_4i32:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vcvttsd2si %xmm0, %rax			; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
	; AVX-NEXT: vmovq %rax, %xmm1
	; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
	; AVX-NEXT: vcvttsd2si %xmm0, %rax
	; AVX-NEXT: vmovq %rax, %xmm0
	; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
	; AVX-NEXT: retq			; AVX-NEXT: retq
	;			;
	; AVX512F-LABEL: fptosi_2f64_to_4i32:			; AVX512-LABEL: fptosi_2f64_to_4i32:
	; AVX512F: # BB#0:			; AVX512: # BB#0:
	; AVX512F-NEXT: vcvttsd2si %xmm0, %rax			; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
	; AVX512F-NEXT: vmovq %rax, %xmm1			; AVX512-NEXT: retq
	; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
	; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
	; AVX512F-NEXT: vmovq %rax, %xmm0
	; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
	; AVX512F-NEXT: retq
	;
	; AVX512DQ-LABEL: fptosi_2f64_to_4i32:
	; AVX512DQ: # BB#0:
	; AVX512DQ-NEXT: vcvttpd2qq %xmm0, %xmm0
	; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
	; AVX512DQ-NEXT: retq
	%cvt = fptosi <2 x double> %a to <2 x i32>			%cvt = fptosi <2 x double> %a to <2 x i32>
	%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	ret <4 x i32> %ext			ret <4 x i32> %ext
	}			}

	define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {			define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
	; SSE-LABEL: fptosi_2f64_to_2i32:			; SSE-LABEL: fptosi_2f64_to_2i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: cvttsd2si %xmm0, %rax			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
	; SSE-NEXT: movd %rax, %xmm1			; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm0
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	; SSE-NEXT: movdqa %xmm1, %xmm0
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: fptosi_2f64_to_2i32:			; AVX-LABEL: fptosi_2f64_to_2i32:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vcvttsd2si %xmm0, %rax			; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
	; AVX-NEXT: vmovq %rax, %xmm1			; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
	; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
	; AVX-NEXT: vcvttsd2si %xmm0, %rax
	; AVX-NEXT: vmovq %rax, %xmm0
	; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	;			;
	; AVX512F-LABEL: fptosi_2f64_to_2i32:			; AVX512-LABEL: fptosi_2f64_to_2i32:
	; AVX512F: # BB#0:			; AVX512: # BB#0:
	; AVX512F-NEXT: vcvttsd2si %xmm0, %rax			; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
	; AVX512F-NEXT: vmovq %rax, %xmm1			; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
	; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]			; AVX512-NEXT: retq
	; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
	; AVX512F-NEXT: vmovq %rax, %xmm0
	; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX512F-NEXT: retq
	;
	; AVX512DQ-LABEL: fptosi_2f64_to_2i32:
	; AVX512DQ: # BB#0:
	; AVX512DQ-NEXT: vcvttpd2qq %xmm0, %xmm0
	; AVX512DQ-NEXT: retq
	%cvt = fptosi <2 x double> %a to <2 x i32>			%cvt = fptosi <2 x double> %a to <2 x i32>
	ret <2 x i32> %cvt			ret <2 x i32> %cvt
	}			}

	define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {			define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
	; SSE-LABEL: fptosi_4f64_to_2i32:			; SSE-LABEL: fptosi_4f64_to_2i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: cvttsd2si %xmm0, %rax			; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
	; SSE-NEXT: movd %rax, %xmm1			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
				craig.topperUnsubmitted Not Done Reply Inline Actions Why does this test case end up with 2 cvttpd2dq instructions that need to be unpacked? craig.topper: Why does this test case end up with 2 cvttpd2dq instructions that need to be unpacked?
				RKSimonAuthorUnsubmitted Not Done Reply Inline Actions It's demonstrating that we're not propagating the undef nature of the upper <2 x double> from the shuffle in the test. On SSE it results in an unnecessary extra cvttpd2dq and on AVX it results in a vcvttpd2dqy instead of just vcvttpd2dq. Makes a difference on 128-bit ALU Jaguar but is a separate fix from this patch. RKSimon: It's demonstrating that we're not propagating the undef nature of the upper <2 x double> from…
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm0
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm1
	; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
	; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: fptosi_4f64_to_2i32:			; AVX-LABEL: fptosi_4f64_to_2i32:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>			; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
	; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0			; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
	; AVX-NEXT: vzeroupper			; AVX-NEXT: vzeroupper
	▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
	; AVX512DQ-NEXT: retq			; AVX512DQ-NEXT: retq
	%cvt = fptosi <4 x double> %a to <4 x i64>			%cvt = fptosi <4 x double> %a to <4 x i64>
	ret <4 x i64> %cvt			ret <4 x i64> %cvt
	}			}

	define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {			define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
	; SSE-LABEL: fptosi_4f64_to_4i32:			; SSE-LABEL: fptosi_4f64_to_4i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: cvttsd2si %xmm1, %rax			; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
	; SSE-NEXT: movd %rax, %xmm2			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
	; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
	; SSE-NEXT: cvttsd2si %xmm1, %rax
	; SSE-NEXT: movd %rax, %xmm1
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
	; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm2
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: cvttsd2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm0
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
	; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: fptosi_4f64_to_4i32:			; AVX-LABEL: fptosi_4f64_to_4i32:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0			; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
	; AVX-NEXT: vzeroupper			; AVX-NEXT: vzeroupper
	; AVX-NEXT: retq			; AVX-NEXT: retq
	▲ Show 20 Lines • Show All 424 Lines • ▼ Show 20 Lines

	;			;
	; Float to Signed Integer			; Float to Signed Integer
	;			;

	define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {			define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
	; SSE-LABEL: fptosi_2f32_to_2i32:			; SSE-LABEL: fptosi_2f32_to_2i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: cvttss2si %xmm0, %rax			; SSE-NEXT: cvttps2dq %xmm0, %xmm0
	; SSE-NEXT: movd %rax, %xmm1			; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
	; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
	; SSE-NEXT: cvttss2si %xmm0, %rax
	; SSE-NEXT: movd %rax, %xmm0
	; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	; SSE-NEXT: movdqa %xmm1, %xmm0
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: fptosi_2f32_to_2i32:			; AVX-LABEL: fptosi_2f32_to_2i32:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vcvttss2si %xmm0, %rax			; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
	; AVX-NEXT: vmovq %rax, %xmm1			; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
	; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
	; AVX-NEXT: vcvttss2si %xmm0, %rax
	; AVX-NEXT: vmovq %rax, %xmm0
	; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	;			;
	; AVX512-LABEL: fptosi_2f32_to_2i32:			; AVX512-LABEL: fptosi_2f32_to_2i32:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	; AVX512-NEXT: vcvttss2si %xmm0, %rax			; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
	; AVX512-NEXT: vmovq %rax, %xmm1			; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
	; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
	; AVX512-NEXT: vcvttss2si %xmm0, %rax
	; AVX512-NEXT: vmovq %rax, %xmm0
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%cvt = fptosi <2 x float> %a to <2 x i32>			%cvt = fptosi <2 x float> %a to <2 x i32>
	ret <2 x i32> %cvt			ret <2 x i32> %cvt
	}			}

	define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {			define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
	; SSE-LABEL: fptosi_4f32_to_4i32:			; SSE-LABEL: fptosi_4f32_to_4i32:
	; SSE: # BB#0:			; SSE: # BB#0:
	▲ Show 20 Lines • Show All 1,356 Lines • Show Last 20 Lines

test/Transforms/SLPVectorizer/X86/fptosi.ll

Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines	;
store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8		store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8		store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8		store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
ret void		ret void
}		}

define void @fptosi_8f64_8i32() #0 {		define void @fptosi_8f64_8i32() #0 {
; SSE-LABEL: @fptosi_8f64_8i32(		; SSE-LABEL: @fptosi_8f64_8i32(
; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8		; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8		; SSE-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8		; SSE-NEXT: [[TMP3:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i32>
; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8		; SSE-NEXT: [[TMP4:%.*]] = fptosi <4 x double> [[TMP2]] to <4 x i32>
; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8		; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8		; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; SSE-NEXT: [[CVT0:%.*]] = fptosi double [[A0]] to i32
; SSE-NEXT: [[CVT1:%.*]] = fptosi double [[A1]] to i32
; SSE-NEXT: [[CVT2:%.*]] = fptosi double [[A2]] to i32
; SSE-NEXT: [[CVT3:%.*]] = fptosi double [[A3]] to i32
; SSE-NEXT: [[CVT4:%.*]] = fptosi double [[A4]] to i32
; SSE-NEXT: [[CVT5:%.*]] = fptosi double [[A5]] to i32
; SSE-NEXT: [[CVT6:%.*]] = fptosi double [[A6]] to i32
; SSE-NEXT: [[CVT7:%.*]] = fptosi double [[A7]] to i32
; SSE-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
; SSE-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
; SSE-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
; SSE-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: ret void		; SSE-NEXT: ret void
;		;
; AVX-LABEL: @fptosi_8f64_8i32(		; AVX-LABEL: @fptosi_8f64_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8		; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
; AVX-NEXT: [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i32>		; AVX-NEXT: [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i32>
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4		; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
; AVX-NEXT: ret void		; AVX-NEXT: ret void
;		;
▲ Show 20 Lines • Show All 368 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Add lowering to cvttpd2dq/cvttps2dq for sitofp v2f64/2f32 to 2i32
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 74799

lib/Target/X86/X86ISelLowering.h

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrFragmentsSIMD.td

lib/Target/X86/X86InstrSSE.td

lib/Target/X86/X86TargetTransformInfo.cpp

test/Analysis/CostModel/X86/fptosi.ll

test/CodeGen/X86/vec_fp_to_int.ll

test/Transforms/SLPVectorizer/X86/fptosi.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Add lowering to cvttpd2dq/cvttps2dq for sitofp v2f64/2f32 to 2i32
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 74799

lib/Target/X86/X86ISelLowering.h

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrFragmentsSIMD.td

lib/Target/X86/X86InstrSSE.td

lib/Target/X86/X86TargetTransformInfo.cpp

test/Analysis/CostModel/X86/fptosi.ll

test/CodeGen/X86/vec_fp_to_int.ll

test/Transforms/SLPVectorizer/X86/fptosi.ll

[X86][SSE] Add lowering to cvttpd2dq/cvttps2dq for sitofp v2f64/2f32 to 2i32
ClosedPublic