This is an archive of the discontinued LLVM Phabricator instance.

[X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.
ClosedPublic

Authored by craig.topper on Nov 6 2016, 12:47 AM.

Download Raw Diff

Details

Reviewers

RKSimon
zvi
delena

Commits

rG731bf9c5d617: [X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.
rL286344: [X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.

Summary

This allows the SSE intrinsic to use the EVEX instruction when available. It also fixes EVEX to not use a weird (v4i32 (fp_to_sint v2f64)) node and it merges some isel patterns.

Diff Detail

Repository: rL LLVM

Event Timeline

craig.topper updated this revision to Diff 76979. · Nov 6 2016, 12:47 AM

craig.topper retitled this revision to [X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ.

craig.topper updated this object.

craig.topper added reviewers: RKSimon, delena, zvi.

craig.topper added a subscriber: llvm-commits.

craig.topper added a child revision: D26331: [AVX-512] Add lowering to cvttpd2udq/cvttps2udq for fptoui v2f64/2f32 to 2i32. · Nov 6 2016, 12:53 AM

Any chance that you could add support for zeroing the upper 64 bits, so that the (v)movq instructions in the test_mm_cvttpd_epi32_zext tests can be removed, please?

It turns out those movqs were caused by the vzmovl patterns being rooted with a bitcast that doesn't exist when the return value is 2xi64. To fix that, I removed the bitcasts, but then we hit priority problems with the vzmovl patterns, because lots of other patterns starting with vzmovl (including the most vanilla vzmovl pattern) have an AddedComplexity set on them. So I threw in another AddedComplexity on these patterns to fix that. I'll try to scrub the vzmovl pattern mess sometime and see if we can get these AddedComplexity settings out.

I also added a vzmovl cvtpd2ps pattern that I noticed was missing.

LGTM

This revision is now accepted and ready to land. · Nov 8 2016, 8:35 AM

Closed by commit rL286344: [X86] Lower AVX512 and SSE intrinsics for CVTTPD2DQ to X86ISD::CVTTPD2DQ. (authored by ctopper). · Explain Why · Nov 8 2016, 11:41 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86InstrAVX512.td

27 lines

X86InstrSSE.td

34 lines

X86IntrinsicsInfo.h

3 lines

test/

CodeGen/

X86/

avx-intrinsics-x86.ll

13 lines

sse2-intrinsics-x86.ll

23 lines

Diff 77311

llvm/trunk/lib/Target/X86/X86InstrAVX512.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,071 Lines • ▼ Show 20 Lines	defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
VEX_W, PD, EVEX_CD8<64, CD8VF>;		VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,		defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
PS, EVEX_CD8<32, CD8VH>;		PS, EVEX_CD8<32, CD8VH>;

def : Pat<(v8f64 (extloadv8f32 addr:$src)),		def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;		(VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {		let Predicates = [HasVLX] in {
		let AddedComplexity = 15 in
		def : Pat<(X86vzmovl (v2f64 (bitconvert
		(v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
		(VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(v2f64 (extloadv2f32 addr:$src)),		def : Pat<(v2f64 (extloadv2f32 addr:$src)),
(VCVTPS2PDZ128rm addr:$src)>;		(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),		def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;		(VCVTPS2PDZ256rm addr:$src)>;
}		}

// Convert Signed/Unsigned Doubleword to Double		// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,		defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
EVEX_V128;		EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,		defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
EVEX_V256;		EVEX_V256;
}		}
}		}

// Convert Double to Signed/Unsigned Doubleword with truncation		// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,		multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode, SDNode OpNodeRnd> {		SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {		let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,		defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,		avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd>, EVEX_V512;		OpNodeRnd>, EVEX_V512;
}		}
let Predicates = [HasVLX] in {		let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256		// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parcer. They have the same		// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly		// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.		// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,		defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
"{1to2}", "{x}">, EVEX_V128;		OpNode128, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,		defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;		"{1to4}", "{y}">, EVEX_V256;
}		}
}		}

// Convert Double to Signed/Unsigned Doubleword		// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,		multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {		SDNode OpNode, SDNode OpNodeRnd> {
▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,		defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
X86VSintToFpRnd>,		X86VSintToFpRnd>,
PS, EVEX_CD8<32, CD8VF>;		PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,		defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
X86cvttp2siRnd>,		X86cvttp2siRnd>,
XS, EVEX_CD8<32, CD8VF>;		XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,		defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttpd2dq,
X86cvttp2siRnd>,		X86cvttp2siRnd>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;		PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,		defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
X86cvttp2uiRnd>, PS,		X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;		EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,		defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint,
X86cvttp2uiRnd>, PS, VEX_W,		X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;		EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,		defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
XS, EVEX_CD8<32, CD8VH>;		XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,		defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
X86VUintToFpRnd>, XD,		X86VUintToFpRnd>, XD,
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),		def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr		(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),		(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;		VR128X:$src1, sub_xmm)))), sub_ymm)>;
}		}

let Predicates = [HasAVX512, HasVLX] in {		let Predicates = [HasAVX512, HasVLX] in {
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert		let AddedComplexity = 15 in
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))))),		def : Pat<(X86vzmovl (v2i64 (bitconvert
		(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128:$src)>;		(VCVTTPD2DQZ128rr VR128:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src))),
(VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
(VCVTTPD2DQZ128rm addr:$src)>;
}		}

let Predicates = [HasAVX512] in {		let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),		def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;		(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),		def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;		(VCVTPS2PDZrm addr:$src)>;
}		}
▲ Show 20 Lines • Show All 2,485 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrSSE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,059 Lines • ▼ Show 20 Lines	def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(CVTDQ2PSrm addr:$src)>;		(CVTDQ2PSrm addr:$src)>;

def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),		def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>;		(CVTTPS2DQrr VR128:$src)>;
def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),		def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
(CVTTPS2DQrm addr:$src)>;		(CVTTPS2DQrm addr:$src)>;
}		}

		let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),		def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst\|$dst, $src}",		"cvttpd2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst,		[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],		(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))))],
IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;		IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm		// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.		// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.		// Provide other assembly rr and rm forms to address this explicitly.

// XMM only		// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst\|$dst, $src}",		def : InstAlias<"vcvttpd2dqx\t{$src, $dst\|$dst, $src}",
(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;		(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
		let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),		def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst\|$dst, $src}",		"cvttpd2dqx\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq		[(set VR128:$dst,
(loadv2f64 addr:$src)))],		(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;		IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;

// YMM only		// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),		def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",		"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst,		[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],		(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;		IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),		def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",		"cvttpd2dq{y}\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst,		[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],		(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;		IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst\|$dst, $src}",		def : InstAlias<"vcvttpd2dq\t{$src, $dst\|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;		(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {		let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert		let AddedComplexity = 15 in
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),		def : Pat<(X86vzmovl (v2i64 (bitconvert
(VCVTTPD2DQrr VR128:$src)>;		(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
(VCVTTPD2DQrr VR128:$src)>;		(VCVTTPD2DQrr VR128:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
(VCVTTPD2DQXrm addr:$src)>;

def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),		def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;		(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),		def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;		(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]		} // Predicates = [HasAVX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),		def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst\|$dst, $src}",		"cvttpd2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],		[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;		IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),		def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst\|$dst, $src}",		"cvttpd2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq		[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],		(memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>,		IIC_SSE_CVT_PD_RM>,
Sched<[WriteCvtF2ILd]>;		Sched<[WriteCvtF2ILd]>;

let Predicates = [UseSSE2] in {		let Predicates = [UseSSE2] in {
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert		let AddedComplexity = 15 in
(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),		def : Pat<(X86vzmovl (v2i64 (bitconvert
		(v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))),
(CVTTPD2DQrr VR128:$src)>;		(CVTTPD2DQrr VR128:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),		def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
(CVTTPD2DQrr VR128:$src)>;		(CVTTPD2DQrr VR128:$src)>;
def : Pat<(v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))),		def : Pat<(v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))),
(CVTTPD2DQrm addr:$src)>;		(CVTTPD2DQrm addr:$src)>;
} // Predicates = [UseSSE2]		} // Predicates = [UseSSE2]

// Convert packed single to packed double		// Convert packed single to packed double
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines	def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[], IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;		[], IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;

// AVX 256-bit register conversion intrinsics		// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below		// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.		// whenever possible to avoid declaring two versions of each one.

let Predicates = [HasAVX, NoVLX] in {		let Predicates = [HasAVX, NoVLX] in {
// Match fpround and fpextend for 128/256-bit conversions		// Match fpround and fpextend for 128/256-bit conversions
def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert		let AddedComplexity = 15 in
(v4f32 (X86vfpround (v2f64 VR128:$src)))))))),		def : Pat<(X86vzmovl (v2f64 (bitconvert
		(v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(VCVTPD2PSrr VR128:$src)>;		(VCVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),		def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;		(VCVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),		def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
(VCVTPD2PSXrm addr:$src)>;		(VCVTPD2PSXrm addr:$src)>;

def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),		def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(VCVTPS2PDrr VR128:$src)>;		(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fpextend (v4f32 VR128:$src))),		def : Pat<(v4f64 (fpextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;		(VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),		def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;		(VCVTPS2PDYrm addr:$src)>;
}		}

let Predicates = [UseSSE2] in {		let Predicates = [UseSSE2] in {
// Match fpround and fpextend for 128 conversions		// Match fpround and fpextend for 128 conversions
def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert		let AddedComplexity = 15 in
(v4f32 (X86vfpround (v2f64 VR128:$src)))))))),		def : Pat<(X86vzmovl (v2f64 (bitconvert
		(v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(CVTPD2PSrr VR128:$src)>;		(CVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),		def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(CVTPD2PSrr VR128:$src)>;		(CVTPD2PSrr VR128:$src)>;
def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),		def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
(CVTPD2PSrm addr:$src)>;		(CVTPD2PSrm addr:$src)>;

def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),		def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(CVTPS2PDrr VR128:$src)>;		(CVTPS2PDrr VR128:$src)>;
▲ Show 20 Lines • Show All 6,515 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h

Show First 20 Lines • Show All 568 Lines • ▼ Show 20 Lines	X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),		ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),		ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPROUNDS_RND, 0),		X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,		X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXTS_RND, 0),		X86ISD::VFPEXTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),		X86ISD::CVTTPD2DQ, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),		ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),		ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),		ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,		X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),		ISD::FP_TO_SINT, 0),
▲ Show 20 Lines • Show All 1,045 Lines • ▼ Show 20 Lines
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),		X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),		X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),		X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),		X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),		X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),		X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),		X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),		X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
		X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),		X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),		X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),		X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),		X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),		X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),		X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),		X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),		X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
▲ Show 20 Lines • Show All 118 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll

	Show First 20 Lines • Show All 332 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: retl ## encoding: [0xc3]			; CHECK-NEXT: retl ## encoding: [0xc3]
	%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]			%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
	ret <2 x double> %res			ret <2 x double> %res
	}			}
	declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone			declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


	define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {			define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
	; CHECK-LABEL: test_x86_sse2_cvttpd2dq:			; AVX-LABEL: test_x86_sse2_cvttpd2dq:
	; CHECK: ## BB#0:			; AVX: ## BB#0:
	; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]			; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
	; CHECK-NEXT: retl ## encoding: [0xc3]			; AVX-NEXT: retl ## encoding: [0xc3]
				;
				; AVX512VL-LABEL: test_x86_sse2_cvttpd2dq:
				; AVX512VL: ## BB#0:
				; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
				; AVX512VL-NEXT: retl ## encoding: [0xc3]
	%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]			%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
	ret <4 x i32> %res			ret <4 x i32> %res
	}			}
	declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone			declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


	define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {			define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
	; CHECK-LABEL: test_x86_sse2_cvttps2dq:			; CHECK-LABEL: test_x86_sse2_cvttps2dq:
	▲ Show 20 Lines • Show All 3,508 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll

	Show First 20 Lines • Show All 318 Lines • ▼ Show 20 Lines
	; AVX2-LABEL: test_x86_sse2_cvtpd2ps_zext:			; AVX2-LABEL: test_x86_sse2_cvtpd2ps_zext:
	; AVX2: ## BB#0:			; AVX2: ## BB#0:
	; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]			; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
	; AVX2-NEXT: retl ## encoding: [0xc3]			; AVX2-NEXT: retl ## encoding: [0xc3]
	;			;
	; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext:			; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext:
	; SKX: ## BB#0:			; SKX: ## BB#0:
	; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]			; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]
	; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
	; SKX-NEXT: ## xmm0 = xmm0[0],zero
	; SKX-NEXT: retl ## encoding: [0xc3]			; SKX-NEXT: retl ## encoding: [0xc3]
	%cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)			%cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
	%res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>			%res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
	ret <4 x float> %res			ret <4 x float> %res
	}			}

	define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {			define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
	; SSE-LABEL: test_x86_sse2_cvtps2dq:			; SSE-LABEL: test_x86_sse2_cvtps2dq:
	▲ Show 20 Lines • Show All 160 Lines • ▼ Show 20 Lines


	define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {			define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
	; SSE-LABEL: test_x86_sse2_cvttpd2dq:			; SSE-LABEL: test_x86_sse2_cvttpd2dq:
	; SSE: ## BB#0:			; SSE: ## BB#0:
	; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
	; SSE-NEXT: retl ## encoding: [0xc3]			; SSE-NEXT: retl ## encoding: [0xc3]
	;			;
	; VCHECK-LABEL: test_x86_sse2_cvttpd2dq:			; AVX2-LABEL: test_x86_sse2_cvttpd2dq:
	; VCHECK: ## BB#0:			; AVX2: ## BB#0:
	; VCHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]			; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
	; VCHECK-NEXT: retl ## encoding: [0xc3]			; AVX2-NEXT: retl ## encoding: [0xc3]
				;
				; SKX-LABEL: test_x86_sse2_cvttpd2dq:
				; SKX: ## BB#0:
				; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
				; SKX-NEXT: retl ## encoding: [0xc3]
	%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]			%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
	ret <4 x i32> %res			ret <4 x i32> %res
	}			}
	declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone			declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


	define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {			define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {
	; SSE-LABEL: test_mm_cvttpd_epi32_zext:			; SSE-LABEL: test_mm_cvttpd_epi32_zext:
	; SSE: ## BB#0:			; SSE: ## BB#0:
	; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]			; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
	; SSE-NEXT: movq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x7e,0xc0]
	; SSE-NEXT: ## xmm0 = xmm0[0],zero
	; SSE-NEXT: retl ## encoding: [0xc3]			; SSE-NEXT: retl ## encoding: [0xc3]
	;			;
	; AVX2-LABEL: test_mm_cvttpd_epi32_zext:			; AVX2-LABEL: test_mm_cvttpd_epi32_zext:
	; AVX2: ## BB#0:			; AVX2: ## BB#0:
	; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]			; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
	; AVX2-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x7e,0xc0]
	; AVX2-NEXT: ## xmm0 = xmm0[0],zero
	; AVX2-NEXT: retl ## encoding: [0xc3]			; AVX2-NEXT: retl ## encoding: [0xc3]
	;			;
	; SKX-LABEL: test_mm_cvttpd_epi32_zext:			; SKX-LABEL: test_mm_cvttpd_epi32_zext:
	; SKX: ## BB#0:			; SKX: ## BB#0:
	; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]			; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
	; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0]
	; SKX-NEXT: ## xmm0 = xmm0[0],zero
	; SKX-NEXT: retl ## encoding: [0xc3]			; SKX-NEXT: retl ## encoding: [0xc3]
	%cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)			%cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
	%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>			%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
	%bc = bitcast <4 x i32> %res to <2 x i64>			%bc = bitcast <4 x i32> %res to <2 x i64>
	ret <2 x i64> %bc			ret <2 x i64> %bc
	}			}


	▲ Show 20 Lines • Show All 1,211 Lines • Show Last 20 Lines