Add patterns to avoid inserting unnecessary zeroing shuffles.
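For context, a minimal C intrinsics sketch of the situation the patterns target (the function name and the explicit shuffle are illustrative, not taken from the patch): (v)cvtpd2ps already writes zeros into the upper 64 bits of its destination register, so a shuffle that re-zeroes the upper two float lanes after the convert is redundant once the backend knows about the implicit zeroing.

```c
#include <emmintrin.h>

/* Illustrative only: an fptrunc of <2 x double> to <2 x float> placed in the
 * low half of an otherwise-zero <4 x float>.  cvtpd2ps already zeroes the
 * upper 64 bits of its destination, so the explicit re-zeroing shuffle below
 * is the kind of redundant operation these patterns let the backend drop. */
__m128 cvt_and_zero_upper(__m128d d) {
    __m128 lo = _mm_cvtpd_ps(d);   /* low 2 floats = converted, high 2 floats = 0 */
    __m128 z  = _mm_setzero_ps();
    /* {lo[0], lo[1], 0, 0} - already equal to lo itself */
    return _mm_shuffle_ps(lo, z, _MM_SHUFFLE(1, 0, 1, 0));
}
```

With the new patterns, instruction selection should recognize that the convert alone produces the required zeros and drop the extra shuffle.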
Details
- Reviewers: spatel, ab, andreadb, mkuper
- Commits:
  - rG6199b4fd49af: [X86][SSE] Improve awareness of (v)cvtpd2ps implicit zeroing of upper 64-bits…
  - rG7b09af193a22: [X86][SSE] Improve awareness of fptrunc implicit zeroing of upper 64-bits of…
  - rL280249: [X86][SSE] Improve awareness of (v)cvtpd2ps implicit zeroing of upper 64-bits…
  - rL280214: [X86][SSE] Improve awareness of fptrunc implicit zeroing of upper 64-bits of…

Diff Detail
- Repository: rL LLVM

Event Timeline
Thanks Simon.
| lib/Target/X86/X86InstrSSE.td | | |
|---|---|---|
| 2285 | (On Diff #68972) | I've only now realized we represent zeroing the two high lanes of a v4f32 with (v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert (v4f32 ...)))))) :-\ |
| lib/Target/X86/X86IntrinsicsInfo.h | | |
|---|---|---|
| 1887 | (On Diff #68972) | This (and the change to the intrinsic test) can be a separate commit, right? |
| lib/Target/X86/X86InstrSSE.td | | |
|---|---|---|
| 2285 | (On Diff #68972) | Not much - we use VZEXT_MOVL to zero all but the first vector element. An alternative would be to have VZEXT32_MOVL and VZEXT64_MOVL (or something similar), but it would affect a lot of existing lowering patterns and I'm not sure it's worth it. We have a number of similar bitcasting pattern situations (see the sketch below). |
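As a rough intrinsics analogue of that representation (chosen for exposition only; MOVSD is not necessarily what gets selected), the v4f32 is bitcast to v2f64 so that "element 0" covers the low 64 bits, zeroed with a VZEXT_MOVL-style move, and bitcast back:

```c
#include <emmintrin.h>

/* Rough analogue of
 *   (v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert (v4f32 ...))))))
 * VZEXT_MOVL only zeroes everything but element 0, so the v4f32 is first
 * viewed as v2f64: its element 0 then spans the two low floats.          */
__m128 zero_high_two_floats(__m128 v) {
    __m128d as_pd = _mm_castps_pd(v);                      /* bitconvert v4f32 -> v2f64 */
    __m128d low64 = _mm_move_sd(_mm_setzero_pd(), as_pd);  /* keep lane 0, zero lane 1  */
    return _mm_castpd_ps(low64);                           /* bitconvert back to v4f32  */
}
```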
| lib/Target/X86/X86IntrinsicsInfo.h | | |
|---|---|---|
| 1887 | (On Diff #68972) | No reason at all - I'll update it. And yes, these could be separate commits. |
LGTM.
| lib/Target/X86/X86InstrSSE.td | | |
|---|---|---|
| 2285 | (On Diff #68972) | Even if we had VZEXT64_MOVL, it wouldn't help (at least, not with what bothers me); we'd still have the ugly casting back and forth. |