Diff 83918

lib/Target/X86/X86InstrAVX512.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,926 Lines • ▼ Show 20 Lines	let Predicates = [HasAVX512] in {
def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),		def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
(VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;		(VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),		def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
(VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;		(VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),		def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
(VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;		(VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
} // Predicates = [HasAVX512]		} // Predicates = [HasAVX512]

		// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
		// which produce unnecessary vmovs{s,d} instructions
		craig.topperUnsubmitted Not Done Reply Inline Actions unnecessary is spelled wrong craig.topper: unnecessary is spelled wrong
		let Predicates = [HasAVX512] in {
		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128X:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
		(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128X:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
		(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128X:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
		(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128X:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
		(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
		} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation		// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,		multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,		X86VectorVTInfo _DstRC, SDNode OpNode,
SDNode OpNodeRnd, string aliasStr>{		SDNode OpNodeRnd, string aliasStr>{
let Predicates = [HasAVX512] in {		let Predicates = [HasAVX512] in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),		def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst\|$dst, $src}"),		!strconcat(asm,"\t{$src, $dst\|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;		[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines	def : Pat<(f64 (extloadf32 addr:$src)),
(COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),		(COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,		(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
Requires<[HasAVX512, OptForSpeed]>;		Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),		def : Pat<(f32 (fpround FR64X:$src)),
(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),		(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,		(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;		Requires<[HasAVX512]>;

		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128X:$dst),
		(v4f32 (scalar_to_vector
		(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
		(VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
		Requires<[HasAVX512]>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128X:$dst),
		(v2f64 (scalar_to_vector
		(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
		(VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
		Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double		// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer		// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,		multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,		X86VectorVTInfo _Src, SDNode OpNode,
string Broadcast = _.BroadcastStr,		string Broadcast = _.BroadcastStr,
▲ Show 20 Lines • Show All 3,045 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrSSE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 27 Lines

class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,		class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
InstrItinClass arg_ri> {		InstrItinClass arg_ri> {
InstrItinClass rr = arg_rr;		InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;		InstrItinClass rm = arg_rm;
InstrItinClass ri = arg_ri;		InstrItinClass ri = arg_ri;
}		}


// scalar		// scalar
let Sched = WriteFAdd in {		let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<		def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM		IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;		>;

def SSE_ALU_F64S : OpndItins<		def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM		IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
▲ Show 20 Lines • Show All 1,873 Lines • ▼ Show 20 Lines	def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
"cvtss2sd\t{$src2, $dst\|$dst, $src2}",		"cvtss2sd\t{$src2, $dst\|$dst, $src2}",
[(set VR128:$dst,		[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],		(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,		IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;		Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}		}
} // isCodeGenOnly = 1		} // isCodeGenOnly = 1

		// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
		// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
		// vmovs{s,d} instructions
		let Predicates = [UseAVX] in {
		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector
		(f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
		(Int_VCVTSD2SSrr VR128:$dst, VR128:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector
		(f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
		(Int_VCVTSS2SDrr VR128:$dst, VR128:$src)>;

		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
		(Int_VCVTSI2SS64rr VR128:$dst, GR64:$src)>;

		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
		(Int_VCVTSI2SSrr VR128:$dst, GR32:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
		(Int_VCVTSI2SD64rr VR128:$dst, GR64:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
		(Int_VCVTSI2SDrr VR128:$dst, GR32:$src)>;
		} // Predicates = [UseAVX]
		let Predicates = [UseSSE2] in {
		craig.topperUnsubmitted Not Done Reply Inline Actions Can you add a blank line between the AVX block ending and the SSE block starting craig.topper: Can you add a blank line between the AVX block ending and the SSE block starting
		def : Pat<(v4f32 (X86Movss
		RKSimonUnsubmitted Not Done Reply Inline Actions Should this be [UseAVX] and then add AVX512 patterns in X86InstrAVX512.td ? RKSimon: Should this be [UseAVX] and then add AVX512 patterns in X86InstrAVX512.td ?
		eladcohenAuthorUnsubmitted Not Done Reply Inline Actions For AVX512 I see two types of intrinsics that correspond to these instructions: With Rounding mode or masks (e.g. _mm_cvt_roundi64_sd) - These will generate a builtin and not generic IR, so they don't require any new patterns. Without rounding mode and masks (e.g. _mm_cvti32_sd) - These are mapped by macros to their matching AVX instruction which are handled in the above patterns - And that's why I want [HasAVX] to catch them. eladcohen: For AVX512 I see two types of intrinsics that correspond to these instructions: 1) With…
		RKSimonUnsubmitted Not Done Reply Inline Actions @craig.topper @igorb Is using the AVX path for AVX512 alright with you guys? RKSimon: @craig.topper @igorb Is using the AVX path for AVX512 alright with you guys?
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector
		(f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
		(Int_CVTSD2SSrr VR128:$dst, VR128:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector
		(f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
		(Int_CVTSS2SDrr VR128:$dst, VR128:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
		(Int_CVTSI2SD64rr VR128:$dst, GR64:$src)>;

		def : Pat<(v2f64 (X86Movsd
		(v2f64 VR128:$dst),
		(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
		(Int_CVTSI2SDrr VR128:$dst, GR32:$src)>;
		} // Predicates = [UseSSE2]
		let Predicates = [UseSSE1] in {
		craig.topperUnsubmitted Not Done Reply Inline Actions Blank line here too craig.topper: Blank line here too
		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
		(Int_CVTSI2SS64rr VR128:$dst, GR64:$src)>;

		def : Pat<(v4f32 (X86Movss
		(v4f32 VR128:$dst),
		(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
		(Int_CVTSI2SSrr VR128:$dst, GR32:$src)>;
		} // Predicates = [UseSSE1]
		RKSimonUnsubmitted Not Done Reply Inline Actions It's more typical to put the AVX patterns before the SSE. RKSimon: It's more typical to put the AVX patterns before the SSE.
		eladcohenAuthorUnsubmitted Not Done Reply Inline Actions I'll change this. Thanks eladcohen: I'll change this. Thanks

// Convert packed single/double fp to doubleword		// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),		def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst\|$dst, $src}",		"cvtps2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],		[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;		IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),		def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst\|$dst, $src}",		"cvtps2dq\t{$src, $dst\|$dst, $src}",
[(set VR128:$dst,		[(set VR128:$dst,
▲ Show 20 Lines • Show All 6,795 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-cvt.ll

	Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0			; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
	; CHECK-NEXT: vcvtpd2ps %ymm1, %xmm1			; CHECK-NEXT: vcvtpd2ps %ymm1, %xmm1
	; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%a = fptrunc <8 x double> %b to <8 x float>			%a = fptrunc <8 x double> %b to <8 x float>
	ret <8 x float> %a			ret <8 x float> %a
	}			}

				define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
				; CHECK-LABEL: fptrunc01:
				; CHECK: # BB#0:
				; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
				; CHECK-NEXT: retq
				%ext = extractelement <2 x double> %a0, i32 0
				%cvt = fptrunc double %ext to float
				%res = insertelement <4 x float> %a1, float %cvt, i32 0
				ret <4 x float> %res
				}

	define <4 x double> @fpext00(<4 x float> %b) nounwind {			define <4 x double> @fpext00(<4 x float> %b) nounwind {
	; CHECK-LABEL: fpext00:			; CHECK-LABEL: fpext00:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0			; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%a = fpext <4 x float> %b to <4 x double>			%a = fpext <4 x float> %b to <4 x double>
	ret <4 x double> %a			ret <4 x double> %a
	}			}

				define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
				; CHECK-LABEL: fpext01:
				; CHECK: # BB#0:
				; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: retq
				%ext = extractelement <4 x float> %a1, i32 0
				%cvt = fpext float %ext to double
				%res = insertelement <2 x double> %a0, double %cvt, i32 0
				ret <2 x double> %res
				}

	define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {			define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
	; CHECK-LABEL: funcA:			; CHECK-LABEL: funcA:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0			; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%tmp1 = load i64, i64* %e, align 8			%tmp1 = load i64, i64* %e, align 8
	%conv = sitofp i64 %tmp1 to double			%conv = sitofp i64 %tmp1 to double
	ret double %conv			ret double %conv
	▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-cvt.ll

	Show First 20 Lines • Show All 442 Lines • ▼ Show 20 Lines
	; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1			; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
	; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}			; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
	; SKX-NEXT: retq			; SKX-NEXT: retq
	%a = fptrunc <4 x double> %b to <4 x float>			%a = fptrunc <4 x double> %b to <4 x float>
	%c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer			%c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
	ret <4 x float> %c			ret <4 x float> %c
	}			}

				define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
				; ALL-LABEL: fptrunc03:
				; ALL: ## BB#0:
				; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
				; ALL-NEXT: retq
				%ext = extractelement <2 x double> %a0, i32 0
				%cvt = fptrunc double %ext to float
				%res = insertelement <4 x float> %a1, float %cvt, i32 0
				ret <4 x float> %res
				}

	define <8 x double> @fpext00(<8 x float> %b) nounwind {			define <8 x double> @fpext00(<8 x float> %b) nounwind {
	; ALL-LABEL: fpext00:			; ALL-LABEL: fpext00:
	; ALL: ## BB#0:			; ALL: ## BB#0:
	; ALL-NEXT: vcvtps2pd %ymm0, %zmm0			; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%a = fpext <8 x float> %b to <8 x double>			%a = fpext <8 x float> %b to <8 x double>
	ret <8 x double> %a			ret <8 x double> %a
	}			}
	Show All 12 Lines
	; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}			; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
	; SKX-NEXT: retq			; SKX-NEXT: retq
	%a = fpext <4 x float> %b to <4 x double>			%a = fpext <4 x float> %b to <4 x double>
	%mask = fcmp ogt <4 x double>%a1, %b1			%mask = fcmp ogt <4 x double>%a1, %b1
	%c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer			%c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
	ret <4 x double> %c			ret <4 x double> %c
	}			}

				define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
				; ALL-LABEL: fpext02:
				; ALL: ## BB#0:
				; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
				; ALL-NEXT: retq
				%ext = extractelement <4 x float> %a1, i32 0
				%cvt = fpext float %ext to double
				%res = insertelement <2 x double> %a0, double %cvt, i32 0
				ret <2 x double> %res
				}

	define double @funcA(i64* nocapture %e) {			define double @funcA(i64* nocapture %e) {
	; ALL-LABEL: funcA:			; ALL-LABEL: funcA:
	; ALL: ## BB#0: ## %entry			; ALL: ## BB#0: ## %entry
	; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0			; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
	; ALL-NEXT: retq			; ALL-NEXT: retq
	entry:			entry:
	%tmp1 = load i64, i64* %e, align 8			%tmp1 = load i64, i64* %e, align 8
	%conv = sitofp i64 %tmp1 to double			%conv = sitofp i64 %tmp1 to double
	▲ Show 20 Lines • Show All 681 Lines • Show Last 20 Lines

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

Show First 20 Lines • Show All 1,251 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>		%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = extractelement <4 x i32> %arg0, i32 0		%res = extractelement <4 x i32> %arg0, i32 0
ret i32 %res		ret i32 %res
}		}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {		define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X32-LABEL: test_mm_cvtsi32_sd:		; X32-LABEL: test_mm_cvtsi32_sd:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: cvtsi2sdl %eax, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm_cvtsi32_sd:		; X64-LABEL: test_mm_cvtsi32_sd:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: cvtsi2sdl %edi, %xmm1		; X64-NEXT: cvtsi2sdl %edi, %xmm0
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq		; X64-NEXT: retq
%cvt = sitofp i32 %a1 to double		%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0		%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res		ret <2 x double> %res
}		}

define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {		define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X32-LABEL: test_mm_cvtsi32_si128:		; X32-LABEL: test_mm_cvtsi32_si128:
Show All 11 Lines	; X64-NEXT: retq
%res3 = insertelement <4 x i32> %res2, i32 0, i32 3		%res3 = insertelement <4 x i32> %res2, i32 0, i32 3
%res = bitcast <4 x i32> %res3 to <2 x i64>		%res = bitcast <4 x i32> %res3 to <2 x i64>
ret <2 x i64> %res		ret <2 x i64> %res
}		}

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {		define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_cvtss_sd:		; X32-LABEL: test_mm_cvtss_sd:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: cvtss2sd %xmm1, %xmm1		; X32-NEXT: cvtss2sd %xmm1, %xmm0
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm_cvtss_sd:		; X64-LABEL: test_mm_cvtss_sd:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: cvtss2sd %xmm1, %xmm1		; X64-NEXT: cvtss2sd %xmm1, %xmm0
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq		; X64-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0		%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double		%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0		%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res		ret <2 x double> %res
}		}

define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {		define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
▲ Show 20 Lines • Show All 2,570 Lines • Show Last 20 Lines

test/CodeGen/X86/vec_int_to_fp.ll

	Show First 20 Lines • Show All 4,812 Lines • ▼ Show 20 Lines
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%1 = load %Arguments, %Arguments* %a0, align 1			%1 = load %Arguments, %Arguments* %a0, align 1
	%2 = extractvalue %Arguments %1, 1			%2 = extractvalue %Arguments %1, 1
	%3 = extractvalue %Arguments %1, 2			%3 = extractvalue %Arguments %1, 2
	%4 = sitofp <8 x i16> %2 to <8 x float>			%4 = sitofp <8 x i16> %2 to <8 x float>
	store <8 x float> %4, <8 x float>* %3, align 32			store <8 x float> %4, <8 x float>* %3, align 32
	ret void			ret void
	}			}

				define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
				; SSE-LABEL: sitofp_i32_to_2f64:
				; SSE: # BB#0:
				; SSE-NEXT: cvtsi2sdl %edi, %xmm0
				; SSE-NEXT: retq
				;
				; AVX-LABEL: sitofp_i32_to_2f64:
				; AVX: # BB#0:
				; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
				; AVX-NEXT: retq
				%cvt = sitofp i32 %a1 to double
				%res = insertelement <2 x double> %a0, double %cvt, i32 0
				ret <2 x double> %res
				}

				define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
				; SSE-LABEL: sitofp_i32_to_4f32:
				; SSE: # BB#0:
				; SSE-NEXT: cvtsi2ssl %edi, %xmm0
				; SSE-NEXT: retq
				;
				; AVX-LABEL: sitofp_i32_to_4f32:
				; AVX: # BB#0:
				; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
				; AVX-NEXT: retq
				%cvt = sitofp i32 %a1 to float
				%res = insertelement <4 x float> %a0, float %cvt, i32 0
				ret <4 x float> %res
				}

				define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
				; SSE-LABEL: sitofp_i64_to_2f64:
				; SSE: # BB#0:
				; SSE-NEXT: cvtsi2sdq %rdi, %xmm0
				; SSE-NEXT: retq
				;
				; AVX-LABEL: sitofp_i64_to_2f64:
				; AVX: # BB#0:
				; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
				; AVX-NEXT: retq
				%cvt = sitofp i64 %a1 to double
				%res = insertelement <2 x double> %a0, double %cvt, i32 0
				ret <2 x double> %res
				}

				define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
				; SSE-LABEL: sitofp_i64_to_4f32:
				; SSE: # BB#0:
				; SSE-NEXT: cvtsi2ssq %rdi, %xmm0
				; SSE-NEXT: retq
				;
				; AVX-LABEL: sitofp_i64_to_4f32:
				; AVX: # BB#0:
				; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
				; AVX-NEXT: retq
				%cvt = sitofp i64 %a1 to float
				%res = insertelement <4 x float> %a0, float %cvt, i32 0
				ret <4 x float> %res
				}

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Fix PR30926 - Add patterns for optimizing (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and (v)cvtss2sd clang intrinsic sequences
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83918

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrSSE.td

test/CodeGen/X86/avx-cvt.ll

test/CodeGen/X86/avx512-cvt.ll

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

test/CodeGen/X86/vec_int_to_fp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Fix PR30926 - Add patterns for optimizing (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and (v)cvtss2sd clang intrinsic sequences
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83918

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrSSE.td

test/CodeGen/X86/avx-cvt.ll

test/CodeGen/X86/avx512-cvt.ll

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

test/CodeGen/X86/vec_int_to_fp.ll

[X86] Fix PR30926 - Add patterns for optimizing (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and (v)cvtss2sd clang intrinsic sequences
ClosedPublic