This is an archive of the discontinued LLVM Phabricator instance.

[X86] Provide execution domains for scalar floating point operations (mk2)
ClosedPublic

Authored by RKSimon on Apr 18 2015, 5:42 AM.

Download Raw Diff

Details

Reviewers

spatel
qcolombet
chandlerc
delena

Commits

rG398ce22b860f: [X86][SSE] Provide execution domains for scalar floating point operations
rL235372: [X86][SSE] Provide execution domains for scalar floating point operations

Summary

This is an updated version of Chandler's patch D7402 that got accepted but never committed, and has bit-rotted a bit since.

I've updated the execution domain declarations to match the approach of the packed templates and also added some extra scalar unary tests.

Note that the extra tests demonstrate that scalar unary ops aren't aware of the 'pass through' nature of the remaining vector lanes - this can be fixed in a future patch with explicit patterns, like those already used for scalar binary instructions.

Including Sanjay + Elena as they have both worked on the scalar operations recently.

Diff Detail

Repository: rL LLVM

Event Timeline

RKSimon updated this revision to Diff 23989. · Apr 18 2015, 5:42 AM

RKSimon retitled this revision to [X86] Provide execution domains for scalar floating point operations (mk2).

RKSimon updated this object.

RKSimon edited the test plan for this revision. (Show Details)

RKSimon added reviewers: chandlerc, qcolombet, spatel, delena.

RKSimon set the repository for this revision to rL LLVM.

RKSimon added a subscriber: Unknown Object (MLST).

Hi Simon,

LGTM.

Cheers,
-Quentin

This revision is now accepted and ready to land. · Apr 20 2015, 2:33 PM

Closed by commit rL235372: [X86][SSE] Provide execution domains for scalar floating point operations (authored by RKSimon). · Explain Why · Apr 21 2015, 1:43 AM

This revision was automatically updated to reflect the committed changes.

RKSimon mentioned this in D7402: [x86] Provide execution domains for scalar floating point operations. · Apr 21 2015, 1:45 AM

RKSimon mentioned this in D9504: [x86] eliminate unnecessary shuffling/moves with unary scalar math ops (PR21507). · May 6 2015, 2:48 AM

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86InstrSSE.td

79 lines

test/

CodeGen/

X86/

sink-hoist.ll

4 lines

sse-minmax.ll

24 lines

sse-scalar-fp-arith.ll

70 lines

Diff 24103

llvm/trunk/lib/Target/X86/X86InstrSSE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 235 Lines • ▼ Show 20 Lines

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes		// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class		/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,		RegisterClass RC, X86MemOperand x86memop,
OpndItins itins,		Domain d, OpndItins itins, bit Is2Addr = 1> {
bit Is2Addr = 1> {
let isCommutable = 1 in {		let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),		def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,		!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),		!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,		[(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;		Sched<[itins.Sched]>;
}		}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),		def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,		!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),		!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,		[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;		Sched<[itins.Sched.Folded, ReadAfterLd]>;
}		}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class		/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,		multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,		string asm, string SSEVer, string FPSizeStr,
Operand memopr, ComplexPattern mem_cpat,		Operand memopr, ComplexPattern mem_cpat,
OpndItins itins,		Domain d, OpndItins itins, bit Is2Addr = 1> {
bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {		let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),		def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,		!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(asm, "\t{$src2, $dst\|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),		!strconcat(asm, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(		[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))		!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], itins.rr>,		RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;		Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),		def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,		!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(asm, "\t{$src2, $dst\|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),		!strconcat(asm, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",		[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))		SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, mem_cpat:$src2))], itins.rm>,		RC:$src1, mem_cpat:$src2))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;		Sched<[itins.Sched.Folded, ReadAfterLd]>;
}		}
}		}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class		/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType vt,		RegisterClass RC, ValueType vt,
X86MemOperand x86memop, PatFrag mem_frag,		X86MemOperand x86memop, PatFrag mem_frag,
▲ Show 20 Lines • Show All 2,757 Lines • ▼ Show 20 Lines	defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble,		v2f64, f128mem, memopv2f64, SSEPackedDouble,
itins.d>, PD;		itins.d>, PD;
}		}
}		}

multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {		SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),		defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;		OpNode, FR32, f32mem, SSEPackedSingle, itins.s, 0>,
		XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),		defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;		OpNode, FR64, f64mem, SSEPackedDouble, itins.d, 0>,
		XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {		let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),		defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, itins.s>, XS;		OpNode, FR32, f32mem, SSEPackedSingle,
		itins.s>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),		defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, itins.d>, XD;		OpNode, FR64, f64mem, SSEPackedDouble,
		itins.d>, XD;
}		}
}		}

multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,		multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SizeItins itins> {		SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,		defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,		!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
itins.s, 0>, XS, VEX_4V, VEX_LIG;		SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,		defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,		!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
itins.d, 0>, XD, VEX_4V, VEX_LIG;		SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {		let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,		defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,		!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
itins.s>, XS;		SSEPackedSingle, itins.s>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,		defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,		!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
itins.d>, XD;		SSEPackedDouble, itins.d>, XD;
}		}
}		}

// Binary Arithmetic instructions		// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,		defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,		basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;		basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,		defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,		(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;		(COPY_TO_REGCLASS FR32:$src, VR128))>;

// vector math op with insert via movss		// vector math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),		def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),		(Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;		(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
}		}

// With SSE 4.1, insertps/blendi are preferred to movsd, so match those too.		// With SSE 4.1, insertps/blendi are preferred to movsd, so match those too.
let Predicates = [UseSSE41] in {		let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via insertps		// extracted scalar math op with insert via insertps
def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector		def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),		(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),		FR32:$src))), (iPTR 0))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,		(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;		(COPY_TO_REGCLASS FR32:$src, VR128))>;
Show All 16 Lines	multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
// because that one shouldn't occur with AVX codegen?		// because that one shouldn't occur with AVX codegen?
let Predicates = [HasAVX] in {		let Predicates = [HasAVX] in {
// extracted scalar math op with insert via insertps		// extracted scalar math op with insert via insertps
def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector		def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),		(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),		FR32:$src))), (iPTR 0))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,		(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;		(COPY_TO_REGCLASS FR32:$src, VR128))>;

// extracted scalar math op with insert via blend		// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector		def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),		(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (i8 1))),		FR32:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,		(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;		(COPY_TO_REGCLASS FR32:$src, VR128))>;

// vector math op with insert via movss		// vector math op with insert via movss
Show All 31 Lines	multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
// With SSE 4.1, blendi is preferred to movsd, so match those too.		// With SSE 4.1, blendi is preferred to movsd, so match those too.
let Predicates = [UseSSE41] in {		let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via blend		// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector		def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
(Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),		(Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),		FR64:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,		(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;		(COPY_TO_REGCLASS FR64:$src, VR128))>;

// vector math op with insert via blend		// vector math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),		def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),		(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;		(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
}		}

// Repeat everything for AVX.		// Repeat everything for AVX.
let Predicates = [HasAVX] in {		let Predicates = [HasAVX] in {
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines

/// sse_fp_unop_s - SSE1 unops in scalar form		/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because		/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.		/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,		multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,		ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,		X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat, Intrinsic Intr,		ComplexPattern mem_cpat, Intrinsic Intr,
SDNode OpNode, OpndItins itins, Predicate target,		SDNode OpNode, Domain d, OpndItins itins,
string Suffix> {		Predicate target, string Suffix> {
let hasSideEffects = 0 in {		let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),		def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst\|$dst, $src1}"),		!strconcat(OpcodeStr, "\t{$src1, $dst\|$dst, $src1}"),
[(set RC:$dst, (OpNode RC:$src1))], itins.rr>, Sched<[itins.Sched]>,		[(set RC:$dst, (OpNode RC:$src1))], itins.rr, d>, Sched<[itins.Sched]>,
Requires<[target]>;		Requires<[target]>;
let mayLoad = 1 in		let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),		def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst\|$dst, $src1}"),		!strconcat(OpcodeStr, "\t{$src1, $dst\|$dst, $src1}"),
[(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm>,		[(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>,		Sched<[itins.Sched.Folded, ReadAfterLd]>,
Requires<[target, OptForSize]>;		Requires<[target, OptForSize]>;

let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {		let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),		def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;		[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let mayLoad = 1 in		let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),		def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $dst\|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;		[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}		}
}		}

let Predicates = [target] in {		let Predicates = [target] in {
def : Pat<(vt (OpNode mem_cpat:$src)),		def : Pat<(vt (OpNode mem_cpat:$src)),
(vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)		(vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;		(vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
// These are unary operations, but they are modeled as having 2 source operands		// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.		// because the high elements of the destination are unchanged in SSE.
def : Pat<(Intr VR128:$src),		def : Pat<(Intr VR128:$src),
(!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;		(!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
def : Pat<(Intr (load addr:$src)),		def : Pat<(Intr (load addr:$src)),
(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)		(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
addr:$src), VR128))>;		addr:$src), VR128))>;
def : Pat<(Intr mem_cpat:$src),		def : Pat<(Intr mem_cpat:$src),
(!cast<Instruction>(NAME#Suffix##m_Int)		(!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;		(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}		}
}		}

multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,		multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,		ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,		X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat,		ComplexPattern mem_cpat,
Intrinsic Intr, SDNode OpNode, OpndItins itins,		Intrinsic Intr, SDNode OpNode, Domain d,
Predicate target, string Suffix> {		OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {		let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),		def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),
[], itins.rr>, Sched<[itins.Sched]>;		[], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in		let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),		def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),
[], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;		[], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCodeGenOnly = 1 in {		let isCodeGenOnly = 1 in {
// todo: uncomment when all r_Int forms will be added to X86InstrInfo.cpp		// todo: uncomment when all r_Int forms will be added to X86InstrInfo.cpp
//def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),		//def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
// (ins VR128:$src1, VR128:$src2),		// (ins VR128:$src1, VR128:$src2),
// !strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),		// !strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),
// []>, Sched<[itins.Sched.Folded]>;		// []>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in		let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),		def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, vec_memop:$src2),		(ins VR128:$src1, vec_memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),		!strconcat(OpcodeStr, "\t{$src2, $src1, $dst\|$dst, $src1, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;		[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}		}
}		}

let Predicates = [target] in {		let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)		def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;		(ScalarVT (IMPLICIT_DEF)), RC:$src)>;

def : Pat<(vt (OpNode mem_cpat:$src)),		def : Pat<(vt (OpNode mem_cpat:$src)),
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),		(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
mem_cpat:$src)>;		mem_cpat:$src)>;

// todo: use r_Int form when it will be ready		// todo: use r_Int form when it will be ready
//def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int)		//def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int)
// (VT (IMPLICIT_DEF)), VR128:$src)>;		// (VT (IMPLICIT_DEF)), VR128:$src)>;
def : Pat<(Intr VR128:$src),		def : Pat<(Intr VR128:$src),
(vt (COPY_TO_REGCLASS(		(vt (COPY_TO_REGCLASS(
!cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),		!cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
(ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;		(ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;
def : Pat<(Intr mem_cpat:$src),		def : Pat<(Intr mem_cpat:$src),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)		(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;		(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}		}
let Predicates = [target, OptForSize] in		let Predicates = [target, OptForSize] in
def : Pat<(ScalarVT (OpNode (load addr:$src))),		def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),		(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;		addr:$src)>;
}		}

/// sse1_fp_unop_p - SSE1 unops in packed form.		/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {		OpndItins itins> {
let Predicates = [HasAVX] in {		let Predicates = [HasAVX] in {
▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines	def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Sched<[itins.Sched.Folded]>;		Sched<[itins.Sched.Folded]>;
}		}

multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {		OpndItins itins> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,		defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
ssmem, sse_load_f32,		ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,		!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
itins, UseSSE1, "SS">, XS;		SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,		defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,		f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,		!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;		SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
}		}

multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,		multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {		OpndItins itins> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,		defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
sdmem, sse_load_f64,		sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),		!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, itins, UseSSE2, "SD">, XD;		OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,		defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,		f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),		!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG;		OpNode, SSEPackedDouble, itins, UseAVX, "SD">,
		XD, VEX_4V, VEX_LIG;
}		}

// Square root.		// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,		defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,		sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,		sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;		sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

▲ Show 20 Lines • Show All 5,305 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/sink-hoist.ll

	; RUN: llc < %s -verify-machineinstrs -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s			; RUN: llc < %s -verify-machineinstrs -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s

	; Currently, floating-point selects are lowered to CFG triangles.			; Currently, floating-point selects are lowered to CFG triangles.
	; This means that one side of the select is always unconditionally			; This means that one side of the select is always unconditionally
	; evaluated, however with MachineSink we can sink the other side so			; evaluated, however with MachineSink we can sink the other side so
	; that it's conditionally evaluated.			; that it's conditionally evaluated.

	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	; CHECK-NEXT: testb $1, %dil			; CHECK-NEXT: testb $1, %dil
	; CHECK-NEXT: jne			; CHECK-NEXT: jne
	; CHECK-NEXT: divsd			; CHECK-NEXT: divsd
	; CHECK-NEXT: movaps			; CHECK-NEXT: movapd
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	; CHECK: divsd			; CHECK: divsd

	define double @foo(double %x, double %y, i1 %c) nounwind {			define double @foo(double %x, double %y, i1 %c) nounwind {
	%a = fdiv double %x, 3.2			%a = fdiv double %x, 3.2
	%b = fdiv double %y, 3.3			%b = fdiv double %y, 3.3
	%z = select i1 %c, double %a, double %b			%z = select i1 %c, double %a, double %b
	ret double %z			ret double %z
	}			}

	; Make sure the critical edge is broken so the divsd is sunken below			; Make sure the critical edge is broken so the divsd is sunken below
	; the conditional branch.			; the conditional branch.
	; rdar://8454886			; rdar://8454886

	; CHECK-LABEL: split:			; CHECK-LABEL: split:
	; CHECK-NEXT: testb $1, %dil			; CHECK-NEXT: testb $1, %dil
	; CHECK-NEXT: je			; CHECK-NEXT: je
	; CHECK: divsd			; CHECK: divsd
	; CHECK: movaps			; CHECK: movapd
	; CHECK: ret			; CHECK: ret
	define double @split(double %x, double %y, i1 %c) nounwind {			define double @split(double %x, double %y, i1 %c) nounwind {
	%a = fdiv double %x, 3.2			%a = fdiv double %x, 3.2
	%z = select i1 %c, double %a, double %y			%z = select i1 %c, double %a, double %y
	ret double %z			ret double %z
	}			}


	▲ Show 20 Lines • Show All 135 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/sse-minmax.ll

Show First 20 Lines • Show All 799 Lines • ▼ Show 20 Lines	define double @ule_inverse_y(double %x) nounwind {
%d = select i1 %c, double -0.000000e+00, double %x		%d = select i1 %c, double -0.000000e+00, double %x
ret double %d		ret double %d
}		}
; Test a few more misc. cases.		; Test a few more misc. cases.

; CHECK-LABEL: clampTo3k_a:		; CHECK-LABEL: clampTo3k_a:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1		; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1		; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0		; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_a:		; UNSAFE-LABEL: clampTo3k_a:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_a:		; FINITE-LABEL: clampTo3k_a:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1		; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_a(double %x) nounwind readnone {		define double @clampTo3k_a(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_b:		; CHECK-LABEL: clampTo3k_b:
; CHECK-NEXT: minsd {{[^,]*}}, %xmm0		; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_b:		; UNSAFE-LABEL: clampTo3k_b:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_b:		; FINITE-LABEL: clampTo3k_b:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1		; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_b(double %x) nounwind readnone {		define double @clampTo3k_b(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_c:		; CHECK-LABEL: clampTo3k_c:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1		; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1		; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0		; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_c:		; UNSAFE-LABEL: clampTo3k_c:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_c:		; FINITE-LABEL: clampTo3k_c:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1		; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_c(double %x) nounwind readnone {		define double @clampTo3k_c(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_d:		; CHECK-LABEL: clampTo3k_d:
; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0		; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_d:		; UNSAFE-LABEL: clampTo3k_d:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_d:		; FINITE-LABEL: clampTo3k_d:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1		; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_d(double %x) nounwind readnone {		define double @clampTo3k_d(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_e:		; CHECK-LABEL: clampTo3k_e:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1		; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1		; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0		; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_e:		; UNSAFE-LABEL: clampTo3k_e:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_e:		; FINITE-LABEL: clampTo3k_e:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1		; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_e(double %x) nounwind readnone {		define double @clampTo3k_e(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_f:		; CHECK-LABEL: clampTo3k_f:
; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0		; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_f:		; UNSAFE-LABEL: clampTo3k_f:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_f:		; FINITE-LABEL: clampTo3k_f:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1		; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_f(double %x) nounwind readnone {		define double @clampTo3k_f(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_g:		; CHECK-LABEL: clampTo3k_g:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1		; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1		; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0		; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_g:		; UNSAFE-LABEL: clampTo3k_g:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_g:		; FINITE-LABEL: clampTo3k_g:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1		; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_g(double %x) nounwind readnone {		define double @clampTo3k_g(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

; CHECK-LABEL: clampTo3k_h:		; CHECK-LABEL: clampTo3k_h:
; CHECK-NEXT: minsd {{[^,]*}}, %xmm0		; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_h:		; UNSAFE-LABEL: clampTo3k_h:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0		; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret		; UNSAFE-NEXT: ret
; FINITE-LABEL: clampTo3k_h:		; FINITE-LABEL: clampTo3k_h:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1		; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1		; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movaps %xmm1, %xmm0		; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret		; FINITE-NEXT: ret
define double @clampTo3k_h(double %x) nounwind readnone {		define double @clampTo3k_h(double %x) nounwind readnone {
entry:		entry:
%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]		%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]		%x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
ret double %x_addr.0		ret double %x_addr.0
}		}

▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll

Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <4 x float> %b, i32 0		%1 = extractelement <4 x float> %b, i32 0
%2 = extractelement <4 x float> %a, i32 0		%2 = extractelement <4 x float> %a, i32 0
%div = fdiv float %2, %1		%div = fdiv float %2, %1
%3 = insertelement <4 x float> %a, float %div, i32 0		%3 = insertelement <4 x float> %a, float %div, i32 0
ret <4 x float> %3		ret <4 x float> %3
}		}

		define <4 x float> @test_sqrt_ss(<4 x float> %a) {
		; SSE2-LABEL: test_sqrt_ss:
		; SSE2: # BB#0:
		; SSE2-NEXT: sqrtss %xmm0, %xmm1
		; SSE2-NEXT: movss %xmm1, %xmm0
		; SSE2-NEXT: retq
		;
		; SSE41-LABEL: test_sqrt_ss:
		; SSE41: # BB#0:
		; SSE41-NEXT: sqrtss %xmm0, %xmm1
		; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
		; SSE41-NEXT: retq
		;
		; AVX-LABEL: test_sqrt_ss:
		; AVX: # BB#0:
		; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
		; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
		; AVX-NEXT: retq
		%1 = extractelement <4 x float> %a, i32 0
		%2 = call float @llvm.sqrt.f32(float %1)
		%3 = insertelement <4 x float> %a, float %2, i32 0
		ret <4 x float> %3
		}
		declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:		; SSE-LABEL: test_add_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0		; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test_add_sd:		; AVX-LABEL: test_add_sd:
; AVX: # BB#0:		; AVX: # BB#0:
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <2 x double> %b, i32 0		%1 = extractelement <2 x double> %b, i32 0
%2 = extractelement <2 x double> %a, i32 0		%2 = extractelement <2 x double> %a, i32 0
%div = fdiv double %2, %1		%div = fdiv double %2, %1
%3 = insertelement <2 x double> %a, double %div, i32 0		%3 = insertelement <2 x double> %a, double %div, i32 0
ret <2 x double> %3		ret <2 x double> %3
}		}

		define <2 x double> @test_sqrt_sd(<2 x double> %a) {
		; SSE-LABEL: test_sqrt_sd:
		; SSE: # BB#0:
		; SSE-NEXT: sqrtsd %xmm0, %xmm1
		; SSE-NEXT: movsd %xmm1, %xmm0
		; SSE-NEXT: retq
		;
		; AVX-LABEL: test_sqrt_sd:
		; AVX: # BB#0:
		; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
		; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
		; AVX-NEXT: retq
		%1 = extractelement <2 x double> %a, i32 0
		%2 = call double @llvm.sqrt.f64(double %1)
		%3 = insertelement <2 x double> %a, double %2, i32 0
		ret <2 x double> %3
		}
		declare double @llvm.sqrt.f64(double)

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {		define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:		; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1		; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test2_add_ss:		; AVX-LABEL: test2_add_ss:
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%3 = insertelement <4 x float> %b, float %div, i32 0		%3 = insertelement <4 x float> %b, float %div, i32 0
ret <4 x float> %3		ret <4 x float> %3
}		}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:		; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1		; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test2_add_sd:		; AVX-LABEL: test2_add_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <2 x double> %a, i32 0		%1 = extractelement <2 x double> %a, i32 0
%2 = extractelement <2 x double> %b, i32 0		%2 = extractelement <2 x double> %b, i32 0
%add = fadd double %1, %2		%add = fadd double %1, %2
%3 = insertelement <2 x double> %b, double %add, i32 0		%3 = insertelement <2 x double> %b, double %add, i32 0
ret <2 x double> %3		ret <2 x double> %3
}		}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:		; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1		; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test2_sub_sd:		; AVX-LABEL: test2_sub_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <2 x double> %a, i32 0		%1 = extractelement <2 x double> %a, i32 0
%2 = extractelement <2 x double> %b, i32 0		%2 = extractelement <2 x double> %b, i32 0
%sub = fsub double %2, %1		%sub = fsub double %2, %1
%3 = insertelement <2 x double> %b, double %sub, i32 0		%3 = insertelement <2 x double> %b, double %sub, i32 0
ret <2 x double> %3		ret <2 x double> %3
}		}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:		; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1		; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test2_mul_sd:		; AVX-LABEL: test2_mul_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <2 x double> %a, i32 0		%1 = extractelement <2 x double> %a, i32 0
%2 = extractelement <2 x double> %b, i32 0		%2 = extractelement <2 x double> %b, i32 0
%mul = fmul double %1, %2		%mul = fmul double %1, %2
%3 = insertelement <2 x double> %b, double %mul, i32 0		%3 = insertelement <2 x double> %b, double %mul, i32 0
ret <2 x double> %3		ret <2 x double> %3
}		}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:		; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1		; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: test2_div_sd:		; AVX-LABEL: test2_div_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = extractelement <2 x double> %a, i32 0		%1 = extractelement <2 x double> %a, i32 0
%2 = extractelement <2 x double> %b, i32 0		%2 = extractelement <2 x double> %b, i32 0
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%2 = extractelement <4 x float> %a, i32 0		%2 = extractelement <4 x float> %a, i32 0
%div = fdiv float %2, %1		%div = fdiv float %2, %1
%div2 = fdiv float %2, %div		%div2 = fdiv float %2, %div
%3 = insertelement <4 x float> %a, float %div2, i32 0		%3 = insertelement <4 x float> %a, float %div2, i32 0
ret <4 x float> %3		ret <4 x float> %3
}		}

; With SSE4.1 or greater, the shuffles in the following tests may		; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.		; be lowered to X86Blendi nodes.

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {		define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:		; SSE-LABEL: blend_add_ss:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0		; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: blend_add_ss:		; AVX-LABEL: blend_add_ss:
▲ Show 20 Lines • Show All 320 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>		%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2		ret <4 x float> %2
}		}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:		; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1		; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test2_add_sd:		; AVX-LABEL: insert_test2_add_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fadd <2 x double> %b, %a		%1 = fadd <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>		%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:		; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1		; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test2_sub_sd:		; AVX-LABEL: insert_test2_sub_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fsub <2 x double> %b, %a		%1 = fsub <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>		%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:		; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1		; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test2_mul_sd:		; AVX-LABEL: insert_test2_mul_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fmul <2 x double> %b, %a		%1 = fmul <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>		%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:		; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1		; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test2_div_sd:		; AVX-LABEL: insert_test2_div_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fdiv <2 x double> %b, %a		%1 = fdiv <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>		%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1		%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
ret <4 x float> %2		ret <4 x float> %2
}		}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:		; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1		; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test4_add_sd:		; AVX-LABEL: insert_test4_add_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fadd <2 x double> %b, %a		%1 = fadd <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1		%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:		; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1		; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test4_sub_sd:		; AVX-LABEL: insert_test4_sub_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fsub <2 x double> %b, %a		%1 = fsub <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1		%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:		; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1		; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test4_mul_sd:		; AVX-LABEL: insert_test4_mul_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fmul <2 x double> %b, %a		%1 = fmul <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1		%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2		ret <2 x double> %2
}		}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {		define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:		; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1		; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: insert_test4_div_sd:		; AVX-LABEL: insert_test4_div_sd:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0		; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = fdiv <2 x double> %b, %a		%1 = fdiv <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1		%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2		ret <2 x double> %2
}		}