Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3061,14 +3061,18 @@
 multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SizeItins itins> {
+  let ExeDomain = SSEPackedSingle in
   defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V, VEX_LIG;
+  let ExeDomain = SSEPackedDouble in
   defm V#NAME#SD : sse12_fp_scalar, XD, VEX_4V, VEX_LIG;
   let Constraints = "$src1 = $dst" in {
+    let ExeDomain = SSEPackedSingle in
     defm SS : sse12_fp_scalar, XS;
+    let ExeDomain = SSEPackedDouble in
     defm SD : sse12_fp_scalar, XD;
   }
@@ -3076,17 +3080,21 @@
 multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                       SizeItins itins> {
+  let ExeDomain = SSEPackedSingle in
   defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG;
+  let ExeDomain = SSEPackedDouble in
   defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG;
   let Constraints = "$src1 = $dst" in {
+    let ExeDomain = SSEPackedSingle in
     defm SS : sse12_fp_scalar_int, XS;
+    let ExeDomain = SSEPackedDouble in
     defm SD : sse12_fp_scalar_int, XD;
@@ -3565,50 +3573,52 @@
 /// the HW instructions are 2 operand / destructive.
 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins> {
-let Predicates = [HasAVX], hasSideEffects = 0 in {
-  def V#NAME#SSr : SSI, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
-  let mayLoad = 1 in {
-  def V#NAME#SSm : SSI, VEX_4V, VEX_LIG,
-                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  let isCodeGenOnly = 1 in
-  def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG,
-                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  }
-}
-
-  def SSr : SSI, Sched<[itins.Sched]>;
-  // For scalar unary operations, fold a load into the operation
-  // only in OptForSize mode. It eliminates an instruction, but it also
-  // eliminates a whole-register clobber (the load), so it introduces a
-  // partial register update condition.
-  def SSm : I, XS,
-            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
-  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
-    def SSr_Int : SSI, Sched<[itins.Sched]>;
-    let mayLoad = 1, hasSideEffects = 0 in
-    def SSm_Int : SSI, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+  let ExeDomain = SSEPackedSingle in {
+  let Predicates = [HasAVX], hasSideEffects = 0 in {
+    def V#NAME#SSr : SSI, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+    let mayLoad = 1 in {
+    def V#NAME#SSm : SSI, VEX_4V, VEX_LIG,
+                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
+    let isCodeGenOnly = 1 in
+    def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG,
+                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
+    }
+  }
+
+  def SSr : SSI, Sched<[itins.Sched]>;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
+  def SSm : I, XS,
+            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
+  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+    def SSr_Int : SSI, Sched<[itins.Sched]>;
+    let mayLoad = 1, hasSideEffects = 0 in
+    def SSm_Int : SSI, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+  }
   }
 }
@@ -3693,50 +3703,52 @@
 // The only usage of these is for SQRT[S/P]D. See sse12_fp_binop* for example.
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins> {
-let Predicates = [HasAVX], hasSideEffects = 0 in {
-  def V#NAME#SDr : SDI, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
-  let mayLoad = 1 in {
-  def V#NAME#SDm : SDI, VEX_4V, VEX_LIG,
-                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  let isCodeGenOnly = 1 in
-  def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG,
-                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  }
-}
+  let ExeDomain = SSEPackedDouble in {
+  let Predicates = [HasAVX], hasSideEffects = 0 in {
+    def V#NAME#SDr : SDI, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+    let mayLoad = 1 in {
+    def V#NAME#SDm : SDI, VEX_4V, VEX_LIG,
+                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
+    let isCodeGenOnly = 1 in
+    def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG,
+                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
+    }
+  }
+
+  def SDr : SDI,
+            Sched<[itins.Sched]>;
+  // See the comments in sse1_fp_unop_s for why this is OptForSize.
+  def SDm : I, XD,
+            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
+  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+  def SDr_Int :
+    SDI, Sched<[itins.Sched]>;
-  def SDr : SDI,
-            Sched<[itins.Sched]>;
-  // See the comments in sse1_fp_unop_s for why this is OptForSize.
-  def SDm : I, XD,
-            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
-  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
-  def SDr_Int :
-    SDI, Sched<[itins.Sched]>;
-
-  let mayLoad = 1, hasSideEffects = 0 in
-  def SDm_Int :
-    SDI, Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  } // isCodeGenOnly, Constraints
+  let mayLoad = 1, hasSideEffects = 0 in
+  def SDm_Int :
+    SDI, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+  } // isCodeGenOnly, Constraints
+  }
 }
 /// sse2_fp_unop_p - SSE2 unops in vector forms.
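Commentary on the change, not part of the patch: every test update below has the same shape. The scalar double-precision instructions (addsd, subsd, minsd, divsd, sqrtsd, and so on) are destructive two-address operations, so when their result is produced in the wrong register the backend inserts a plain register-to-register copy, and the execution-domain fixup chooses the flavour of that copy from the ExeDomain of the neighbouring instructions. With SSEPackedDouble set on the SD forms, that copy is expected to come out as movapd instead of movaps. A minimal sketch of the kind of function involved (illustrative only; the function name is made up, and the exact register assignment depends on the calling convention):

; x86-64 with SSE2: %a arrives in %xmm0 and %b in %xmm1. subsd is destructive
; (dst = dst - src), so %b - %a is computed into %xmm1 and then copied back to
; %xmm0 for the return; that copy should now stay in the double domain, i.e.
; movapd %xmm1, %xmm0 rather than movaps.
define double @scalar_sub_sketch(double %a, double %b) nounwind {
  %r = fsub double %b, %a
  ret double %r
}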
Index: test/CodeGen/X86/sink-hoist.ll
===================================================================
--- test/CodeGen/X86/sink-hoist.ll
+++ test/CodeGen/X86/sink-hoist.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: testb $1, %dil
 ; CHECK-NEXT: jne
 ; CHECK-NEXT: divsd
-; CHECK-NEXT: movaps
+; CHECK-NEXT: movapd
 ; CHECK-NEXT: ret
 ; CHECK: divsd
@@ -28,7 +28,7 @@
 ; CHECK-NEXT: testb $1, %dil
 ; CHECK-NEXT: je
 ; CHECK: divsd
-; CHECK: movaps
+; CHECK: movapd
 ; CHECK: ret
 define double @split(double %x, double %y, i1 %c) nounwind {
   %a = fdiv double %x, 3.2
Index: test/CodeGen/X86/sse-minmax.ll
===================================================================
--- test/CodeGen/X86/sse-minmax.ll
+++ test/CodeGen/X86/sse-minmax.ll
@@ -805,7 +805,7 @@
 ; CHECK-LABEL: clampTo3k_a:
 ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_a:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
@@ -813,7 +813,7 @@
 ; FINITE-LABEL: clampTo3k_a:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_a(double %x) nounwind readnone {
 entry:
@@ -831,7 +831,7 @@
 ; FINITE-LABEL: clampTo3k_b:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_b(double %x) nounwind readnone {
 entry:
@@ -843,7 +843,7 @@
 ; CHECK-LABEL: clampTo3k_c:
 ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_c:
 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
@@ -851,7 +851,7 @@
 ; FINITE-LABEL: clampTo3k_c:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_c(double %x) nounwind readnone {
 entry:
@@ -869,7 +869,7 @@
 ; FINITE-LABEL: clampTo3k_d:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_d(double %x) nounwind readnone {
 entry:
@@ -881,7 +881,7 @@
 ; CHECK-LABEL: clampTo3k_e:
 ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_e:
 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
@@ -889,7 +889,7 @@
 ; FINITE-LABEL: clampTo3k_e:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_e(double %x) nounwind readnone {
 entry:
@@ -907,7 +907,7 @@
 ; FINITE-LABEL: clampTo3k_f:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_f(double %x) nounwind readnone {
 entry:
@@ -919,7 +919,7 @@
 ; CHECK-LABEL: clampTo3k_g:
 ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_g:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
@@ -927,7 +927,7 @@
 ; FINITE-LABEL: clampTo3k_g:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_g(double %x) nounwind readnone {
 entry:
@@ -945,7 +945,7 @@
 ; FINITE-LABEL: clampTo3k_h:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @clampTo3k_h(double %x) nounwind readnone {
 entry:
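Commentary, not part of the patch: the sse-minmax.ll updates fall out of the same multiclass change, since the scalar min/max instructions are defined through the scalar binop multiclasses as well. The clampTo3k functions are fcmp+select clamps that the backend matches to minsd/maxsd; when the constant ends up as the destination operand, the result is produced in %xmm1 and the copy back into %xmm0 is now expected in the double domain. A rough sketch of such a clamp (illustrative only, not copied from the test file; depending on the predicate and operand order the compiler may keep the result in %xmm0 and need no copy at all):

; x86-64 with SSE2: clamp %x to at most 3000.0. With ordinary (NaN-preserving)
; FP semantics this select is matchable to minsd with the constant as the
; destination operand, so the copy that moves the result back into %xmm0
; should now be movapd rather than movaps.
define double @clamp_sketch(double %x) nounwind readnone {
entry:
  %cmp = fcmp ogt double %x, 3.000000e+03
  %min = select i1 %cmp, double 3.000000e+03, double %x
  ret double %min
}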
Index: test/CodeGen/X86/sse-scalar-fp-arith.ll
===================================================================
--- test/CodeGen/X86/sse-scalar-fp-arith.ll
+++ test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -220,7 +220,7 @@
 ; SSE-LABEL: test2_add_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test2_add_sd:
@@ -238,7 +238,7 @@
 ; SSE-LABEL: test2_sub_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test2_sub_sd:
@@ -256,7 +256,7 @@
 ; SSE-LABEL: test2_mul_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test2_mul_sd:
@@ -274,7 +274,7 @@
 ; SSE-LABEL: test2_div_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test2_div_sd:
@@ -561,7 +561,7 @@
 ; SSE-LABEL: insert_test2_add_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test2_add_sd:
@@ -577,7 +577,7 @@
 ; SSE-LABEL: insert_test2_sub_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test2_sub_sd:
@@ -593,7 +593,7 @@
 ; SSE-LABEL: insert_test2_mul_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test2_mul_sd:
@@ -609,7 +609,7 @@
 ; SSE-LABEL: insert_test2_div_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test2_div_sd:
@@ -809,7 +809,7 @@
 ; SSE-LABEL: insert_test4_add_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test4_add_sd:
@@ -825,7 +825,7 @@
 ; SSE-LABEL: insert_test4_sub_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test4_sub_sd:
@@ -841,7 +841,7 @@
 ; SSE-LABEL: insert_test4_mul_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test4_mul_sd:
@@ -857,7 +857,7 @@
 ; SSE-LABEL: insert_test4_div_sd:
 ; SSE: # BB#0:
 ; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insert_test4_div_sd:
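Commentary, not part of the patch: the single-precision half of the change (SSEPackedSingle on the SS variants) needs no test updates in this diff, because the copies next to scalar float instructions were already coming out as movaps, which is the single-domain form; tagging the instructions simply makes that choice explicit instead of a fall-through for instructions with no domain information. The float counterpart of the earlier sketch (illustrative only, made-up name):

; x86-64 with SSE2: same shape as the double sketch. subss computes %b - %a
; into %xmm1 and the result is copied back to %xmm0; the copy is movaps both
; before and after this patch, since movaps is already the single-domain copy.
define float @scalar_sub_float_sketch(float %a, float %b) nounwind {
  %r = fsub float %b, %a
  ret float %r
}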