diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12430,8 +12430,8 @@
   if (N0CFP && N0CFP->isZero()) {
     if (N0CFP->isNegative() ||
         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
-      if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
-          TargetLowering::NegatibleCost::Expensive)
+      if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) ==
+          TargetLowering::NegatibleCost::Cheaper)
         return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
@@ -12450,8 +12450,8 @@
   }
 
   // fold (fsub A, (fneg B)) -> (fadd A, B)
-  if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
-      TargetLowering::NegatibleCost::Expensive)
+  if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) ==
+      TargetLowering::NegatibleCost::Cheaper)
     return DAG.getNode(
         ISD::FADD, DL, VT, N0,
         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -517,7 +517,7 @@
 }
 
 ; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
-; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
+; SI-NOFMA: v_subrev_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
@@ -533,7 +533,7 @@
 }
 
 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
-; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
+; SI-NOFMA: v_subrev_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
@@ -549,7 +549,7 @@
 }
 
 ; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
-; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
+; SI-NOFMA: v_subrev_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
@@ -565,7 +565,7 @@
 }
 
 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
-; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
+; SI-NOFMA: v_subrev_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -195,14 +195,14 @@
 ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f16
 ; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]],
-; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
+; VI-FLUSH: v_mad_f16 [[R2]], -[[R1]], -2.0, [[R2]]
 ; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
 
 ; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
 ; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 
-; GFX10-FLUSH: v_add_f16_e32 [[MUL2:v[0-9]+]], [[R1]], [[R1]]
-; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[MUL2]]
+; GFX10-FLUSH: v_mul_f16_e32 [[MUL2:v[0-9]+]], -2.0, [[R1]]
+; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[MUL2]]
 ; GFX10-FLUSH: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 
 ; GFX10-DENORM: v_fmac_f16_e32 [[R2]], 2.0, [[R1]]
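The functional change is confined to the two guards in the DAGCombiner hunks above: both fsub folds previously fired whenever negating N1 was merely not Expensive (that is, Neutral or Cheaper), and they now require the negation to be strictly Cheaper. When the cost is only Neutral, the fsub is kept as a subtract, which is what the updated AMDGPU checks reflect (v_sub/v_subrev forms instead of v_add with a sign-flipped operand). A minimal IR-level sketch of the two cases (hypothetical functions, not taken from this patch):

; Negating %n is Cheaper: it simply strips the fneg, so
; (fsub %a, %n) is still rewritten to (fadd %a, %b).
define float @negation_cheaper(float %a, float %b) {
  %n = fneg float %b
  %r = fsub fast float %a, %n
  ret float %r
}

; Negating %m is only Neutral: (fmul %x, 2.0) becomes (fmul %x, -2.0),
; the same instruction with a different constant. The fsub is now kept
; rather than rewritten to (fadd %a, (fmul %x, -2.0)).
define float @negation_neutral(float %a, float %x) {
  %m = fmul fast float %x, 2.0
  %r = fsub fast float %a, %m
  ret float %r
}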
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -232,16 +232,16 @@
 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
 
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; GCN-FLUSH-MAD: v_mad_f32 [[R1]], -[[R1]], -2.0, [[R2]]
 ; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
 
-; SI-FLUSH: buffer_store_dword [[R2]]
+; SI-FLUSH: buffer_store_dword [[R1]]
 ; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
 
 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
 
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
+; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -45,9 +45,9 @@
 ; GCN-LABEL: {{^}}fsub_f16_imm_b:
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], -2.0, v[[A_F32]]
+; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], 2.0, v[[A_F32]]
 ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; GFX89: v_add_f16_e32 v[[R_F16:[0-9]+]], -2.0, v[[A_F16]]
+; GFX89: v_subrev_f16_e32 v[[R_F16:[0-9]+]], 2.0, v[[A_F16]]
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fsub_f16_imm_b(
@@ -146,16 +146,16 @@
 ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
 ; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
 ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
-; SI-DAG: v_add_f32_e32 v[[R_F32_0:[0-9]+]], -2.0, v[[A_F32_0]]
+; SI-DAG: v_subrev_f32_e32 v[[R_F32_0:[0-9]+]], 2.0, v[[A_F32_0]]
 ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
-; SI-DAG: v_add_f32_e32 v[[R_F32_1:[0-9]+]], -1.0, v[[A_F32_1]]
+; SI-DAG: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], 1.0, v[[A_F32_1]]
 ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
 
-; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0x3c00
+; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], 2.0, v[[A_V2_F16]]
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
 
 ; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00c000
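The fsub.f16 diffs above show the same effect on subtraction of an immediate: x - 2.0 was previously matched as x + (-2.0), while it now stays a subtract, so the inline constant keeps its natural sign (v_subrev_* with 2.0 rather than v_add_* with -2.0; v_subrev computes src1 - src0). A reduced sketch of the pattern, modeled on fsub_f16_imm_b (the kernel name and signature here are illustrative):

; On VI this now selects v_subrev_f16_e32 dst, 2.0, x
; instead of v_add_f16_e32 dst, -2.0, x; the result is identical.
define amdgpu_kernel void @fsub_imm_sketch(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %r = fsub half %x, 2.0
  store half %r, half addrspace(1)* %out
  ret void
}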
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -47,8 +47,8 @@
 ; VI: s_waitcnt
 ; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_sub_f16_e32 v0, v1, v0
-; VI-NEXT: v_add_f16_e32 v0, v2, v0
+; VI-NEXT: v_sub_f16_e32 v0, v0, v1
+; VI-NEXT: v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT: s_setpc_b64
 
 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -221,7 +221,7 @@
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 
 ; GCN: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
-; GCN: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
+; GCN: v_mad_f32 v{{[0-9]+}}, -[[TMP2]], 4.0, 1.0
 define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) #3 {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -252,11 +252,11 @@
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[B]]
 
 ; SI: v_add_f32_e32 [[TMP2:v[0-9]+]], [[CVT_A]], [[CVT_A]]
-; SI: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
+; SI: v_mad_f32 v{{[0-9]+}}, -[[TMP2]], 4.0, 1.0
 ; SI: v_mac_f32_e32 v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
 
 ; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
-; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
+; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, -[[TMP2]], 4.0, 1.0
 define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
--- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -65,8 +65,8 @@
 ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
 ; CHECK-NEXT: qvlfsx 0, 0, 3
 ; CHECK-NEXT: qvfmuls 4, 3, 3
-; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
-; CHECK-NEXT: qvfmadds 0, 2, 4, 0
+; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
 ; CHECK-NEXT: qvfmuls 0, 3, 0
 ; CHECK-NEXT: qvfmul 1, 1, 0
 ; CHECK-NEXT: blr
@@ -182,8 +182,8 @@
 ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
 ; CHECK-NEXT: qvlfsx 0, 0, 3
 ; CHECK-NEXT: qvfmuls 4, 3, 3
-; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
-; CHECK-NEXT: qvfmadds 0, 2, 4, 0
+; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
 ; CHECK-NEXT: qvfmuls 0, 3, 0
 ; CHECK-NEXT: qvfmuls 1, 1, 0
 ; CHECK-NEXT: blr
@@ -408,8 +408,8 @@
 ; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha
 ; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l
 ; CHECK-NEXT: qvfmuls 4, 2, 2
-; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1
-; CHECK-NEXT: qvfmadds 0, 3, 4, 0
+; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
+; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
 ; CHECK-NEXT: qvlfsx 3, 0, 3
 ; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha
 ; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l
@@ -435,8 +435,8 @@
 ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
 ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
 ; CHECK-NEXT: qvfmuls 4, 2, 2
-; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1
-; CHECK-NEXT: qvfmadds 0, 3, 4, 0
+; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
+; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
 ; CHECK-NEXT: qvlfsx 3, 0, 3
 ; CHECK-NEXT: qvfmuls 0, 2, 0
 ; CHECK-NEXT: qvfmuls 0, 0, 1
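The dag-fmf-cse.ll test below exists because a*b is a common subexpression of the two multiplies: mul1 - mul2 = a*b - (-a)*b = 2*(a*b). After this change the vector case still reuses the multiply, while the scalar case picks up an explicit sign flip (vxorps) plus an FMA; the regenerated checks record that trade-off. A sketch of the pattern the test reduces to (only the %mul1 and %nega lines are visible in the hunk; the remaining lines are reconstructed from the test's intent):

define float @cse_identity(float %a, float %b) {
  %mul1 = fmul fast float %a, %b
  %nega = fsub fast float 0.0, %a
  %mul2 = fmul fast float %nega, %b
  ; With fast-math: %r = a*b - (-a)*b = 2*(a*b).
  %r = fsub fast float %mul1, %mul2
  ret float %r
}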
diff --git a/llvm/test/CodeGen/X86/dag-fmf-cse.ll b/llvm/test/CodeGen/X86/dag-fmf-cse.ll
--- a/llvm/test/CodeGen/X86/dag-fmf-cse.ll
+++ b/llvm/test/CodeGen/X86/dag-fmf-cse.ll
@@ -9,8 +9,9 @@
 define float @fmf_should_not_break_cse(float %a, float %b) {
 ; CHECK-LABEL: fmf_should_not_break_cse:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm2
+; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; CHECK-NEXT: retq
 %mul1 = fmul fast float %a, %b
 %nega = fsub fast float 0.0, %a
@@ -22,8 +23,8 @@
 define <4 x float> @fmf_should_not_break_cse_vector(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: fmf_should_not_break_cse_vector:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; CHECK-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
 ; CHECK-NEXT: retq
 %mul1 = fmul fast <4 x float> %a, %b
 %nega = fsub fast <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %a
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1037,19 +1037,19 @@
 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -1075,19 +1075,19 @@
 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -1113,19 +1113,19 @@
 define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -1151,19 +1151,19 @@
 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -1189,19 +1189,19 @@
 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -1227,19 +1227,19 @@
 define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
 ; AVX512-INFS-NEXT: retq
 ;
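All of the fma_patterns updates have the same shape: because the fsub survives, (x - 1)*y and (x - (-1))*y now load the constant with its true sign and subtract, where they previously became an add with a negated constant. The instruction count is unchanged; only vaddps becomes vsubps and the constant-pool splat flips sign. A reduced sketch of the pattern (hypothetical function; the actual tests also cover FMA/FMA4/AVX512 run lines):

define <4 x float> @mul_sub_one_sketch(<4 x float> %x, <4 x float> %y) {
  ; Compiles to: vsubps with a +1.0 splat, then vmulps
  ; (previously vaddps with a -1.0 splat, then vmulps).
  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}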
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -631,25 +631,25 @@
 define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
 ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
+; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -677,25 +677,25 @@
 define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
 ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
+; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -723,25 +723,25 @@
 define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
 ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
+; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: retq
 ;
@@ -769,25 +769,25 @@
 define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
 ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
 ; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
+; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
 ; FMA-INFS-NEXT: retq
 ;
 ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
 ; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
 ; FMA4-INFS-NEXT: retq
 ;
 ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
 ; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
 ; AVX512-INFS-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/fp-fold.ll b/llvm/test/CodeGen/X86/fp-fold.ll
--- a/llvm/test/CodeGen/X86/fp-fold.ll
+++ b/llvm/test/CodeGen/X86/fp-fold.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
 
 define float @fadd_zero_strict(float %x) {
@@ -189,8 +190,7 @@
 define float @fsub_negzero_strict(float %x) {
 ; CHECK-LABEL: fsub_negzero_strict:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm0
+; CHECK-NEXT: subss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
 %r = fsub float %x, -0.0
 ret float %r
@@ -207,8 +207,7 @@
 define <4 x float> @fsub_negzero_strict_vector(<4 x float> %x) {
 ; CHECK-LABEL: fsub_negzero_strict_vector:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: subps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
 %r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
 ret <4 x float> %r
diff --git a/llvm/test/CodeGen/X86/fp_constant_op.ll b/llvm/test/CodeGen/X86/fp_constant_op.ll
--- a/llvm/test/CodeGen/X86/fp_constant_op.ll
+++ b/llvm/test/CodeGen/X86/fp_constant_op.ll
@@ -21,7 +21,7 @@
 ret double %tmp.1
 }
 ; CHECK-LABEL: foo_sub:
-; CHECK: fadd dword ptr
+; CHECK: fsub dword ptr
 
 define double @foo_subr(double %P) {
 %tmp.1 = fsub double 1.230000e+02, %P ; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/limited-prec.ll b/llvm/test/CodeGen/X86/limited-prec.ll
--- a/llvm/test/CodeGen/X86/limited-prec.ll
+++ b/llvm/test/CodeGen/X86/limited-prec.ll
@@ -327,7 +327,7 @@
 ; precision6-NEXT: fmuls {{\.LCPI.*}}
 ; precision6-NEXT: fadds {{\.LCPI.*}}
 ; precision6-NEXT: fmulp %st, %st(1)
-; precision6-NEXT: fadds {{\.LCPI.*}}
+; precision6-NEXT: fsubs {{\.LCPI.*}}
 ; precision6-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision6-NEXT: fmuls {{\.LCPI.*}}
 ; precision6-NEXT: faddp %st, %st(1)
@@ -351,11 +351,11 @@
 ; precision12-NEXT: fmuls {{\.LCPI.*}}
 ; precision12-NEXT: fadds {{\.LCPI.*}}
 ; precision12-NEXT: fmul %st(1), %st
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fmul %st(1), %st
 ; precision12-NEXT: fadds {{\.LCPI.*}}
 ; precision12-NEXT: fmulp %st, %st(1)
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision12-NEXT: fmuls {{\.LCPI.*}}
 ; precision12-NEXT: faddp %st, %st(1)
@@ -379,15 +379,15 @@
 ; precision18-NEXT: fmuls {{\.LCPI.*}}
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmulp %st, %st(1)
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision18-NEXT: fmuls {{\.LCPI.*}}
 ; precision18-NEXT: faddp %st, %st(1)
@@ -419,7 +419,7 @@
 ; precision6-NEXT: fmuls {{\.LCPI.*}}
 ; precision6-NEXT: fadds {{\.LCPI.*}}
 ; precision6-NEXT: fmulp %st, %st(1)
-; precision6-NEXT: fadds {{\.LCPI.*}}
+; precision6-NEXT: fsubs {{\.LCPI.*}}
 ; precision6-NEXT: fiaddl {{[0-9]+}}(%esp)
 ; precision6-NEXT: addl $8, %esp
 ; precision6-NEXT: retl
@@ -441,11 +441,11 @@
 ; precision12-NEXT: fmuls {{\.LCPI.*}}
 ; precision12-NEXT: fadds {{\.LCPI.*}}
 ; precision12-NEXT: fmul %st(1), %st
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fmul %st(1), %st
 ; precision12-NEXT: fadds {{\.LCPI.*}}
 ; precision12-NEXT: fmulp %st, %st(1)
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fiaddl {{[0-9]+}}(%esp)
 ; precision12-NEXT: addl $8, %esp
 ; precision12-NEXT: retl
@@ -467,15 +467,15 @@
 ; precision18-NEXT: fmuls {{\.LCPI.*}}
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmulp %st, %st(1)
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fiaddl {{[0-9]+}}(%esp)
 ; precision18-NEXT: addl $8, %esp
 ; precision18-NEXT: retl
@@ -505,7 +505,7 @@
 ; precision6-NEXT: fmuls {{\.LCPI.*}}
 ; precision6-NEXT: fadds {{\.LCPI.*}}
 ; precision6-NEXT: fmulp %st, %st(1)
-; precision6-NEXT: fadds {{\.LCPI.*}}
+; precision6-NEXT: fsubs {{\.LCPI.*}}
 ; precision6-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision6-NEXT: fmuls {{\.LCPI.*}}
 ; precision6-NEXT: faddp %st, %st(1)
@@ -527,11 +527,11 @@
 ; precision12-NEXT: flds (%esp)
 ; precision12-NEXT: fld %st(0)
 ; precision12-NEXT: fmuls {{\.LCPI.*}}
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fmul %st(1), %st
 ; precision12-NEXT: fadds {{\.LCPI.*}}
 ; precision12-NEXT: fmulp %st, %st(1)
-; precision12-NEXT: fadds {{\.LCPI.*}}
+; precision12-NEXT: fsubs {{\.LCPI.*}}
 ; precision12-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision12-NEXT: fmuls {{\.LCPI.*}}
 ; precision12-NEXT: faddp %st, %st(1)
@@ -553,15 +553,15 @@
 ; precision18-NEXT: flds (%esp)
 ; precision18-NEXT: fld %st(0)
 ; precision18-NEXT: fmuls {{\.LCPI.*}}
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fmul %st(1), %st
 ; precision18-NEXT: fadds {{\.LCPI.*}}
 ; precision18-NEXT: fmulp %st, %st(1)
-; precision18-NEXT: fadds {{\.LCPI.*}}
+; precision18-NEXT: fsubs {{\.LCPI.*}}
 ; precision18-NEXT: fildl {{[0-9]+}}(%esp)
 ; precision18-NEXT: fmuls {{\.LCPI.*}}
 ; precision18-NEXT: faddp %st, %st(1)
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -573,13 +573,13 @@
 ; SSE-LABEL: fsub_op1_constant:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
+; SSE-NEXT: subsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fsub_op1_constant:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 %x = load double, double* %p
 %b = fsub double %x, 42.0
diff --git a/llvm/test/CodeGen/X86/negative-sin.ll b/llvm/test/CodeGen/X86/negative-sin.ll
--- a/llvm/test/CodeGen/X86/negative-sin.ll
+++ b/llvm/test/CodeGen/X86/negative-sin.ll
@@ -71,9 +71,10 @@
 ; CHECK-LABEL: semi_strict2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: callq sin
 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vsubsd %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
 %f = fsub nsz double 0.0, %e
diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll
--- a/llvm/test/CodeGen/X86/pr44749.ll
+++ b/llvm/test/CodeGen/X86/pr44749.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT: movsd (%rsp), %xmm0 ## 8-byte Reload
 ; CHECK-NEXT: ## xmm0 = mem[0],zero
 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: addsd %xmm1, %xmm0
+; CHECK-NEXT: subsd %xmm1, %xmm0
 ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 8-byte Reload
 ; CHECK-NEXT: ## xmm1 = mem[0],zero
 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
--- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -10,7 +10,7 @@
 ; X32-LABEL: test1:
 ; X32: ## %bb.0:
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: addss LCPI0_0, %xmm0
+; X32-NEXT: subss LCPI0_0, %xmm0
 ; X32-NEXT: mulss LCPI0_1, %xmm0
 ; X32-NEXT: xorps %xmm1, %xmm1
 ; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -22,7 +22,7 @@
 ;
 ; X64-LABEL: test1:
 ; X64: ## %bb.0:
-; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: subss {{.*}}(%rip), %xmm0
 ; X64-NEXT: mulss {{.*}}(%rip), %xmm0
 ; X64-NEXT: xorps %xmm1, %xmm1
 ; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -35,7 +35,7 @@
 ;
 ; X32_AVX1-LABEL: test1:
 ; X32_AVX1: ## %bb.0:
 ; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX1-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
+; X32_AVX1-NEXT: vsubss LCPI0_0, %xmm0, %xmm0
 ; X32_AVX1-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
 ; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -47,7 +47,7 @@
 ;
 ; X64_AVX1-LABEL: test1:
 ; X64_AVX1: ## %bb.0:
-; X64_AVX1-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX1-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX1-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -60,7 +60,7 @@
 ;
 ; X32_AVX512-LABEL: test1:
 ; X32_AVX512: ## %bb.0:
 ; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX512-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
+; X32_AVX512-NEXT: vsubss LCPI0_0, %xmm0, %xmm0
 ; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
 ; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -73,7 +73,7 @@
 ;
 ; X64_AVX512-LABEL: test1:
 ; X64_AVX512: ## %bb.0:
-; X64_AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -100,7 +100,7 @@
 ; X32-LABEL: test2:
 ; X32: ## %bb.0:
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: addss LCPI1_0, %xmm0
+; X32-NEXT: subss LCPI1_0, %xmm0
 ; X32-NEXT: mulss LCPI1_1, %xmm0
 ; X32-NEXT: minss LCPI1_2, %xmm0
 ; X32-NEXT: xorps %xmm1, %xmm1
@@ -111,7 +111,7 @@
 ;
 ; X64-LABEL: test2:
 ; X64: ## %bb.0:
-; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: subss {{.*}}(%rip), %xmm0
 ; X64-NEXT: mulss {{.*}}(%rip), %xmm0
 ; X64-NEXT: minss {{.*}}(%rip), %xmm0
 ; X64-NEXT: xorps %xmm1, %xmm1
@@ -123,7 +123,7 @@
 ;
 ; X32_AVX-LABEL: test2:
 ; X32_AVX: ## %bb.0:
 ; X32_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX-NEXT: vaddss LCPI1_0, %xmm0, %xmm0
+; X32_AVX-NEXT: vsubss LCPI1_0, %xmm0, %xmm0
 ; X32_AVX-NEXT: vmulss LCPI1_1, %xmm0, %xmm0
 ; X32_AVX-NEXT: vminss LCPI1_2, %xmm0, %xmm0
 ; X32_AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
@@ -134,7 +134,7 @@
 ;
 ; X64_AVX-LABEL: test2:
 ; X64_AVX: ## %bb.0:
-; X64_AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
 ; X64_AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1