Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -798,6 +798,22 @@ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, ForCodeSize); } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + // TODO: should we allow UNDEFS? + if (llvm::any_of(Op->op_values(), + [&](SDValue N) { return !isa<ConstantFPSDNode>(N); })) + return 0; + if (!LegalOperations) + return 1; + if (TLI.isOperationLegal(ISD::ConstantFP, VT) && + TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); + } case ISD::FADD: if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) return 0; @@ -858,6 +874,16 @@ V.changeSign(); return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (int i = 0, e = Op.getNumOperands(); i != e; ++i) { + SDValue C = Op.getOperand(i); + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } case ISD::FADD: assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); @@ -878,7 +904,8 @@ Op.getOperand(0), Flags); case ISD::FSUB: // fold (fneg (fsub 0, B)) -> B - if (auto *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + // TODO: should we allow UNDEFS?
+ if (ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(Op.getOperand(0))) if (N0CFP->isZero()) return Op.getOperand(1); Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -110,18 +110,17 @@ ; GCN-LABEL: {{^}}fold_user_fneg_fabs_v2f16: ; CI: s_load_dword [[IN:s[0-9]+]] -; CI: s_or_b32 [[FNEG_FABS:s[0-9]+]], [[IN]], 0x80008000 ; CI: s_lshr_b32 -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} -; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} +; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}| +; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}| +; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} +; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0 -; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_mul_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, -4.0 +; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff -; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0] define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg.fabs = fsub <2 x half> , %fabs @@ -147,7 +146,7 @@ ; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16: ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff -; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0] define amdgpu_kernel void 
@s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg = fsub <2 x half> , %fabs Index: test/CodeGen/X86/fma_patterns.ll =================================================================== --- test/CodeGen/X86/fma_patterns.ll +++ test/CodeGen/X86/fma_patterns.ll @@ -1037,19 +1037,19 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1075,19 +1075,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: 
vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1151,19 +1151,19 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1189,19 +1189,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; Index: test/CodeGen/X86/fma_patterns_wide.ll =================================================================== --- 
test/CodeGen/X86/fma_patterns_wide.ll +++ test/CodeGen/X86/fma_patterns_wide.ll @@ -631,25 +631,25 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) { ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -677,25 +677,25 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) { ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = 
[-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -723,25 +723,25 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) { ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: 
vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -769,25 +769,25 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) { ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 ; AVX512-INFS-NEXT: retq ; Index: 
test/CodeGen/X86/fp-fold.ll =================================================================== --- test/CodeGen/X86/fp-fold.ll +++ test/CodeGen/X86/fp-fold.ll @@ -103,7 +103,6 @@ ; ANY-LABEL: fsub_neg_y_vector: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y @@ -115,7 +114,6 @@ ; ANY-LABEL: fsub_neg_y_vector_nonuniform: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y @@ -138,7 +136,6 @@ ; ANY-LABEL: fsub_neg_y_commute_vector: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %y, %mul @@ -212,7 +209,8 @@ define <4 x float> @fsub_negzero_vector(<4 x float> %x) { ; STRICT-LABEL: fsub_negzero_vector: ; STRICT: # %bb.0: -; STRICT-NEXT: subps {{.*}}(%rip), %xmm0 +; STRICT-NEXT: xorps %xmm1, %xmm1 +; STRICT-NEXT: addps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: fsub_negzero_vector: