diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47045,6 +47045,8 @@ EVT VT = Op.getValueType(); EVT SVT = VT.getScalarType(); unsigned Opc = Op.getOpcode(); + SDNodeFlags Flags = Op.getNode()->getFlags(); + const TargetOptions &Options = getTargetMachine().Options; switch (Opc) { case ISD::FMA: case X86ISD::FMSUB: @@ -47059,6 +47061,11 @@ !isOperationLegal(ISD::FMA, VT)) break; + // Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z) + // if it may have signed zeros. + if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath) + break; + // This is always negatible for free but we might be able to remove some // extra operand negations as well. SmallVector NewOps(Op.getNumOperands(), SDValue()); diff --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll --- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll @@ -20,7 +20,7 @@ ; X64-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 ; X64-NEXT: retq %sub.i = fsub <8 x float> , %c - %r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2 + %r = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2 ret <8 x float> %r } @@ -34,7 +34,7 @@ ; X64: # %bb.0: ; X64-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; X64-NEXT: retq - %t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 + %t0 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 %sub.i = fsub <4 x float> , %t0 ret <4 x float> %sub.i } @@ -57,7 +57,7 @@ %b0 = extractelement <4 x float> %b, i64 0 %c0 = extractelement <4 x float> %c, i64 0 %negb0 = fneg float %b0 - %t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2 + %t0 = tail call nsz float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2 %i = insertelement <4 x float> %a, float %t0, i64 0 %sub.i = fsub <4 x float> , %i ret <4 x float> %sub.i @@ -74,7 +74,7 @@ ; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; X64-NEXT: retq %negc = fneg <8 x float> %c - %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2 + %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2 %sub.i = fsub <8 x float> , %t0 ret <8 x float> %sub.i } @@ -91,7 +91,7 @@ ; X64-NEXT: retq %sub.c = fsub <8 x float> , %c %negsubc = fneg <8 x float> %sub.c - %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2 + %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2 ret <8 x float> %t0 } @@ -105,7 +105,7 @@ ; X64: # %bb.0: ; X64-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; X64-NEXT: retq - %t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2 + %t0 = tail call nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2 %sub.i = fsub <2 x double> , %t0 ret <2 x double> %sub.i } @@ -125,7 +125,7 @@ %t0 = insertelement <8 x float> undef, float %a, i32 0 %t1 = fsub <8 x float> , %t0 %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer - %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c) + %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c) ret <8 x float> %t3 } @@ -145,6 +145,6 @@ %t0 = fsub float -0.0, %a %t1 = insertelement <8 x float> undef, float %t0, i32 0 %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer - %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c) + %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c) ret <8 x float> %t3 } diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll --- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll @@ -29,10 +29,17 @@ } define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) { -; CHECK-LABEL: test2: -; CHECK: # %bb.0: -; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 -; CHECK-NEXT: retq +; SKX-LABEL: test2: +; SKX: # %bb.0: +; SKX-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test2: +; KNL: # %bb.0: +; KNL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 +; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; KNL-NEXT: retq %fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) %neg = fneg <16 x float> %fma ret <16 x float> %neg @@ -49,10 +56,17 @@ } define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) { -; CHECK-LABEL: test3: -; CHECK: # %bb.0: -; CHECK-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 -; CHECK-NEXT: retq +; SKX-LABEL: test3: +; SKX: # %bb.0: +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test3: +; KNL: # %bb.0: +; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 +; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; KNL-NEXT: retq %t0 = fneg <16 x float> %b %t1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c) %sub.i = fneg <16 x float> %t1 @@ -71,10 +85,17 @@ } define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) { -; CHECK-LABEL: test4: -; CHECK: # %bb.0: -; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 -; CHECK-NEXT: retq +; SKX-LABEL: test4: +; SKX: # %bb.0: +; SKX-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test4: +; KNL: # %bb.0: +; KNL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 +; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; KNL-NEXT: retq %t0 = fneg <16 x float> %b %t1 = fneg <16 x float> %c %t2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1) @@ -106,10 +127,17 @@ } define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) { -; CHECK-LABEL: test6: -; CHECK: # %bb.0: -; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: retq +; SKX-LABEL: test6: +; SKX: # %bb.0: +; SKX-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test6: +; KNL: # %bb.0: +; KNL-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 +; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; KNL-NEXT: retq %t0 = fneg <16 x float> %b %t1 = fneg <16 x float> %c %t2 = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10) @@ -130,10 +158,18 @@ } define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) { -; CHECK-LABEL: test7: -; CHECK: # %bb.0: -; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 -; CHECK-NEXT: retq +; SKX-LABEL: test7: +; SKX: # %bb.0: +; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 +; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test7: +; KNL: # %bb.0: +; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 +; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; KNL-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; KNL-NEXT: retq %t0 = fneg <8 x float> %c %t1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0) %sub.i = fsub <8 x float> , %t1 @@ -163,10 +199,17 @@ } define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) { -; CHECK-LABEL: test9: -; CHECK: # %bb.0: -; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 -; CHECK-NEXT: retq +; SKX-LABEL: test9: +; SKX: # %bb.0: +; SKX-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 +; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test9: +; KNL: # %bb.0: +; KNL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 +; KNL-NEXT: vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; KNL-NEXT: retq %t0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) %sub.i = fneg <8 x double> %t0 ret <8 x double> %sub.i diff --git a/llvm/test/CodeGen/X86/fma-signed-zero.ll b/llvm/test/CodeGen/X86/fma-signed-zero.ll --- a/llvm/test/CodeGen/X86/fma-signed-zero.ll +++ b/llvm/test/CodeGen/X86/fma-signed-zero.ll @@ -12,7 +12,8 @@ define float @fneg_fma32(float %x, float %y, float %z) { ; NO-NSZ-OPTION-LABEL: fneg_fma32: ; NO-NSZ-OPTION: # %bb.0: -; NO-NSZ-OPTION-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; NO-NSZ-OPTION-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; NO-NSZ-OPTION-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0 ; NO-NSZ-OPTION-NEXT: retq ; ; NSZ-OPTION-LABEL: fneg_fma32: @@ -48,7 +49,8 @@ define double @fneg_fma64(double %x, double %y, double %z) { ; NO-NSZ-OPTION-LABEL: fneg_fma64: ; NO-NSZ-OPTION: # %bb.0: -; NO-NSZ-OPTION-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; NO-NSZ-OPTION-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; NO-NSZ-OPTION-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0 ; NO-NSZ-OPTION-NEXT: retq ; ; NSZ-OPTION-LABEL: fneg_fma64: diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1308,10 +1308,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub float 1.0, %t - %tx = fmul float %x, %t - %ty = fmul float %y, %t1 - %r = fadd float %tx, %ty + %t1 = fsub nsz float 1.0, %t + %tx = fmul nsz float %x, %t + %ty = fmul nsz float %y, %t1 + %r = fadd nsz float %tx, %ty ret float %r } @@ -1357,10 +1357,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <4 x float> , %t - %tx = fmul <4 x float> %x, %t - %ty = fmul <4 x float> %y, %t1 - %r = fadd <4 x float> %tx, %ty + %t1 = fsub nsz <4 x float> , %t + %tx = fmul nsz <4 x float> %x, %t + %ty = fmul nsz <4 x float> %y, %t1 + %r = fadd nsz <4 x float> %tx, %ty ret <4 x float> %r } @@ -1406,10 +1406,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <8 x float> , %t - %tx = fmul <8 x float> %x, %t - %ty = fmul <8 x float> %y, %t1 - %r = fadd <8 x float> %tx, %ty + %t1 = fsub nsz <8 x float> , %t + %tx = fmul nsz <8 x float> %x, %t + %ty = fmul nsz <8 x float> %y, %t1 + %r = fadd nsz <8 x float> %tx, %ty ret <8 x float> %r } @@ -1455,10 +1455,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub double 1.0, %t - %tx = fmul double %x, %t - %ty = fmul double %y, %t1 - %r = fadd double %tx, %ty + %t1 = fsub nsz double 1.0, %t + %tx = fmul nsz double %x, %t + %ty = fmul nsz double %y, %t1 + %r = fadd nsz double %tx, %ty ret double %r } @@ -1504,10 +1504,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <2 x double> , %t - %tx = fmul <2 x double> %x, %t - %ty = fmul <2 x double> %y, %t1 - %r = fadd <2 x double> %tx, %ty + %t1 = fsub nsz <2 x double> , %t + %tx = fmul nsz <2 x double> %x, %t + %ty = fmul nsz <2 x double> %y, %t1 + %r = fadd nsz <2 x double> %tx, %ty ret <2 x double> %r } @@ -1553,10 +1553,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <4 x double> , %t - %tx = fmul <4 x double> %x, %t - %ty = fmul <4 x double> %y, %t1 - %r = fadd <4 x double> %tx, %ty + %t1 = fsub nsz <4 x double> , %t + %tx = fmul nsz <4 x double> %x, %t + %ty = fmul nsz <4 x double> %y, %t1 + %r = fadd nsz <4 x double> %tx, %ty ret <4 x double> %r } @@ -1579,9 +1579,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %mul = fmul <4 x float> %a0, %a1 - %add = fadd <4 x float> %mul, %a2 - %neg = fsub <4 x float> , %add + %mul = fmul nsz <4 x float> %a0, %a1 + %add = fadd nsz <4 x float> %mul, %a2 + %neg = fsub nsz <4 x float> , %add ret <4 x float> %neg } @@ -1600,9 +1600,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %mul = fmul <4 x double> %a0, %a1 - %sub = fsub <4 x double> %mul, %a2 - %neg = fsub <4 x double> , %sub + %mul = fmul nsz <4 x double> %a0, %a1 + %sub = fsub nsz <4 x double> %mul, %a2 + %neg = fsub nsz <4 x double> , %sub ret <4 x double> %neg } @@ -1621,10 +1621,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %mul = fmul <4 x float> %a0, %a1 - %neg0 = fsub <4 x float> , %mul - %add = fadd <4 x float> %neg0, %a2 - %neg1 = fsub <4 x float> , %add + %mul = fmul nsz <4 x float> %a0, %a1 + %neg0 = fsub nsz <4 x float> , %mul + %add = fadd nsz <4 x float> %neg0, %a2 + %neg1 = fsub nsz <4 x float> , %add ret <4 x float> %neg1 } @@ -1643,10 +1643,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %mul = fmul <4 x double> %a0, %a1 - %neg0 = fsub <4 x double> , %mul - %sub = fsub <4 x double> %neg0, %a2 - %neg1 = fsub <4 x double> , %sub + %mul = fmul nsz <4 x double> %a0, %a1 + %neg0 = fsub nsz <4 x double> , %mul + %sub = fsub nsz <4 x double> %neg0, %a2 + %neg1 = fsub nsz <4 x double> , %sub ret <4 x double> %neg1 } diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -868,10 +868,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <16 x float> , %t - %tx = fmul <16 x float> %x, %t - %ty = fmul <16 x float> %y, %t1 - %r = fadd <16 x float> %tx, %ty + %t1 = fsub nsz <16 x float> , %t + %tx = fmul nsz <16 x float> %x, %t + %ty = fmul nsz <16 x float> %y, %t1 + %r = fadd nsz <16 x float> %tx, %ty ret <16 x float> %r } @@ -927,10 +927,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub <8 x double> , %t - %tx = fmul <8 x double> %x, %t - %ty = fmul <8 x double> %y, %t1 - %r = fadd <8 x double> %tx, %ty + %t1 = fsub nsz <8 x double> , %t + %tx = fmul nsz <8 x double> %x, %t + %ty = fmul nsz <8 x double> %y, %t1 + %r = fadd nsz <8 x double> %tx, %ty ret <8 x double> %r } @@ -955,9 +955,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %mul = fmul <16 x float> %a0, %a1 - %add = fadd <16 x float> %mul, %a2 - %neg = fsub <16 x float> , %add + %mul = fmul nsz <16 x float> %a0, %a1 + %add = fadd nsz <16 x float> %mul, %a2 + %neg = fsub nsz <16 x float> , %add ret <16 x float> %neg } @@ -978,9 +978,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %mul = fmul <8 x double> %a0, %a1 - %sub = fsub <8 x double> %mul, %a2 - %neg = fsub <8 x double> , %sub + %mul = fmul nsz <8 x double> %a0, %a1 + %sub = fsub nsz <8 x double> %mul, %a2 + %neg = fsub nsz <8 x double> , %sub ret <8 x double> %neg } @@ -1001,10 +1001,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %mul = fmul <16 x float> %a0, %a1 - %neg0 = fsub <16 x float> , %mul - %add = fadd <16 x float> %neg0, %a2 - %neg1 = fsub <16 x float> , %add + %mul = fmul nsz <16 x float> %a0, %a1 + %neg0 = fsub nsz <16 x float> , %mul + %add = fadd nsz <16 x float> %neg0, %a2 + %neg1 = fsub nsz <16 x float> , %add ret <16 x float> %neg1 } @@ -1025,10 +1025,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %mul = fmul <8 x double> %a0, %a1 - %neg0 = fsub <8 x double> , %mul - %sub = fsub <8 x double> %neg0, %a2 - %neg1 = fsub <8 x double> , %sub + %mul = fmul nsz <8 x double> %a0, %a1 + %neg0 = fsub nsz <8 x double> , %mul + %sub = fsub nsz <8 x double> %neg0, %a2 + %neg1 = fsub nsz <8 x double> , %sub ret <8 x double> %neg1 }