diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13032,7 +13032,7 @@ // that only minsize should restrict this. bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); - if (!UnsafeMath && !Flags.hasAllowReciprocal()) + if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal())) return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. @@ -13186,8 +13186,9 @@ } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) - return RV; + if (Options.NoInfsFPMath || Flags.hasNoInfs()) + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll --- a/llvm/test/CodeGen/AMDGPU/fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll @@ -32,7 +32,7 @@ ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]], define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %fdiv = fdiv float %a, %b + %fdiv = fdiv ninf float %a, %b store float %fdiv, float addrspace(1)* %out ret void } @@ -152,7 +152,7 @@ ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %fdiv = fdiv arcp float %a, %b + %fdiv = fdiv arcp ninf float %a, %b store float %fdiv, float addrspace(1)* %out ret void } @@ -210,7 +210,7 @@ ; GCN: v_rcp_f32 define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %fdiv = fdiv arcp <2 x float> %a, %b + %fdiv = fdiv arcp ninf <2 x float> %a, %b store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -279,7 +279,7 @@ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr - %result = fdiv arcp <4 x float> %a, %b + %result = fdiv arcp ninf <4 x float> %a, %b store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -23,7 +23,7 @@ entry: %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0 %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer - %div = fdiv reassoc nsz arcp <4 x double> %a1, %splat.splat + %div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat %sub = fsub reassoc nsz <4 x double> , %div ret <4 x double> %sub } diff --git a/llvm/test/CodeGen/PowerPC/fdiv.ll b/llvm/test/CodeGen/PowerPC/fdiv.ll --- a/llvm/test/CodeGen/PowerPC/fdiv.ll +++ b/llvm/test/CodeGen/PowerPC/fdiv.ll @@ -1,6 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s +define dso_local float @foo_nosw(float %0, float %1) local_unnamed_addr { +; CHECK-LABEL: foo_nosw: +; CHECK: # %bb.0: +; CHECK-NEXT: xsdivsp 1, 1, 2 +; CHECK-NEXT: blr + %3 = fdiv reassoc arcp nsz float %0, %1 + ret float %3 +} + define dso_local float @foo(float %0, float %1) local_unnamed_addr { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: @@ -10,6 +19,6 @@ ; CHECK-NEXT: xsmaddasp 0, 3, 1 ; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: blr - %3 = fdiv reassoc arcp nsz float %0, %1 + %3 = fdiv reassoc arcp nsz ninf float %0, %1 ret float %3 } diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -236,7 +236,7 @@ ; CHECK-NEXT: qvfmadd 1, 0, 1, 3 ; CHECK-NEXT: blr entry: - %r = fdiv arcp reassoc nsz <4 x double> %a, %b + %r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b ret <4 x double> %r } @@ -272,7 +272,7 @@ ; CHECK-NEXT: qvfmadds 1, 0, 1, 3 ; CHECK-NEXT: blr entry: - %r = fdiv arcp reassoc <4 x float> %a, %b + %r = fdiv arcp reassoc ninf <4 x float> %a, %b ret <4 x float> %r } diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -431,7 +431,7 @@ ; CHECK-P9-NEXT: blr %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a) %y = fmul reassoc nsz float %x, %b - %z = fdiv reassoc arcp nsz float %c, %y + %z = fdiv reassoc arcp nsz ninf float %c, %y ret float %z } @@ -602,7 +602,7 @@ ; CHECK-P9-NEXT: xsmaddadp 0, 3, 1 ; CHECK-P9-NEXT: fmr 1, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz double %a, %b + %r = fdiv reassoc arcp nsz ninf double %a, %b ret double %r } @@ -651,7 +651,7 @@ ; CHECK-P9-NEXT: xsmaddasp 0, 3, 1 ; CHECK-P9-NEXT: fmr 1, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz float %a, %b + %r = fdiv reassoc arcp nsz ninf float %a, %b ret float %r } @@ -705,7 +705,7 @@ ; CHECK-P9-NEXT: xvmaddasp 0, 1, 34 ; CHECK-P9-NEXT: xxlor 34, 0, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz <4 x float> %a, %b + %r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b ret <4 x float> %r } diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -1,15 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s -define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) { -; CHECK-LABEL: repeated_fp_divisor: +; Check if this causes infinite loop when estimation disabled +define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) { +; CHECK-LABEL: repeated_fp_divisor_noest: ; CHECK: # %bb.0: ; CHECK-NEXT: xscvdpspn 0, 1 +; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: xxspltw 0, 0, 0 +; CHECK-NEXT: xvdivsp 0, 35, 0 ; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: xvmulsp 1, 34, 35 +; CHECK-NEXT: xvmulsp 34, 1, 0 +; CHECK-NEXT: blr + %ins = insertelement <4 x float> undef, float %a, i32 0 + %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer + %t1 = fmul reassoc <4 x float> %b, + %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat + ret <4 x float> %mul +} + +define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) { +; CHECK-LABEL: repeated_fp_divisor: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdpspn 0, 1 +; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l ; CHECK-NEXT: lvx 4, 0, 3 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: xvresp 1, 0 @@ -21,7 +44,7 @@ %ins = insertelement <4 x float> undef, float %a, i32 0 %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer %t1 = fmul reassoc <4 x float> %b, - %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat + %mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat ret <4 x float> %mul } diff --git a/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll b/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll --- a/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll @@ -10,7 +10,7 @@ entry: %0 = load float, float* @a, align 4 %1 = load float, float* @b, align 4 - %div = fdiv arcp float %0, %1 + %div = fdiv arcp ninf float %0, %1 ret float %div ; CHECK-LABEL: @emit_xsresp ; CHECK: xsresp {{[0-9]+}} @@ -38,7 +38,7 @@ entry: %0 = load double, double* @c, align 8 %1 = load double, double* @d, align 8 - %div = fdiv arcp double %0, %1 + %div = fdiv arcp ninf double %0, %1 ret double %div ; CHECK-LABEL: @emit_xsredp ; CHECK: xsredp {{[0-9]+}} diff --git a/llvm/test/CodeGen/X86/fdiv-combine-vec.ll b/llvm/test/CodeGen/X86/fdiv-combine-vec.ll --- a/llvm/test/CodeGen/X86/fdiv-combine-vec.ll +++ b/llvm/test/CodeGen/X86/fdiv-combine-vec.ll @@ -120,7 +120,7 @@ ; AVX-NEXT: retq %vy = insertelement <4 x float> undef, float %y, i32 0 %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer - %r = fdiv arcp reassoc <4 x float> %x, %splaty + %r = fdiv arcp reassoc ninf <4 x float> %x, %splaty ret <4 x float> %r }