Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13353,6 +13353,12 @@ return RV; } + // Fold X/Sqrt(X) -> Sqrt(X) + if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && + (Options.UnsafeFPMath || Flags.hasAllowReassociation())) + if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0)) + return N1; + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) TargetLowering::NegatibleCost CostN0 = TargetLowering::NegatibleCost::Expensive; Index: llvm/test/CodeGen/AArch64/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -448,8 +448,7 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind { ; FAULT-LABEL: sqrt_fdiv_common_operand: ; FAULT: // %bb.0: -; FAULT-NEXT: fsqrt d1, d0 -; FAULT-NEXT: fdiv d0, d0, d1 +; FAULT-NEXT: fsqrt d0, d0 ; FAULT-NEXT: ret ; ; CHECK-LABEL: sqrt_fdiv_common_operand: @@ -474,8 +473,7 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind { ; FAULT-LABEL: sqrt_fdiv_common_operand_vec: ; FAULT: // %bb.0: -; FAULT-NEXT: fsqrt v1.2d, v0.2d -; FAULT-NEXT: fdiv v0.2d, v0.2d, v1.2d +; FAULT-NEXT: fsqrt v0.2d, v0.2d ; FAULT-NEXT: ret ; ; CHECK-LABEL: sqrt_fdiv_common_operand_vec: @@ -493,16 +491,15 @@ ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) - %r = fdiv arcp reassoc <2 x double> %x, %sqrt + %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt ret <2 x double> %r } define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind { ; FAULT-LABEL: sqrt_fdiv_common_operand_extra_use: ; FAULT: // %bb.0: -; FAULT-NEXT: fsqrt d1, d0 -; FAULT-NEXT: fdiv d0, d0, d1 -; FAULT-NEXT: str d1, [x0] +; FAULT-NEXT: fsqrt d0, d0 +; FAULT-NEXT: str d0, [x0] ; FAULT-NEXT: ret ; ; CHECK-LABEL: sqrt_fdiv_common_operand_extra_use: Index: llvm/test/CodeGen/X86/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -903,14 +903,12 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind { ; SSE-LABEL: sqrt_fdiv_common_operand: ; SSE: # %bb.0: -; SSE-NEXT: sqrtsd %xmm0, %xmm1 -; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: sqrtsd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrt_fdiv_common_operand: ; AVX: # %bb.0: -; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %sqrt = call fast double @llvm.sqrt.f64(double %x) %r = fdiv fast double %x, %sqrt @@ -920,33 +918,29 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind { ; SSE-LABEL: sqrt_fdiv_common_operand_vec: ; SSE: # %bb.0: -; SSE-NEXT: sqrtpd %xmm0, %xmm1 -; SSE-NEXT: divpd %xmm1, %xmm0 +; SSE-NEXT: sqrtpd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrt_fdiv_common_operand_vec: ; AVX: # %bb.0: -; AVX-NEXT: vsqrtpd %xmm0, %xmm1 -; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsqrtpd %xmm0, %xmm0 ; AVX-NEXT: retq %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) - %r = fdiv arcp reassoc <2 x double> %x, %sqrt + %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt ret <2 x double> %r } define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind { ; SSE-LABEL: sqrt_fdiv_common_operand_extra_use: ; SSE: # %bb.0: -; SSE-NEXT: sqrtsd %xmm0, %xmm1 -; SSE-NEXT: movsd %xmm1, (%rdi) -; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-NEXT: movsd %xmm0, (%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: sqrt_fdiv_common_operand_extra_use: ; AVX: # %bb.0: -; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vmovsd %xmm1, (%rdi) -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd %xmm0, (%rdi) ; AVX-NEXT: retq %sqrt = call fast double @llvm.sqrt.f64(double %x) store double %sqrt, double* %p