diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13995,6 +13995,7 @@ // TODO: Since we're approximating, we don't need an exact 1/3 exponent. // Some range near 1/3 should be fine. EVT VT = N->getValueType(0); + const TargetOptions &Options = DAG.getTarget().Options; if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. @@ -14005,7 +14006,7 @@ // TODO: We could select out the special cases if we don't have nsz/ninf. SDNodeFlags Flags = N->getFlags(); if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || - !Flags.hasApproximateFuncs()) + (!Flags.hasApproximateFuncs() && !Options.UnsafeFPMath)) return SDValue(); // Do not create a cbrt() libcall if the target does not have it, and do not @@ -14036,7 +14037,7 @@ // We only need no signed zeros for the 0.25 case. if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() || - !Flags.hasApproximateFuncs()) + (!Flags.hasApproximateFuncs() && !Options.UnsafeFPMath)) return SDValue(); // Don't double the number of libcalls. We are trying to inline fast code. diff --git a/llvm/test/CodeGen/PowerPC/pow.75.ll b/llvm/test/CodeGen/PowerPC/pow.75.ll --- a/llvm/test/CodeGen/PowerPC/pow.75.ll +++ b/llvm/test/CodeGen/PowerPC/pow.75.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -debug 2>&1 | FileCheck %s ; REQUIRES: asserts @@ -46,3 +45,15 @@ %r = call ninf afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } + +define float @pow_f32_three_fourth_fmf_unsafe(float %x) nounwind #0 { +; CHECK: Combining: {{.*}}: f32 = fpow ninf [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf [[SQRT]], [[SQRTSQRT]] + %r = call ninf float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +attributes #0 = { "unsafe-fp-math"="true" }