diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -464,6 +464,9 @@
   /// Determine whether the no-infs flag is set.
   bool hasNoInfs() const;
 
+  /// A convenience function that checks for no-NaNs and no-infs
+  bool hasFiniteMath() const;
+
   /// Determine whether the no-signed-zeros flag is set.
   bool hasNoSignedZeros() const;
 
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -260,6 +260,11 @@
     return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
   }
 
+  /// Test if both the no-NaNs and no-infs flags are set on this operation.
+  bool hasFiniteMath() const {
+    return hasNoNaNs() && hasNoInfs();
+  }
+
   /// Test if this operation can ignore the sign of zero.
   bool hasNoSignedZeros() const {
     return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -266,6 +266,11 @@
   return cast<FPMathOperator>(this)->hasNoInfs();
 }
 
+bool Instruction::hasFiniteMath() const {
+  assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasFiniteMath();
+}
+
 bool Instruction::hasNoSignedZeros() const {
   assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
   return cast<FPMathOperator>(this)->hasNoSignedZeros();
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1866,7 +1866,9 @@
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
   Module *M = Pow->getModule();
-  bool AllowApprox = Pow->hasApproxFunc();
+  bool HasApproxFunc = Pow->hasApproxFunc();
+  bool AllowExpansion = Pow->doesNotAccessMemory() && Pow->hasAllowReassoc() &&
+                        Pow->hasFiniteMath();
   bool Ignored;
 
   // Propagate the math semantics from the call to any created instructions.
@@ -1904,7 +1906,8 @@
 
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
+
+  if ((HasApproxFunc || AllowExpansion) && match(Expo, m_APFloat(ExpoF)) &&
       !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
@@ -1970,7 +1973,7 @@
   }
 
   // powf(x, itofp(y)) -> powi(x, y)
-  if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+  if (HasApproxFunc && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
       return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
   }
diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll
--- a/llvm/test/Transforms/InstCombine/pow-4.ll
+++ b/llvm/test/Transforms/InstCombine/pow-4.ll
@@ -12,8 +12,8 @@
 declare double @pow(double, double)
 
 ; pow(x, 3.0)
-define double @test_simplify_3(double %x) {
-; CHECK-LABEL: @test_simplify_3(
+define double @test_simplify_3a(double %x) {
+; CHECK-LABEL: @test_simplify_3a(
 ; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]]
 ; CHECK-NEXT:    ret double [[TMP1]]
@@ -22,6 +22,17 @@
   ret double %1
 }
 
+; pow(x, 3.0)
+define double @test_simplify_3b(double %x) {
+; CHECK-LABEL: @test_simplify_3b(
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc nnan ninf nsz double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan ninf nsz double [[SQUARE]], [[X]]
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = call reassoc nnan ninf nsz double @llvm.pow.f64(double %x, double 3.000000e+00)
+  ret double %1
+}
+
 ; powf(x, 4.0)
 define float @test_simplify_4f(float %x) {
 ; CHECK-LABEL: @test_simplify_4f(
@@ -34,8 +45,8 @@
 }
 
 ; pow(x, 4.0)
-define double @test_simplify_4(double %x) {
-; CHECK-LABEL: @test_simplify_4(
+define double @test_simplify_4a(double %x) {
+; CHECK-LABEL: @test_simplify_4a(
 ; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    ret double [[TMP1]]
@@ -44,6 +55,17 @@
   ret double %1
 }
 
+; pow(x, 4.0)
+define double @test_simplify_4b(double %x) {
+; CHECK-LABEL: @test_simplify_4b(
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc nnan ninf nsz double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan ninf nsz double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = call reassoc nnan ninf nsz double @llvm.pow.f64(double %x, double 4.000000e+00)
+  ret double %1
+}
+
 ; powf(x, <15.0, 15.0>)
 define <2 x float> @test_simplify_15(<2 x float> %x) {
 ; CHECK-LABEL: @test_simplify_15(
@@ -73,8 +95,8 @@
 }
 
 ; powf(x, -19.0)
-define float @test_simplify_neg_19(float %x) {
-; CHECK-LABEL: @test_simplify_neg_19(
+define float @test_simplify_neg_19a(float %x) {
+; CHECK-LABEL: @test_simplify_neg_19a(
 ; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
@@ -88,6 +110,22 @@
   ret float %1
 }
 
+; powf(x, -19.0)
+define float @test_simplify_neg_19b(float %x) {
+; CHECK-LABEL: @test_simplify_neg_19b(
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul reassoc nnan ninf nsz float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan ninf nsz float [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan ninf nsz float [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul reassoc nnan ninf nsz float [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul reassoc nnan ninf nsz float [[SQUARE]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul reassoc nnan ninf nsz float [[TMP4]], [[X]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv reassoc nnan ninf nsz float 1.000000e+00, [[TMP5]]
+; CHECK-NEXT:    ret float [[RECIPROCAL]]
+;
+  %1 = call reassoc nnan ninf nsz float @llvm.pow.f32(float %x, float -1.900000e+01)
+  ret float %1
+}
+
 ; pow(x, 11.23)
 define double @test_simplify_11_23(double %x) {
 ; CHECK-LABEL: @test_simplify_11_23(