Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1354,7 +1354,26 @@
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     APFloat LimF(ExpoF->getSemantics(), 33.0),
             ExpoA(abs(*ExpoF));
-    if (ExpoA.isInteger() && ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+    if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+      // This transformation applies to integer or integer+0.5 exponents only.
+      bool ExpoIsIntPlusHalf = false;
+      if (!ExpoA.isInteger()) {
+        // Doubling an integer+0.5 value yields an integer, so test 2 * ExpoA
+        // for integrality. Bail out unless the addition reports opOK, so the
+        // test below is made on a result computed without any FP exception.
+        APFloat Expo2 = ExpoA;
+        if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+          return nullptr;
+
+        // The expansion below emits a sqrt libcall, so it must be available.
+        bool HasSqrtFn = hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt,
+                                         LibFunc_sqrtf, LibFunc_sqrtl);
+        if (!Expo2.isInteger() || !HasSqrtFn)
+          return nullptr;
+
+        ExpoIsIntPlusHalf = true;
+      }
+
       // We will memoize intermediate products of the Addition Chain.
       Value *InnerChain[33] = {nullptr};
       InnerChain[1] = Base;
@@ -1365,6 +1384,15 @@
       ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
       Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
 
+      // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
+      // NOTE(review): for an exponent of exactly +/-0.5, getPow above is
+      // called with 0.5 -- confirm that case is handled before this point.
+      if (ExpoIsIntPlusHalf) {
+        Value *Sqrt = emitUnaryFloatFnCall(Base, "sqrt", B,
+                                           Callee->getAttributes());
+        FMul = B.CreateFMul(FMul, Sqrt);
+      }
+
       // If the exponent is negative, then get the reciprocal.
       if (ExpoF->isNegative())
         FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
Index: test/Transforms/InstCombine/pow-4.ll
===================================================================
--- test/Transforms/InstCombine/pow-4.ll
+++ test/Transforms/InstCombine/pow-4.ll
@@ -117,3 +117,33 @@
   ret double %1
 }
 
+; pow(x, 16.5)
+define double @test_simplify_16_5(double %x) {
+; CHECK-LABEL: @test_simplify_16_5(
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X]]) #1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = call fast double @llvm.pow.f64(double %x, double 1.650000e+01)
+  ret double %1
+}
+
+; pow(x, -16.5)
+define double @test_simplify_neg_16_5(double %x) {
+; CHECK-LABEL: @test_simplify_neg_16_5(
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X]]) #1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
+; CHECK-NEXT:    ret double [[RECIPROCAL]]
+;
+  %1 = call fast double @llvm.pow.f64(double %x, double -1.650000e+01)
+  ret double %1
+}