Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1181,11 +1181,6 @@ /// Use square root in place of pow(x, +/-0.5). Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { - // TODO: There is some subset of 'fast' under which these transforms should - // be allowed. - if (!Pow->isFast()) - return nullptr; - Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); Type *Ty = Pow->getType(); @@ -1194,6 +1189,10 @@ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) return nullptr; + if (ExpoF->isExactlyValue(-0.5) && + (!Pow->hasNoSignedZeros() || !Pow->hasNoInfs())) + return nullptr; + // If errno is never set, then use the intrinsic for sqrt(). if (Pow->hasFnAttr(Attribute::ReadNone)) { Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), @@ -1214,6 +1213,24 @@ if (ExpoF->isNegative()) Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); + // Deal with x == -0. We expand pow(x, 0.5/-0.5) to fabs(sqrt(x)|1/sqrt(x)). + // This is faster than calling pow(), and still handles -0 correctly. + if (!Pow->hasNoSignedZeros()) { + Function *FAbsFn = + Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::fabs, Ty); + Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs"); + } + + // Deal with x == -infinity. We expand + // * pow(x, 0.5) to (x == -infinity ? +infinity : Sqrt)) + // This is faster than calling pow(), and still handles negative infinity + // correctly. 
+ if (!Pow->hasNoInfs()) { + Value *NegInf = ConstantFP::getInfinity(Ty, true); + Value *PosInf = ConstantFP::getInfinity(Ty); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); + Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt); + } return Sqrt; } @@ -1299,28 +1316,6 @@ if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; - // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). - ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); - if (ExpoC && ExpoC->isExactlyValue(0.5) && - hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). - // This is faster than calling pow(), and still handles -0.0 and - // negative infinity correctly. - // TODO: In finite-only mode, this could be just fabs(sqrt(x)). - Value *PosInf = ConstantFP::getInfinity(Ty); - Value *NegInf = ConstantFP::getInfinity(Ty, true); - - // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is - // an intrinsic, to match errno semantics. - Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), - B, Attrs); - Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); - Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); - Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); - Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); - return Sqrt; - } - // pow(x, n) -> x * x * x * ... 
const APFloat *ExpoF; if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { Index: test/Transforms/InstCombine/pow-1.ll =================================================================== --- test/Transforms/InstCombine/pow-1.ll +++ test/Transforms/InstCombine/pow-1.ll @@ -293,7 +293,7 @@ declare double @llvm.pow.f64(double %Val, double %Power) define double @test_simplify17(double %x) { ; ANY-LABEL: @test_simplify17( -; ANY-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) #2 +; ANY-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) ; ANY-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) ; ANY-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 ; ANY-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] Index: test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- test/Transforms/InstCombine/pow-sqrt.ll +++ test/Transforms/InstCombine/pow-sqrt.ll @@ -19,11 +19,11 @@ define double @pow_intrinsic_half_no_FMF(double %x) { ; CHECK-LABEL: @pow_intrinsic_half_no_FMF( -; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) #1 -; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[TMP1]]) ; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; CHECK-NEXT: ret double [[TMP2]] ; %pow = call double @llvm.pow.f64(double %x, double 5.0e-01) ret double %pow @@ -43,95 +43,73 @@ ret double %pow } -; FIXME - define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_half_approx( -; CHECK-NEXT: [[POW:%.*]] = call 
afn <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 5.000000e-01, double 5.000000e-01>) -; CHECK-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]]) +; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[ISINF]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[ABS]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>) ret <2 x double> %pow } -; FIXME: ; If we can disregard INFs, no need for a select. - define double @pow_libcall_half_ninf(double %x) { ; CHECK-LABEL: @pow_libcall_half_ninf( ; CHECK-NEXT: [[SQRT:%.*]] = call ninf double @sqrt(double [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call ninf double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp ninf oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: ret double [[ABS]] ; %pow = call ninf double @pow(double %x, double 5.0e-01) ret double %pow } -; FIXME: - define <2 x double> @pow_intrinsic_half_ninf(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_half_ninf( -; CHECK-NEXT: [[POW:%.*]] = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 5.000000e-01, double 5.000000e-01>) -; CHECK-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[ABS]] ; %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>) ret <2 x double> %pow } -; FIXME: -; If we can disregard -0.0, no need for fabs. 
- define double @pow_libcall_half_nsz(double %x) { ; CHECK-LABEL: @pow_libcall_half_nsz( ; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call nsz double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]] ; CHECK-NEXT: ret double [[TMP1]] ; %pow = call nsz double @pow(double %x, double 5.0e-01) ret double %pow } -; FIXME: - define double @pow_intrinsic_half_nsz(double %x) { ; CHECK-LABEL: @pow_intrinsic_half_nsz( -; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]]) #1 -; CHECK-NEXT: [[ABS:%.*]] = call nsz double @llvm.fabs.f64(double [[SQRT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[TMP1]] +; CHECK-NEXT: ret double [[TMP2]] ; %pow = call nsz double @llvm.pow.f64(double %x, double 5.0e-01) ret double %pow } -; FIXME: -; This is just sqrt. 
- define float @pow_libcall_half_ninf_nsz(float %x) { ; CHECK-LABEL: @pow_libcall_half_ninf_nsz( ; CHECK-NEXT: [[SQRTF:%.*]] = call ninf nsz float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call ninf nsz float @llvm.fabs.f32(float [[SQRTF]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp ninf nsz oeq float [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]] -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: ret float [[SQRTF]] ; %pow = call ninf nsz float @powf(float %x, float 5.0e-01) ret float %pow } -; FIXME: - define double @pow_intrinsic_half_ninf_nsz(double %x) { ; CHECK-LABEL: @pow_intrinsic_half_ninf_nsz( -; CHECK-NEXT: [[SQRT:%.*]] = call ninf nsz double @sqrt(double [[X:%.*]]) #1 -; CHECK-NEXT: [[ABS:%.*]] = call ninf nsz double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp ninf nsz oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call ninf nsz double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECK-NEXT: ret double [[TMP1]] ; %pow = call ninf nsz double @llvm.pow.f64(double %x, double 5.0e-01) @@ -227,24 +205,23 @@ ret double %pow } -; FIXME: ; This is just recip-sqrt. 
define double @pow_intrinsic_neghalf_ninf_nsz(double %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_ninf_nsz( -; CHECK-NEXT: [[POW:%.*]] = call ninf nsz double @llvm.pow.f64(double [[X:%.*]], double -5.000000e-01) -; CHECK-NEXT: ret double [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call ninf nsz double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf nsz double 1.000000e+00, [[TMP1]] +; CHECK-NEXT: ret double [[RECIPROCAL]] ; %pow = call ninf nsz double @llvm.pow.f64(double %x, double -5.0e-01) ret double %pow } -; FIXME: - define float @pow_libcall_neghalf_ninf_nsz(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_ninf_nsz( -; CHECK-NEXT: [[POW:%.*]] = call ninf nsz float @powf(float [[X:%.*]], float -5.000000e-01) -; CHECK-NEXT: ret float [[POW]] +; CHECK-NEXT: [[SQRTF:%.*]] = call ninf nsz float @sqrtf(float [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf nsz float 1.000000e+00, [[SQRTF]] +; CHECK-NEXT: ret float [[RECIPROCAL]] ; %pow = call ninf nsz float @powf(float %x, float -5.0e-01) ret float %pow