Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1189,10 +1189,6 @@ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) return nullptr; - if (ExpoF->isExactlyValue(-0.5) && - (!Pow->hasNoSignedZeros() || !Pow->hasNoInfs())) - return nullptr; - // If errno is never set, then use the intrinsic for sqrt(). if (Pow->hasFnAttr(Attribute::ReadNone)) { Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), @@ -1213,8 +1209,8 @@ if (ExpoF->isNegative()) Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); - // Deal with x == -0. We expand pow(x, 0.5/-0.5) to fabs(sqrt(x)|1/sqrt(x)). - // This is faster than calling pow(), and still handles -0 correctly. + // Deal with x == -0.0. We expand pow(x, 0.5|-0.5) to fabs(sqrt(x)|1/sqrt(x)). + // This is faster than calling pow(), and still handles -0.0 correctly. if (!Pow->hasNoSignedZeros()) { Function *FAbsFn = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::fabs, Ty); @@ -1223,13 +1219,20 @@ // Deal with x == -infinity. We expand // * pow(x, 0.5) to (x == -infinity ? +infinity : Sqrt)) + // * pow(x, -0.5) to (x == -infinity ? +0.0 : Sqrt)) // This is faster than calling pow(), and still handles negative infinity // correctly. 
if (!Pow->hasNoInfs()) { Value *NegInf = ConstantFP::getInfinity(Ty, true); - Value *PosInf = ConstantFP::getInfinity(Ty); - Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); - Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt); + if (ExpoF->isNegative()) { + Value *PositiveZero = ConstantFP::get(Ty, 0.0); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf); + Sqrt = B.CreateSelect(FCmp, PositiveZero, Sqrt); + } else { + Value *PosInf = ConstantFP::getInfinity(Ty); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); + Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt); + } } return Sqrt; } Index: test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- test/Transforms/InstCombine/pow-sqrt.ll +++ test/Transforms/InstCombine/pow-sqrt.ll @@ -136,70 +136,77 @@ ret double %pow } -; FIXME: ; -0.5 means take the reciprocal. define float @pow_libcall_neghalf_no_FMF(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_no_FMF( -; CHECK-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float -5.000000e-01) -; CHECK-NEXT: ret float [[POW]] +; CHECK-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv float 1.000000e+00, [[SQRTF]] +; CHECK-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[RECIPROCAL]]) +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float 0.000000e+00, float [[ABS]] +; CHECK-NEXT: ret float [[TMP2]] ; %pow = call float @powf(float %x, float -5.0e-01) ret float %pow } -; FIXME: - define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF( -; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -5.000000e-01, double -5.000000e-01>) -; CHECK-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]] +; CHECK-NEXT: 
[[ABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[RECIPROCAL]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[ABS]] +; CHECK-NEXT: ret <2 x double> [[TMP3]] ; %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>) ret <2 x double> %pow } -; FIXME: ; If we can disregard INFs, no need for a select. define double @pow_libcall_neghalf_ninf(double %x) { ; CHECK-LABEL: @pow_libcall_neghalf_ninf( -; CHECK-NEXT: [[POW:%.*]] = call ninf double @pow(double [[X:%.*]], double -5.000000e-01) -; CHECK-NEXT: ret double [[POW]] +; CHECK-NEXT: [[SQRT:%.*]] = call ninf double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: [[ABS:%.*]] = call ninf double @llvm.fabs.f64(double [[RECIPROCAL]]) +; CHECK-NEXT: ret double [[ABS]] ; %pow = call ninf double @pow(double %x, double -5.0e-01) ret double %pow } -; FIXME: - define <2 x double> @pow_intrinsic_neghalf_ninf(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_ninf( -; CHECK-NEXT: [[POW:%.*]] = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -5.000000e-01, double -5.000000e-01>) -; CHECK-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]] +; CHECK-NEXT: [[ABS:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[RECIPROCAL]]) +; CHECK-NEXT: ret <2 x double> [[ABS]] ; %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>) ret <2 x double> %pow } -; FIXME: -; If we can disregard -0.0, no need for fabs. 
- define double @pow_libcall_neghalf_nsz(double %x) { ; CHECK-LABEL: @pow_libcall_neghalf_nsz( -; CHECK-NEXT: [[POW:%.*]] = call nsz double @pow(double [[X:%.*]], double -5.000000e-01) -; CHECK-NEXT: ret double [[POW]] +; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv nsz double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double 0.000000e+00, double [[RECIPROCAL]] +; CHECK-NEXT: ret double [[TMP2]] ; %pow = call nsz double @pow(double %x, double -5.0e-01) ret double %pow } -; FIXME: - define double @pow_intrinsic_neghalf_nsz(double %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_nsz( -; CHECK-NEXT: [[POW:%.*]] = call nsz double @llvm.pow.f64(double [[X:%.*]], double -5.000000e-01) -; CHECK-NEXT: ret double [[POW]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv nsz double 1.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], double 0.000000e+00, double [[RECIPROCAL]] +; CHECK-NEXT: ret double [[TMP3]] ; %pow = call nsz double @llvm.pow.f64(double %x, double -5.0e-01) ret double %pow