Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2218,10 +2218,25 @@ Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + // Fast-math-flags for any created instructions should match the sqrt. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // If this is a sqrt libcall and we can assume that NAN is not created, then + // the arg must not be less than -0.0 and errno won't be set either. + // It is safe to convert this to an intrinsic call. + // TODO: Check if the arg is known non-negative. + Value *Arg = CI->getArgOperand(0); + Type *ArgType = Arg->getType(); + if (!Callee->isIntrinsic() && CI->hasNoNaNs()) { + Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + return copyFlags(*CI, B.CreateCall(Sqrt, Arg, "sqrt")); + } + if (!CI->isFast()) return Ret; - Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + Instruction *I = dyn_cast<Instruction>(Arg); if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) return Ret; @@ -2254,14 +2269,8 @@ if (!RepeatOp) return Ret; - // Fast math flags for any created instructions should match the sqrt - // and multiply. - IRBuilderBase::FastMathFlagGuard Guard(B); - B.setFastMathFlags(I->getFastMathFlags()); - // If we found a repeated factor, hoist it out of the square root and // replace it with the fabs of that factor. 
- Type *ArgType = I->getType(); Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); if (OtherOp) { Index: llvm/test/Transforms/InstCombine/pow-4.ll =================================================================== --- llvm/test/Transforms/InstCombine/pow-4.ll +++ llvm/test/Transforms/InstCombine/pow-4.ll @@ -174,12 +174,12 @@ ; NOSQRT-NEXT: ret double [[TMP1]] ; ; CHECKSQRT-LABEL: @test_simplify_16_5_libcall( -; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) +; CHECKSQRT-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] ; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] ; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] ; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] +; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT1]] ; CHECKSQRT-NEXT: ret double [[TMP4]] ; ; CHECKNOSQRT-LABEL: @test_simplify_16_5_libcall( @@ -208,12 +208,12 @@ ; NOSQRT-NEXT: ret double [[TMP1]] ; ; CHECKSQRT-LABEL: @test_simplify_neg_16_5_libcall( -; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]]) +; CHECKSQRT-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]] ; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]] ; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]] ; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] -; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]] +; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT1]] ; CHECKSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]] ; CHECKSQRT-NEXT: ret double [[RECIPROCAL]] ; @@ -269,9 +269,13 @@ ; 
(float)pow((double)(float)x, 0.5) define float @shrink_pow_libcall_half(float %x) { -; CHECK-LABEL: @shrink_pow_libcall_half( -; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: ret float [[SQRTF]] +; CHECKSQRT-LABEL: @shrink_pow_libcall_half( +; CHECKSQRT-NEXT: [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECKSQRT-NEXT: ret float [[TMP1]] +; +; CHECKNOSQRT-LABEL: @shrink_pow_libcall_half( +; CHECKNOSQRT-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECKNOSQRT-NEXT: ret float [[SQRT]] ; %dx = fpext float %x to double %call = call fast double @pow(double %dx, double 0.5) Index: llvm/test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -148,8 +148,8 @@ define float @pow_libcall_half_fast(float %x) { ; CHECK-LABEL: @pow_libcall_half_fast( -; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: ret float [[SQRTF]] +; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: ret float [[SQRT]] ; %pow = call fast float @powf(float %x, float 5.0e-01) ret float %pow @@ -320,8 +320,8 @@ define float @pow_libcall_neghalf_fast(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_fast( -; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[SQRTF]] +; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[SQRT]] ; CHECK-NEXT: ret float [[RECIPROCAL]] ; %pow = call fast float @powf(float %x, float -5.0e-01) Index: llvm/test/Transforms/InstCombine/sqrt.ll =================================================================== --- llvm/test/Transforms/InstCombine/sqrt.ll +++ llvm/test/Transforms/InstCombine/sqrt.ll @@ -59,19 
+59,23 @@ ret void } +; nnan implies no setting of errno, so transform to an intrinsic + define float @sqrt_call_nnan_f32(float %x) { ; CHECK-LABEL: @sqrt_call_nnan_f32( -; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: ret float [[SQRT]] +; CHECK-NEXT: [[SQRT1:%.*]] = call nnan float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: ret float [[SQRT1]] ; %sqrt = call nnan float @sqrtf(float %x) ret float %sqrt } +; verify that other function call FMF and attributes are propagated to the intrinsic call + define double @sqrt_call_nnan_f64(double %x) { ; CHECK-LABEL: @sqrt_call_nnan_f64( -; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: ret double [[SQRT]] +; CHECK-NEXT: [[SQRT1:%.*]] = tail call nnan ninf double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: ret double [[SQRT1]] ; %sqrt = tail call nnan ninf double @sqrt(double %x) ret double %sqrt