Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2218,10 +2218,25 @@
        Callee->getIntrinsicID() == Intrinsic::sqrt))
     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
+  // Fast-math-flags for any created instructions should match the sqrt.
+  IRBuilderBase::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(CI->getFastMathFlags());
+
+  // If this is a sqrt libcall and we can assume that no NaN is produced, then
+  // the arg cannot be less than -0.0, so errno will not be set either.
+  // It is therefore safe to convert this to an intrinsic call.
+  // TODO: Also do this when the arg is known to be non-negative.
+  Value *Arg = CI->getArgOperand(0);
+  Type *ArgType = Arg->getType();
+  if (!Callee->isIntrinsic() && CI->hasNoNaNs()) {
+    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+    return copyFlags(*CI, B.CreateCall(Sqrt, Arg, "sqrt"));
+  }
+
   if (!CI->isFast())
     return Ret;
 
-  Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+  Instruction *I = dyn_cast<Instruction>(Arg);
   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
     return Ret;
 
@@ -2254,14 +2269,8 @@
   if (!RepeatOp)
     return Ret;
 
-  // Fast math flags for any created instructions should match the sqrt
-  // and multiply.
-  IRBuilderBase::FastMathFlagGuard Guard(B);
-  B.setFastMathFlags(I->getFastMathFlags());
-
   // If we found a repeated factor, hoist it out of the square root and
   // replace it with the fabs of that factor.
-  Type *ArgType = I->getType();
   Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
   Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
   if (OtherOp) {
Index: llvm/test/Transforms/InstCombine/pow-4.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pow-4.ll
+++ llvm/test/Transforms/InstCombine/pow-4.ll
@@ -174,12 +174,12 @@
 ; NOSQRT-NEXT:    ret double [[TMP1]]
 ;
 ; CHECKSQRT-LABEL: @test_simplify_16_5_libcall(
-; CHECKSQRT-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECKSQRT-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
 ; CHECKSQRT-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
 ; CHECKSQRT-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
 ; CHECKSQRT-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
 ; CHECKSQRT-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
-; CHECKSQRT-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECKSQRT-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT1]]
 ; CHECKSQRT-NEXT:    ret double [[TMP4]]
 ;
 ; CHECKNOSQRT-LABEL: @test_simplify_16_5_libcall(
@@ -208,12 +208,12 @@
 ; NOSQRT-NEXT:    ret double [[TMP1]]
 ;
 ; CHECKSQRT-LABEL: @test_simplify_neg_16_5_libcall(
-; CHECKSQRT-NEXT:    [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECKSQRT-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
 ; CHECKSQRT-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
 ; CHECKSQRT-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
 ; CHECKSQRT-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
 ; CHECKSQRT-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
-; CHECKSQRT-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECKSQRT-NEXT:    [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT1]]
 ; CHECKSQRT-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
 ; CHECKSQRT-NEXT:    ret double [[RECIPROCAL]]
 ;
@@ -269,9 +269,13 @@
 
 ; (float)pow((double)(float)x, 0.5)
 define float @shrink_pow_libcall_half(float %x) {
-; CHECK-LABEL: @shrink_pow_libcall_half(
-; CHECK-NEXT:    [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]])
-; CHECK-NEXT:    ret float [[SQRTF]]
+; CHECKSQRT-LABEL: @shrink_pow_libcall_half(
+; CHECKSQRT-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECKSQRT-NEXT:    ret float [[TMP1]]
+;
+; CHECKNOSQRT-LABEL: @shrink_pow_libcall_half(
+; CHECKNOSQRT-NEXT:    [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECKNOSQRT-NEXT:    ret float [[SQRT]]
 ;
   %dx = fpext float %x to double
   %call = call fast double @pow(double %dx, double 0.5)
Index: llvm/test/Transforms/InstCombine/pow-sqrt.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pow-sqrt.ll
+++ llvm/test/Transforms/InstCombine/pow-sqrt.ll
@@ -148,8 +148,8 @@
 
 define float @pow_libcall_half_fast(float %x) {
 ; CHECK-LABEL: @pow_libcall_half_fast(
-; CHECK-NEXT:    [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]])
-; CHECK-NEXT:    ret float [[SQRTF]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %pow = call fast float @powf(float %x, float 5.0e-01)
   ret float %pow
@@ -320,8 +320,8 @@
 
 define float @pow_libcall_neghalf_fast(float %x) {
 ; CHECK-LABEL: @pow_libcall_neghalf_fast(
-; CHECK-NEXT:    [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]])
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[SQRTF]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[SQRT]]
 ; CHECK-NEXT:    ret float [[RECIPROCAL]]
 ;
   %pow = call fast float @powf(float %x, float -5.0e-01)
Index: llvm/test/Transforms/InstCombine/sqrt.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sqrt.ll
+++ llvm/test/Transforms/InstCombine/sqrt.ll
@@ -59,19 +59,23 @@
   ret void
 }
 
+; nnan implies no setting of errno, so transform to an intrinsic
+
 define float @sqrt_call_nnan_f32(float %x) {
 ; CHECK-LABEL: @sqrt_call_nnan_f32(
-; CHECK-NEXT:    [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]])
-; CHECK-NEXT:    ret float [[SQRT]]
+; CHECK-NEXT:    [[SQRT1:%.*]] = call nnan float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[SQRT1]]
 ;
   %sqrt = call nnan float @sqrtf(float %x)
   ret float %sqrt
 }
 
+; Verify that the libcall's other fast-math flags and call attributes are propagated to the intrinsic call.
+
 define double @sqrt_call_nnan_f64(double %x) {
 ; CHECK-LABEL: @sqrt_call_nnan_f64(
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf double @sqrt(double [[X:%.*]])
-; CHECK-NEXT:    ret double [[SQRT]]
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan ninf double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    ret double [[SQRT1]]
 ;
   %sqrt = tail call nnan ninf double @sqrt(double %x)
   ret double %sqrt