Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -165,9 +165,9 @@
 BUILTIN(__builtin_atanh , "dd", "Fne")
 BUILTIN(__builtin_atanhf, "ff", "Fne")
 BUILTIN(__builtin_atanhl, "LdLd", "Fne")
-BUILTIN(__builtin_cbrt , "dd", "Fne")
-BUILTIN(__builtin_cbrtf, "ff", "Fne")
-BUILTIN(__builtin_cbrtl, "LdLd", "Fne")
+BUILTIN(__builtin_cbrt , "dd", "Fnc")
+BUILTIN(__builtin_cbrtf, "ff", "Fnc")
+BUILTIN(__builtin_cbrtl, "LdLd", "Fnc")
 BUILTIN(__builtin_ceil , "dd"  , "Fnc")
 BUILTIN(__builtin_ceilf, "ff"  , "Fnc")
 BUILTIN(__builtin_ceill, "LdLd", "Fnc")
@@ -198,9 +198,11 @@
 BUILTIN(__builtin_floor , "dd"  , "Fnc")
 BUILTIN(__builtin_floorf, "ff"  , "Fnc")
 BUILTIN(__builtin_floorl, "LdLd", "Fnc")
-BUILTIN(__builtin_fma, "dddd", "Fne")
-BUILTIN(__builtin_fmaf, "ffff", "Fne")
-BUILTIN(__builtin_fmal, "LdLdLdLd", "Fne")
+// Ignore that 'fma' could set errno and mark it "c" rather than "e". We assume
+// no reasonable implementation would set errno and slow down such a basic op.
+BUILTIN(__builtin_fma, "dddd", "Fnc")
+BUILTIN(__builtin_fmaf, "ffff", "Fnc")
+BUILTIN(__builtin_fmal, "LdLdLdLd", "Fnc")
 BUILTIN(__builtin_fmax, "ddd", "Fnc")
 BUILTIN(__builtin_fmaxf, "fff", "Fnc")
 BUILTIN(__builtin_fmaxl, "LdLdLd", "Fnc")
@@ -1040,9 +1042,9 @@
 LIBBUILTIN(atanhf, "ff", "fne", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(atanhl, "LdLd", "fne", "math.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cbrt, "dd", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(cbrtf, "ff", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(cbrtl, "LdLd", "fne", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrt, "dd", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrtf, "ff", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrtl, "LdLd", "fnc", "math.h", ALL_LANGUAGES)
 
 LIBBUILTIN(ceil, "dd", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(ceilf, "ff", "fnc", "math.h", ALL_LANGUAGES)
@@ -1084,9 +1086,11 @@
 LIBBUILTIN(floorf, "ff", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(floorl, "LdLd", "fnc", "math.h", ALL_LANGUAGES)
 
-LIBBUILTIN(fma, "dddd", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(fmaf, "ffff", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(fmal, "LdLdLdLd", "fne", "math.h", ALL_LANGUAGES)
+// Ignore that 'fma' could set errno and mark it "c" rather than "e". We assume
+// no reasonable implementation would set errno and slow down such a basic op.
+LIBBUILTIN(fma, "dddd", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(fmaf, "ffff", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(fmal, "LdLdLdLd", "fnc", "math.h", ALL_LANGUAGES)
 
 LIBBUILTIN(fmax, "ddd", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(fmaxf, "fff", "fnc", "math.h", ALL_LANGUAGES)
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2109,15 +2109,11 @@
   case Builtin::BIfmal:
   case Builtin::BI__builtin_fma:
   case Builtin::BI__builtin_fmaf:
-  case Builtin::BI__builtin_fmal: {
-    // Rewrite fma to intrinsic.
-    Value *FirstArg = EmitScalarExpr(E->getArg(0));
-    llvm::Type *ArgType = FirstArg->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
-    return RValue::get(
-        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
-                               EmitScalarExpr(E->getArg(2))}));
-  }
+  case Builtin::BI__builtin_fmal:
+    // A constant libcall or builtin is equivalent to the LLVM intrinsic.
+    if (FD->hasAttr<ConstAttr>())
+      return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
+    break;
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
Index: test/CodeGen/math-builtins.c
===================================================================
--- test/CodeGen/math-builtins.c
+++ test/CodeGen/math-builtins.c
@@ -175,9 +175,9 @@
 // NO__ERRNO: declare double @cbrt(double) [[READNONE]]
 // NO__ERRNO: declare float @cbrtf(float) [[READNONE]]
 // NO__ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
-// HAS_ERRNO: declare double @cbrt(double) [[NOT_READNONE]]
-// HAS_ERRNO: declare float @cbrtf(float) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @cbrt(double) [[READNONE]]
+// HAS_ERRNO: declare float @cbrtf(float) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
 
   __builtin_ceil(f);       __builtin_ceilf(f);      __builtin_ceill(f);
 
Index: test/CodeGen/math-libcalls.c
===================================================================
--- test/CodeGen/math-libcalls.c
+++ test/CodeGen/math-libcalls.c
@@ -145,9 +145,9 @@
 // NO__ERRNO: declare double @cbrt(double) [[READNONE]]
 // NO__ERRNO: declare float @cbrtf(float) [[READNONE]]
 // NO__ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
-// HAS_ERRNO: declare double @cbrt(double) [[NOT_READNONE]]
-// HAS_ERRNO: declare float @cbrtf(float) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @cbrt(double) [[READNONE]]
+// HAS_ERRNO: declare float @cbrtf(float) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
 
   ceil(f);       ceilf(f);      ceill(f);