diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -6,7 +6,7 @@
 set(ALL_CPU_FEATURES "")
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F)
+  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F FMA)
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
   set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
@@ -66,6 +66,7 @@
   if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
     message(FATAL_ERROR "Unsupported CPU features: ${cpu_features}")
   endif()
+  message(STATUS "Check CPU features: ${cpu_features}")
   set(LIBC_CPU_FEATURES "${cpu_features}")
 else()
   # Populates the LIBC_CPU_FEATURES list from host.
@@ -76,6 +77,7 @@
     COMPILE_OUTPUT_VARIABLE compile_output
     RUN_OUTPUT_VARIABLE run_output)
   if("${run_result}" EQUAL 0)
+    message(STATUS "Check CPU features: ${run_output}")
     set(LIBC_CPU_FEATURES "${run_output}")
   elseif(NOT ${compile_result})
     message(FATAL_ERROR "Failed to compile: ${compile_output}")
diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
--- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake
+++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
@@ -134,7 +134,7 @@
 function(add_header_library target_name)
   cmake_parse_arguments(
     "ADD_HEADER"
-    "" # No optional arguments
+    "FMA_OPT" # Use FMA if target supports
     "" # No Single value arguments
     "HDRS;DEPENDS" # Multi-value arguments
     ${ARGN}
@@ -160,6 +160,14 @@
   get_fq_deps_list(fq_deps_list ${ADD_HEADER_DEPENDS})
   if(ADD_HEADER_DEPENDS)
     add_dependencies(${interface_target_name} ${fq_deps_list})
+    foreach(dep IN LISTS fq_deps_list)
+      if(TARGET ${dep})
+        get_target_property(fma_opt ${dep} "FMA_OPT")
+        if(fma_opt)
+          set(ADD_HEADER_FMA_OPT TRUE)
+        endif()
+      endif()
+    endforeach()
   endif()
 
   add_custom_target(${fq_target_name})
@@ -170,4 +178,15 @@
       "TARGET_TYPE" "${HDR_LIBRARY_TARGET_TYPE}"
       "DEPS" "${fq_deps_list}"
   )
+  if(ADD_HEADER_FMA_OPT)
+    # Record the request on both targets; rules that list this library in
+    # DEPENDS query the FMA_OPT property of their dependencies.
+    message(STATUS "Header library ${fq_target_name} will enable FMA for dependency")
+    set_target_properties(
+      ${interface_target_name} PROPERTIES INTERFACE_FMA_OPT TRUE
+    )
+    set_target_properties(
+      ${fq_target_name} PROPERTIES FMA_OPT TRUE
+    )
+  endif()
 endfunction(add_header_library)
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -22,6 +22,7 @@
 # Usage:
 #     add_object_library(
 #       <target_name>
+#       FMA_OPT [optional] Enable FMA if the target supports it.
 #       HDRS <list of header files>
 #       SRCS <list of source files>
 #       DEPENDS <list of dependencies>
@@ -29,7 +30,7 @@
 function(add_object_library target_name)
   cmake_parse_arguments(
     "ADD_OBJECT"
-    "" # No optional arguments
+    "FMA_OPT" # Optional arguments
     "CXX_STANDARD" # Single value arguments
     "SRCS;HDRS;COMPILE_OPTIONS;DEPENDS" # Multivalue arguments
     ${ARGN}
@@ -54,14 +55,35 @@
       ${LIBC_SOURCE_DIR}
       ${LIBC_BUILD_DIR}
   )
-  _get_common_compile_options(compile_options ${ADD_OBJECT_COMPILE_OPTIONS})
-  target_compile_options(${fq_target_name} PRIVATE ${compile_options})
-
   get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
   if(fq_deps_list)
     add_dependencies(${fq_target_name} ${fq_deps_list})
+    # Inherit FMA_OPT from any dependency target that has the property set.
+    foreach(dep IN LISTS fq_deps_list)
+      if(TARGET ${dep})
+        get_target_property(fma_opt ${dep} "FMA_OPT")
+        if(fma_opt)
+          set(ADD_OBJECT_FMA_OPT TRUE)
+        endif()
+      endif()
+    endforeach()
+
+  endif()
+
+  if(ADD_OBJECT_FMA_OPT)
+    set_target_properties(
+      ${fq_target_name}
+      PROPERTIES FMA_OPT TRUE
+    )
+    if(LIBC_CPU_FEATURES MATCHES "FMA")
+      message(STATUS "FMA is enabled for ${fq_target_name}")
+      list(APPEND ADD_OBJECT_COMPILE_OPTIONS "-mfma")
+    endif()
   endif()
 
+  _get_common_compile_options(compile_options ${ADD_OBJECT_COMPILE_OPTIONS})
+  target_compile_options(${fq_target_name} PRIVATE ${compile_options})
+
   if(ADD_OBJECT_CXX_STANDARD)
     set_target_properties(
       ${fq_target_name}
@@ -85,7 +107,8 @@
 # Usage:
 #     add_entrypoint_object(
 #       <target_name>
-#       [ALIAS|REDIRECTED] # Specified if the entrypoint is redirected or an alias.
+#       [ALIAS|REDIRECTED] # Specified if the entrypoint is redirected or an alias.
+#       [FMA_OPT] # Specified if the entrypoint can take advantage of FMA instructions.
 #       [NAME] <the C name of the entrypoint if different from target_name>
 #       SRCS <list of .cpp files>
 #       HDRS <list of .h files>
@@ -96,7 +119,7 @@
 function(add_entrypoint_object target_name)
   cmake_parse_arguments(
     "ADD_ENTRYPOINT_OBJ"
-    "ALIAS;REDIRECTED" # Optional argument
+    "ALIAS;REDIRECTED;FMA_OPT" # Optional argument
     "NAME;CXX_STANDARD" # Single value arguments
     "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS"  # Multi value arguments
     ${ARGN}
@@ -169,12 +192,27 @@
     message(FATAL_ERROR "`add_entrypoint_object` rule requires HDRS to be specified.")
   endif()
 
-  _get_common_compile_options(common_compile_options ${ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
   set(internal_target_name ${fq_target_name}.__internal__)
   set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR})
   get_fq_deps_list(fq_deps_list ${ADD_ENTRYPOINT_OBJ_DEPENDS})
   set(full_deps_list ${fq_deps_list} libc.src.__support.common)
 
+  # Inherit FMA_OPT from any dependency target that has the property set.
+  foreach(dep IN LISTS fq_deps_list)
+    if(TARGET ${dep})
+      get_target_property(fma_opt ${dep} "FMA_OPT")
+      if(fma_opt)
+        set(ADD_ENTRYPOINT_OBJ_FMA_OPT TRUE)
+      endif()
+    endif()
+  endforeach()
+  if(ADD_ENTRYPOINT_OBJ_FMA_OPT AND (LIBC_CPU_FEATURES MATCHES "FMA"))
+    message(STATUS "FMA is enabled for ${entrypoint_name}")
+    list(APPEND ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS "-mfma")
+  endif()
+
+  _get_common_compile_options(common_compile_options ${ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
+
   add_library(
     ${internal_target_name}
     # TODO: We don't need an object library for internal consumption.
@@ -209,6 +247,13 @@
     )
   endif()
 
+  if(ADD_ENTRYPOINT_OBJ_FMA_OPT)
+    set_target_properties(
+      ${fq_target_name}
+      PROPERTIES FMA_OPT TRUE
+    )
+  endif()
+
   set_target_properties(
     ${fq_target_name}
     PROPERTIES
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -35,6 +35,7 @@
 
 add_header_library(
   fma
+  FMA_OPT
   HDRS
     FMA.h
   DEPENDS
diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h
--- a/libc/src/__support/FPUtil/PolyEval.h
+++ b/libc/src/__support/FPUtil/PolyEval.h
@@ -24,7 +24,7 @@
 template <typename T> static inline T polyeval(T x, T a0) { return a0; }
 
 template <typename T, typename... Ts>
-INLINE_FMA static inline T polyeval(T x, T a0, Ts... a) {
+static inline T polyeval(T x, T a0, Ts... a) {
   return multiply_add(x, polyeval(x, a...), a0);
 }
 
diff --git a/libc/src/__support/FPUtil/x86_64/FMA.h b/libc/src/__support/FPUtil/x86_64/FMA.h
--- a/libc/src/__support/FPUtil/x86_64/FMA.h
+++ b/libc/src/__support/FPUtil/x86_64/FMA.h
@@ -26,8 +26,8 @@
 namespace fputil {
 
 template <typename T>
-INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T>
-fma(T x, T y, T z) {
+static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
+                                                                     T z) {
   float result;
   __m128 xmm = _mm_load_ss(&x);           // NOLINT
   __m128 ymm = _mm_load_ss(&y);           // NOLINT
@@ -38,8 +38,8 @@
 }
 
 template <typename T>
-INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T>
-fma(T x, T y, T z) {
+static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T> fma(T x, T y,
+                                                                      T z) {
   double result;
   __m128d xmm = _mm_load_sd(&x);           // NOLINT
   __m128d ymm = _mm_load_sd(&y);           // NOLINT
diff --git a/libc/src/__support/FPUtil/x86_64/PolyEval.h b/libc/src/__support/FPUtil/x86_64/PolyEval.h
--- a/libc/src/__support/FPUtil/x86_64/PolyEval.h
+++ b/libc/src/__support/FPUtil/x86_64/PolyEval.h
@@ -23,8 +23,7 @@
 // Cubic polynomials:
 //   polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
 template <>
-INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
-                                 float a3) {
+inline float polyeval(float x, float a0, float a1, float a2, float a3) {
   __m128 xmm = _mm_set1_ps(x);                 // NOLINT
   __m128 a13 = _mm_set_ps(0.0f, x, a3, a1);    // NOLINT
   __m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0); // NOLINT
@@ -35,8 +34,7 @@
 }
 
 template <>
-INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
-                                  double a3) {
+inline double polyeval(double x, double a0, double a1, double a2, double a3) {
   __m256d xmm = _mm256_set1_pd(x);               // NOLINT
   __m256d a13 = _mm256_set_pd(0.0, x, a3, a1);   // NOLINT
   __m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0); // NOLINT
@@ -50,8 +48,8 @@
 //   polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
 //                                         + a1*x + a0
 template <>
-INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
-                                 float a3, float a4, float a5) {
+inline float polyeval(float x, float a0, float a1, float a2, float a3, float a4,
+                      float a5) {
   __m128 xmm = _mm_set1_ps(x);                 // NOLINT
   __m128 a25 = _mm_set_ps(0.0f, x, a5, a2);    // NOLINT
   __m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1); // NOLINT
@@ -65,8 +63,8 @@
 }
 
 template <>
-INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
-                                  double a3, double a4, double a5) {
+inline double polyeval(double x, double a0, double a1, double a2, double a3,
+                       double a4, double a5) {
   __m256d xmm = _mm256_set1_pd(x);               // NOLINT
   __m256d a25 = _mm256_set_pd(0.0, x, a5, a2);   // NOLINT
   __m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1); // NOLINT
diff --git a/libc/src/__support/architectures.h b/libc/src/__support/architectures.h
--- a/libc/src/__support/architectures.h
+++ b/libc/src/__support/architectures.h
@@ -45,10 +45,4 @@
 #endif
 #endif
 
-#if (defined(LLVM_LIBC_ARCH_X86_64) && defined(LIBC_TARGET_HAS_FMA))
-#define INLINE_FMA __attribute__((target("fma")))
-#else
-#define INLINE_FMA
-#endif // LLVM_LIBC_ARCH_X86_64
-
 #endif // LLVM_LIBC_SUPPORT_ARCHITECTURES_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -51,7 +51,6 @@
     libc.src.__support.FPUtil.fma
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -65,7 +64,6 @@
     libc.src.__support.FPUtil.fma
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_math_entrypoint_object(ceil)
diff --git a/libc/src/math/fma.cpp b/libc/src/math/fma.cpp
--- a/libc/src/math/fma.cpp
+++ b/libc/src/math/fma.cpp
@@ -13,7 +13,6 @@
 
 namespace __llvm_libc {
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
   return fputil::fma(x, y, z);
 }
diff --git a/libc/src/math/fmaf.cpp b/libc/src/math/fmaf.cpp
--- a/libc/src/math/fmaf.cpp
+++ b/libc/src/math/fmaf.cpp
@@ -13,7 +13,6 @@
 
 namespace __llvm_libc {
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
   return fputil::fma(x, y, z);
 }
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -482,7 +482,6 @@
     libc.include.math
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -497,7 +496,6 @@
     libc.include.math
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -514,7 +512,6 @@
     libc.include.math
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -682,7 +679,6 @@
     libc.src.__support.FPUtil.polyeval
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -698,7 +694,6 @@
     libc.src.__support.FPUtil.polyeval
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -713,7 +708,6 @@
     libc.src.__support.FPUtil.polyeval
     COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
@@ -729,7 +723,6 @@
     libc.src.__support.FPUtil.polyeval
   COMPILE_OPTIONS
     -O3
-    -mfma
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp
--- a/libc/src/math/generic/exp2f.cpp
+++ b/libc/src/math/generic/exp2f.cpp
@@ -47,7 +47,6 @@
     0x1.fa7c1819e90d8p0,
 };
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, exp2f, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
diff --git a/libc/src/math/generic/expf.cpp b/libc/src/math/generic/expf.cpp
--- a/libc/src/math/generic/expf.cpp
+++ b/libc/src/math/generic/expf.cpp
@@ -19,7 +19,6 @@
 
 namespace __llvm_libc {
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, expf, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -19,7 +19,6 @@
 
 namespace __llvm_libc {
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp
--- a/libc/src/math/generic/log10f.cpp
+++ b/libc/src/math/generic/log10f.cpp
@@ -101,7 +101,6 @@
     0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2,
     0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2};
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
   constexpr double LOG10_2 = 0x1.34413509f79ffp-2;
 
diff --git a/libc/src/math/generic/log1pf.cpp b/libc/src/math/generic/log1pf.cpp
--- a/libc/src/math/generic/log1pf.cpp
+++ b/libc/src/math/generic/log1pf.cpp
@@ -32,7 +32,7 @@
 namespace internal {
 
 // We don't need to treat denormal
-INLINE_FMA static inline float log(double x) {
+static inline float log(double x) {
   constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
 
   using FPBits = typename fputil::FPBits<double>;
@@ -77,7 +77,6 @@
 
 } // namespace internal
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, log1pf, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp
--- a/libc/src/math/generic/log2f.cpp
+++ b/libc/src/math/generic/log2f.cpp
@@ -98,7 +98,6 @@
     0x1.f16e281db7630p-1, 0x1.f45e08bcf0655p-1, 0x1.f74aef0efafaep-1,
     0x1.fa34e1177c233p-1, 0x1.fd1be4c7f2af9p-1};
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp
--- a/libc/src/math/generic/logf.cpp
+++ b/libc/src/math/generic/logf.cpp
@@ -49,7 +49,6 @@
 
 namespace __llvm_libc {
 
-INLINE_FMA
 LLVM_LIBC_FUNCTION(float, logf, (float x)) {
   constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
   using FPBits = typename fputil::FPBits<float>;