diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -504,7 +504,7 @@
   list(SORT flags)
 
   if(SHOW_INTERMEDIATE_OBJECTS AND flags)
-    message(STATUS "Object library ${fq_target_name} has FLAGS: ${flags}")
+    message(STATUS "Entrypoint object ${fq_target_name} has FLAGS: ${flags}")
   endif()
 
   if(NOT ADD_TO_EXPAND_NAME)
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -13,6 +13,7 @@
     NormalFloat.h
     PlatformDefs.h
     builtin_wrappers.h
+    except_value_utils.h
   DEPENDS
     libc.include.math
     libc.include.errno
diff --git a/libc/src/__support/FPUtil/except_value_utils.h b/libc/src/__support/FPUtil/except_value_utils.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/FPUtil/except_value_utils.h
@@ -0,0 +1,69 @@
+//===-- Common header for helpers to set exceptional values -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
+#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
+
+#include "FPBits.h"
+
+namespace __llvm_libc {
+
+namespace fputil {
+
+template <typename T, int N> struct ExceptionalValues {
+  using UIntType = typename FPBits<T>::UIntType;
+  static constexpr int SIZE = N;
+  // Input bits.
+  UIntType inputs[SIZE];
+  // Output bits contains 4 values:
+  //   output[i][0]: output bits corresponding to FE_TOWARDZERO
+  //   output[i][1]: offset for FE_UPWARD
+  //   output[i][2]: offset for FE_DOWNWARD
+  //   output[i][3]: offset for FE_TONEAREST
+  UIntType outputs[SIZE][4];
+};
+
+template <typename T, int N> struct ExceptionChecker {
+  using UIntType = typename FPBits<T>::UIntType;
+  using FPBits = FPBits<T>;
+  using ExceptionalValues = ExceptionalValues<T, N>;
+
+  static bool check_odd_func(const ExceptionalValues &ExceptVals,
+                             UIntType x_abs, bool sign, T &result) {
+    for (int i = 0; i < N; ++i) {
+      if (unlikely(x_abs == ExceptVals.inputs[i])) {
+        UIntType out_bits = ExceptVals.outputs[i][0]; // FE_TOWARDZERO
+        switch (fputil::get_round()) {
+        case FE_UPWARD:
+          out_bits +=
+              sign ? ExceptVals.outputs[i][2] : ExceptVals.outputs[i][1];
+          break;
+        case FE_DOWNWARD:
+          out_bits +=
+              sign ? ExceptVals.outputs[i][1] : ExceptVals.outputs[i][2];
+          break;
+        case FE_TONEAREST:
+          out_bits += ExceptVals.outputs[i][3];
+          break;
+        }
+        result = FPBits(out_bits).get_val();
+        if (sign)
+          result = -result;
+
+        return true;
+      }
+    }
+    return false;
+  }
+};
+
+} // namespace fputil
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -76,11 +76,15 @@
     sinf.cpp
   HDRS
     ../sinf.h
+    range_reduction.h
+    range_reduction_fma.h
   DEPENDS
-    .sincosf_utils
     libc.include.math
     libc.src.errno.errno
     libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.fma
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
   COMPILE_OPTIONS
     -O3
 )
diff --git a/libc/src/math/generic/range_reduction.h b/libc/src/math/generic/range_reduction.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/generic/range_reduction.h
@@ -0,0 +1,131 @@
+//===-- Utilities for trigonometric functions -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+
+namespace __llvm_libc {
+
+namespace generic {
+
+static constexpr uint32_t FAST_PASS_BOUND = 0x4c80'0000U; // 2^26
+
+static constexpr int N_ENTRIES = 8;
+
+// Highest bits of 1/pi divided into 28-bit precision pieces.
+// > a1 = D(round(1/pi, 28, RN)); a1;
+// > a2 = D(round(1/pi - a1, 28, RN)); a2;
+// > a3 = D(round(1/pi - a1 - a2, 28, RN)); a3;
+// > a4 = D(round(1/pi - a1 - a2 - a3, 28, RN)); a4;
+// ...
+static constexpr double ONE_OVER_PI_28[N_ENTRIES] = {
+    0x1.45f306ep-2,   -0x1.b1bbeaep-33,  0x1.3f84ebp-62,    -0x1.7056592p-92,
+    0x1.c0db62ap-121, -0x1.4cd8778p-150, -0x1.bef806cp-179, 0x1.63abdecp-209};
+
+static constexpr double TWOFIFTYSIX_OVER_PI_28[N_ENTRIES] = {
+    0x1.45f306ep+6,   -0x1.b1bbeaep-25,  0x1.3f84ebp-54,    -0x1.7056592p-84,
+    0x1.c0db62ap-113, -0x1.4cd8778p-142, -0x1.bef806cp-171, 0x1.63abdecp-201};
+
+// Exponents of the least significant bits of the corresponding entries in
+// ONE_OVER_PI_28.
+static constexpr int ONE_OVER_PI_28_LSB_EXP[N_ENTRIES] = {
+    -29, -60, -86, -119, -148, -175, -205, -235};
+
+// Return (k mod 2) and y, where
+//   k = round(x / pi) and y = (x / pi) - k.
+static inline bool small_range_reduction(double x, double &y) {
+  int64_t k =
+      static_cast<int64_t>(fputil::multiply_add(x, ONE_OVER_PI_28[0], 0.5));
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[0], -static_cast<double>(k));
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[1], y);
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[2], y);
+  return (k & 1);
+}
+
+// Return (k mod 2) and y, where
+//   k = round(x / pi) and y = (x / pi) - k.
+// For large range, to reduce the errors of estimating the integral part of
+// (x/pi), we perform the following computation:
+//   k = round( trunc(x * 256/pi) / 256)
+//   y = x * 1/pi - k
+static inline bool large_range_reduction(double x, int x_exp, double &y) {
+  int idx = 0;
+  y = 0;
+  int x_lsb_exp = x_exp - fputil::FloatProperties<float>::MANTISSA_WIDTH;
+
+  while (x_lsb_exp + ONE_OVER_PI_28_LSB_EXP[idx] > 0)
+    ++idx;
+
+  double prod = x * TWOFIFTYSIX_OVER_PI_28[idx];
+  int64_t k = static_cast<int64_t>(prod);
+  double frac = prod - static_cast<double>(k);
+  k += static_cast<int64_t>(
+      fputil::multiply_add(x, TWOFIFTYSIX_OVER_PI_28[idx + 1], frac));
+  // round k/256 to nearest integer.
+  k = (k + 128) >> 8;
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[idx], -static_cast<double>(k));
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 1], y);
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 2], y);
+  y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 3], y);
+
+  return (k & 1);
+}
+
+// Exceptional cases.
+static constexpr int N_EXCEPT_SMALL = 6;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_SMALL> SmallExcepts{
+    /* inputs */ {
+        0x3f3adc51, // x = 0x1.75b8a2p-1
+        0x3fa7832a, // x = 0x1.4f0654p0
+        0x437ce5f1, // x = 0x1.f9cbe2p7
+        0x46199998, // x = 0x1.33333p13
+        0x4afdece4, // x = 0x1.fbd9c8p22
+        0x4c2332e9, // x = 0x1.4665d2p25
+    },
+    /* outputs (RZ, RU offset, RD offset, RN offset) */
+    {
+        {0x3f2ab444, 1, 0, 1}, // x = 0x1.75b8a2p-1, sin(x) = 0x1.556888p-1 (RZ)
+        {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
+        {0x3f7fffff, 1, 0, 1}, // x = 0x1.f9cbe2p7, sin(x) = 0x1.fffffep-1 (RZ)
+        {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
+        {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ)
+        {0xbf7fffff, 0, 1,
+         1}, // x = 0x1.4665d2p25, sin(x) = -0x1.fffffep-1 (RZ)
+    }};
+
+static constexpr int N_EXCEPT_LARGE = 5;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_LARGE> LargeExcepts{
+    /* inputs */ {
+        0x50a3e87f, // x = 0x1.47d0fep34
+        0x523947f6, // x = 0x1.728fecp37
+        0x55cafb2a, // x = 0x1.95f654p44
+        0x66b014aa, // x = 0x1.602954p78
+        0x6a1976f1, // x = 0x1.32ede2p85
+    },
+    /* outputs (RZ, RU offset, RD offset, RN offset) */
+    {
+        {0x3f7fffff, 1, 0, 1}, // x = 0x1.47d0fep34, sin(x) = 0x1.fffffep-1 (RZ)
+        {0xbf12791d, 0, 1,
+         1}, // x = 0x1.728fecp37, sin(x) = -0x1.24f23ap-1 (RZ)
+        {0xbf7e7a16, 0, 1,
+         1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
+        {0x3ef29860, 1, 0, 1}, // x = 0x1.602954p78, sin(x) = 0x1.e530cp-2 (RZ)
+        {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ)
+    }};
+
+} // namespace generic
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -0,0 +1,137 @@
+//===-- Utilities for trigonometric functions with FMA ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
+
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+
+namespace __llvm_libc {
+
+namespace fma {
+
+static constexpr uint32_t FAST_PASS_BOUND = 0x5880'0000U; // 2^50
+
+// Digits of 1/pi, generated by Sollya with:
+// > a0 = D(1/pi);
+// > a1 = D(1/pi - a0);
+// > a2 = D(1/pi - a0 - a1);
+// > a3 = D(1/pi - a0 - a1 - a2);
+static constexpr double ONE_OVER_PI[5] = {
+    0x1.45f306dc9c883p-2, -0x1.6b01ec5417056p-56, -0x1.6447e493ad4cep-110,
+    0x1.e21c820ff28b2p-164, -0x1.508510ea79237p-219};
+
+static constexpr double TWOFIFTYSIX_OVER_PI[3] = {
+    0x1.45f306dc9c883p+6, -0x1.6b01ec5417056p-48, -0x1.6447e493ad4cep-102};
+
+// Return (k mod 2) and y, where
+//   k = round(x / pi) and y = (x / pi) - k.
+// Assume x is non-negative.
+static inline bool small_range_reduction(double x, double &y) {
+  int64_t k = static_cast<int64_t>(fputil::fma(x, ONE_OVER_PI[0], 0.5));
+  y = fputil::fma(x, ONE_OVER_PI[0], -static_cast<double>(k));
+  y = fputil::fma(x, ONE_OVER_PI[1], y);
+  return (k & 1);
+}
+
+// Return (k mod 2) and y, where
+//   k = round(x / pi) and y = (x / pi) - k.
+// For large range, to reduce the errors of estimating the integral part of
+// (x/pi), we perform the following computation:
+//   k = round( trunc(x * 256/pi) / 256)
+//   y = x * 1/pi - k
+static inline bool large_range_reduction(double x, int x_exp, double &y) {
+  int64_t k;
+  // 2^50 <= |x| < 2^98
+  if (x_exp < 98) {
+    // - When x >= 2^50, double(x*TWOFIFTYSIX_OVER_PI[0]) is an integer, so
+    // we just need to keep its lowest 9 bits, so that when adding with the
+    // lower part, it does not result in overflow when converting to integer.
+    // - When x >= 2^55, the lowest 9 bit of the product
+    // double(x*TWOFIFTYSIX_OVER_PI[0]) can also be dropped.
+    fputil::FPBits<double> prod(x * TWOFIFTYSIX_OVER_PI[0]);
+    prod.bits &= (x_exp < 55) ? (~0x1ffULL) : (~0ULL); // |x| < 2^55
+    double truncated_prod =
+        fputil::fma(x, TWOFIFTYSIX_OVER_PI[0], -static_cast<double>(prod));
+    k = static_cast<int64_t>(
+        fputil::fma(x, TWOFIFTYSIX_OVER_PI[1], truncated_prod));
+    // Round k/256 to nearest integer.
+    k = (k + 128) >> 8;
+    double frac =
+        fputil::fma(truncated_prod, 1.0 / 256, -static_cast<double>(k));
+    y = fputil::fma(x, ONE_OVER_PI[1], frac);
+    y = fputil::fma(x, ONE_OVER_PI[2], y);
+    y = fputil::fma(x, ONE_OVER_PI[3], y);
+    return (k & 1);
+  }
+
+  // - When x >= 2^98, double(x*TWOFIFTYSIX_OVER_PI[1])*4 is an integer, so
+  // we just need to keep its lowest 11 bits, so that when adding with the
+  // lower part, it does not result in overflow when converting to integer.
+  // - When x >= 2^109, the lowest 11 bit of the product
+  // double(x*ONE_OVER_PI[1]) can also be dropped.
+  fputil::FPBits<double> prod(x * TWOFIFTYSIX_OVER_PI[1]);
+  prod.bits &= (x_exp < 109) ? (~0x0fffULL) : (~0ULL); // |x| < 2^109
+  double truncated_prod =
+      fputil::fma(x, TWOFIFTYSIX_OVER_PI[1], -static_cast<double>(prod));
+  k = static_cast<int64_t>(
+      fputil::fma(x, TWOFIFTYSIX_OVER_PI[2], truncated_prod));
+  // Round k/256 to nearest integer.
+  k = (k + 128) >> 8;
+  double frac = fputil::fma(truncated_prod, 1.0 / 256, -static_cast<double>(k));
+  y = fputil::fma(x, ONE_OVER_PI[2], frac);
+  y = fputil::fma(x, ONE_OVER_PI[3], y);
+  y = fputil::fma(x, ONE_OVER_PI[4], y);
+  return (k & 1);
+}
+
+// Exceptional cases.
+static constexpr int N_EXCEPT_SMALL = 7;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_SMALL> SmallExcepts{
+    /* inputs */ {
+        0x3f3adc51, // x = 0x1.75b8a2p-1
+        0x3fa7832a, // x = 0x1.4f0654p0
+        0x433b7490, // x = 0x1.76e92p7
+        0x46199998, // x = 0x1.33333p13
+        0x4afdece4, // x = 0x1.fbd9c8p22
+        0x50a3e87f, // x = 0x1.47d0fep34
+        0x55cafb2a, // x = 0x1.95f654p44
+    },
+    /* outputs (RZ, RU offset, RD offset, RN offset) */
+    {
+        {0x3f2ab444, 1, 0, 1}, // x = 0x1.75b8a2p-1, sin(x) = 0x1.556888p-1 (RZ)
+        {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
+        {0xbf5cce62, 0, 1, 0}, // x = 0x1.76e92p7, sin(x) = -0x1.b99cc4p-1 (RZ)
+        {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
+        {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ)
+        {0x3f7fffff, 1, 0, 1}, // x = 0x1.47d0fep34, sin(x) = 0x1.fffffep-1 (RZ)
+        {0xbf7e7a16, 0, 1,
+         1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
+    }};
+
+static constexpr int N_EXCEPT_LARGE = 2;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_LARGE> LargeExcepts{
+    /* inputs */ {
+        0x5c07bcd0, // x = 0x1.0f79ap57
+        0x6a1976f1, // x = 0x1.32ede2p85
+    },
+    /* outputs (RZ, RU offset, RD offset, RN offset) */
+    {
+        {0xbf7fffff, 0, 1, 1}, // x = 0x1.0f79ap57, sin(x) = -0x1.fffffep-1 (RZ)
+        {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ)
+    }};
+
+} // namespace fma
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp
--- a/libc/src/math/generic/sinf.cpp
+++ b/libc/src/math/generic/sinf.cpp
@@ -7,63 +7,195 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/sinf.h"
-#include "math_utils.h"
-#include "sincosf_utils.h"
-
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
 #include "src/__support/common.h"
-#include <math.h>
 
-#include <stdint.h>
+#include <errno.h>
+
+#if defined(LIBC_TARGET_HAS_FMA)
+#include "range_reduction_fma.h"
+// using namespace __llvm_libc::fma;
+using __llvm_libc::fma::FAST_PASS_BOUND;
+using __llvm_libc::fma::large_range_reduction;
+using __llvm_libc::fma::LargeExcepts;
+using __llvm_libc::fma::N_EXCEPT_LARGE;
+using __llvm_libc::fma::N_EXCEPT_SMALL;
+using __llvm_libc::fma::small_range_reduction;
+using __llvm_libc::fma::SmallExcepts;
+#else
+#include "range_reduction.h"
+// using namespace __llvm_libc::generic;
+using __llvm_libc::generic::FAST_PASS_BOUND;
+using __llvm_libc::generic::large_range_reduction;
+using __llvm_libc::generic::LargeExcepts;
+using __llvm_libc::generic::N_EXCEPT_LARGE;
+using __llvm_libc::generic::N_EXCEPT_SMALL;
+using __llvm_libc::generic::small_range_reduction;
+using __llvm_libc::generic::SmallExcepts;
+#endif
 
 namespace __llvm_libc {
 
-// Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative
-// error is 0.5303 * 2^-23. A single-step range reduction is used for
-// small values. Large inputs have their range reduced using fast integer
-// arithmetic.
-LLVM_LIBC_FUNCTION(float, sinf, (float y)) {
-  double x = y;
-  double s;
-  int n;
-  const sincos_t *p = &SINCOSF_TABLE[0];
-
-  if (abstop12(y) < abstop12(PIO4)) {
-    s = x * x;
-
-    if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) {
-      if (unlikely(abstop12(y) < abstop12(as_float(0x800000))))
-        // Force underflow for tiny y.
-        force_eval<float>(s);
-      return y;
+LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+  double xd, y;
+
+  // Range reduction:
+  // For |x| > pi/16, we perform range reduction as follows:
+  // Find k and y such that:
+  //   x = (k + y) * pi
+  //   k is an integer
+  //   |y| < 0.5
+  // For small range (|x| < 2^50 when FMA instructions are available, 2^26
+  // otherwise), this is done by performing:
+  //   k = round(x * 1/pi)
+  //   y = x * 1/pi - k
+  // For large range, to reduce the errors of estimating the integral part of
+  // (x/pi), we perform the following computation:
+  //   k = round( trunc(x * 256/pi) / 256)
+  //   y = x * 1/pi - k
+  // Notice that the 2-step rounding will technically putting the bound of y
+  // to be: |y| < 0.5 + 1/256.
+  //
+  // When FMA instructions are not available, we store the digits of 1/pi in
+  // chunks of 28-bit precision.  This will make sure that the products:
+  //   x * ONE_OVER_PI_28[i] are all exact.
+  // When FMA instructions are available, we simply store the digits of 1/pi in
+  // chunks of doubles (53-bit of precision).
+  // So when multiplying by the largest values of single precision, the
+  // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80.  By the
+  // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
+  // us more than 40 bits of accuracy. For the worst-case estimation of range
+  // reduction, see for instances:
+  //   Elementary Functions by J-M. Muller, Chapter 11,
+  //   Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
+  //   Chapter 10.2.
+  //
+  // Once k and y are computed, we then deduce the answer by the sine of sum
+  // formula:
+  //   sin(x) = sin((k + y)*pi)
+  //          = sin(y*pi) * cos(k*pi) + cos(y*pi) * sin(k*pi)
+  //          = (-1)^(k & 1) * sin(y*pi)
+  //          ~ (-1)^(k & 1) * y * P(y^2)
+  // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya
+  // with: > Q = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
+  // [|D...|], [0, 0.5]);
+
+  // |x| <= pi/16
+  if (x_abs <= 0x3e49'0fdbU) {
+    xd = static_cast<double>(x);
+
+    // |x| < 0x1.d12ed2p-12f
+    if (x_abs < 0x39e8'9769U) {
+      if (unlikely(x_abs == 0U)) {
+        // For signed zeros.
+        return x;
+      }
+      // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x
+      // is:
+      //   |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
+      //                           = x^2 / 6
+      //                           < 2^-25
+      //                           < epsilon(1)/2.
+      // So the correctly rounded values of sin(x) are:
+      //   = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+      //                        or (rounding mode = FE_UPWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, -2^-25, x) instead.
+      // An exhaustive test shows that this formula work correctly for all
+      // rounding modes up to |x| < 0x1.c555dep-11f.
+      // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
+      // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
+      // |x| < 2^-125. For targets without FMA instructions, we simply use
+      // double for intermediate results as it is more efficient than using an
+      // emulated version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+      return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+      xd = static_cast<double>(x);
+      return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
+#endif // LIBC_TARGET_HAS_FMA
     }
 
-    return sinf_poly(x, s, p, 0);
-  } else if (likely(abstop12(y) < abstop12(120.0f))) {
-    x = reduce_fast(x, p, &n);
-
-    // Setup the signs for sin and cos.
-    s = p->sign[n & 3];
+    // |x| < pi/16.
+    double xsq = xd * xd;
+
+    // Degree-9 polynomial approximation:
+    //   sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9
+    //          = x (1 + a_3 x^2 + ... + a_9 x^8)
+    //          = x * P(x^2)
+    // generated by Sollya with the following commands:
+    // > display = hexadecimal;
+    // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
+    double result =
+        fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7,
+                         -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19);
+    return xd * result;
+  }
 
-    if (n & 2)
-      p = &SINCOSF_TABLE[1];
+  bool x_sign = xbits.get_sign();
+  int x_exp = xbits.get_exponent();
+  xbits.set_val(0);
 
-    return sinf_poly(x * s, x * x, p, n);
-  } else if (abstop12(y) < abstop12(INFINITY)) {
-    uint32_t xi = as_uint32_bits(y);
-    int sign = xi >> 31;
+  bool sign;
+  xd = static_cast<double>(FPBits(x_abs).get_val()); // xd = |x|
 
-    x = reduce_large(xi, &n);
+  if (x_abs < FAST_PASS_BOUND) {
+    using ExceptChecker =
+        typename fputil::ExceptionChecker<float, N_EXCEPT_SMALL>;
+    if (float result;
+        ExceptChecker::check_odd_func(SmallExcepts, x_abs, x_sign, result)) {
+      return result;
+    }
 
-    // Setup signs for sin and cos - include original sign.
-    s = p->sign[(n + sign) & 3];
+    sign = small_range_reduction(xd, y);
+  } else {
+    // x is inf or nan.
+    if (unlikely(x_abs >= 0x7f80'0000U)) {
+      if (x_abs == 0x7f80'0000U)
+        errno = EDOM;
+      return x +
+             FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+    }
 
-    if ((n + sign) & 2)
-      p = &SINCOSF_TABLE[1];
+    using ExceptChecker =
+        typename fputil::ExceptionChecker<float, N_EXCEPT_LARGE>;
+    if (float result;
+        ExceptChecker::check_odd_func(LargeExcepts, x_abs, x_sign, result))
+      return result;
 
-    return sinf_poly(x * s, x * x, p, n);
+    sign = large_range_reduction(xd, x_exp, y);
   }
 
-  return invalid(y);
+  // After range reduction, k = round(x / pi) and y = (x/pi) - k.
+  // So k is an integer and -0.5 <= y <= 0.5.
+  // Then sin(x) = sin(y*pi + k*pi)
+  //             = (-1)^(k & 1) * sin(y*pi)
+  //             ~ (-1)^(k & 1) * y * P(y^2)
+  // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya
+  // with: > P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
+  // [|D...|], [0, 0.5]);
+
+  double ysq = y * y;
+  double result =
+      y * fputil::polyeval(ysq, 0x1.921fb54442d17p1, -0x1.4abbce625bd4bp2,
+                           0x1.466bc67750a3fp1, -0x1.32d2cce1612b5p-1,
+                           0x1.507832417bce6p-4, -0x1.e3062119b6071p-8,
+                           0x1.e89c7aa14122dp-12, -0x1.625b1709dece6p-16);
+
+  return (sign != x_sign) ? -result : result;
+  // }
 }
 
 } // namespace __llvm_libc
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -23,15 +23,19 @@
 
 add_fp_unittest(
   sinf_test
+  NO_RUN_POSTBUILD
   NEED_MPFR
   SUITE
     libc_math_exhaustive_tests
   SRCS
     sinf_test.cpp
   DEPENDS
+    .exhaustive_test
     libc.include.math
     libc.src.math.sinf
     libc.src.__support.FPUtil.fputil
+  LINK_LIBRARIES
+    -lpthread
 )
 
 add_fp_unittest(
diff --git a/libc/test/src/math/exhaustive/sinf_test.cpp b/libc/test/src/math/exhaustive/sinf_test.cpp
--- a/libc/test/src/math/exhaustive/sinf_test.cpp
+++ b/libc/test/src/math/exhaustive/sinf_test.cpp
@@ -6,20 +6,71 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "exhaustive_test.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/math/sinf.h"
 #include "utils/MPFRWrapper/MPFRUtils.h"
-#include <math.h>
+#include "utils/UnitTest/FPMatcher.h"
+
+#include <thread>
 
 using FPBits = __llvm_libc::fputil::FPBits<float>;
 
 namespace mpfr = __llvm_libc::testing::mpfr;
 
-TEST(LlvmLibcsinffExhaustiveTest, AllValues) {
-  uint32_t bits = 0;
-  do {
-    FPBits xbits(bits);
-    float x = float(xbits);
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
-  } while (bits++ < 0xffff'ffffU);
+struct LlvmLibcSinfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+  bool check(uint32_t start, uint32_t stop,
+             mpfr::RoundingMode rounding) override {
+    mpfr::ForceRoundingMode r(rounding);
+    uint32_t bits = start;
+    bool result = true;
+    do {
+      FPBits xbits(bits);
+      float x = float(xbits);
+      bool r = EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+                                 0.5, rounding);
+      result &= r;
+    } while (++bits < stop);
+    return result;
+  }
+};
+
+// Range: [0, +Inf);
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundUp) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundDown) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundTowardZero) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero);
+}
+
+// Range: (-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundUp) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundDown) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundTowardZero) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero);
 }
diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp
--- a/libc/test/src/math/sinf_test.cpp
+++ b/libc/test/src/math/sinf_test.cpp
@@ -51,26 +51,67 @@
     float x = float(FPBits(v));
     if (isnan(x) || isinf(x))
       continue;
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+                                   __llvm_libc::sinf(x), 0.5);
   }
 }
 
 TEST(LlvmLibcSinfTest, SpecificBitPatterns) {
-  float x = float(FPBits(uint32_t(0xc70d39a1)));
-  EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+  constexpr int N = 33;
+  constexpr uint32_t INPUTS[N] = {
+      0x3f06'0a92U, // x = pi/6
+      0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+      0x3f49'0fdbU, // x = pi/4
+      0x3f86'0a92U, // x = pi/3
+      0x3fa7'832aU, // x = 0x1.4f0654p+0f
+      0x3fc9'0fdbU, // x = pi/2
+      0x4017'1973U, // x = 0x1.2e32e6p+1f
+      0x4049'0fdbU, // x = pi
+      0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+      0x40c9'0fdbU, // x = 2*pi
+      0x433b'7490U, // x = 0x1.76e92p+7f
+      0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+      0x4619'9998U, // x = 0x1.33333p+13f
+      0x474d'246fU, // x = 0x1.9a48dep+15f
+      0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+      0x4c23'32e9U, // x = 0x1.4665d2p+25f
+      0x50a3'e87fU, // x = 0x1.47d0fep+34f
+      0x5239'47f6U, // x = 0x1.728fecp+37f
+      0x53b1'46a6U, // x = 0x1.628d4cp+40f
+      0x55ca'fb2aU, // x = 0x1.95f654p+44f
+      0x588e'f060U, // x = 0x1.1de0cp+50f
+      0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+      0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+      0x61a4'0b40U, // x = 0x1.48168p+68f
+      0x6589'8498U, // x = 0x1.13093p+76f
+      0x6600'0001U, // x = 0x1.000002p+77f
+      0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+      0x66b0'14aaU, // x = 0x1.602954p+78f
+      0x67a9'242bU, // x = 0x1.524856p+80f
+      0x6a19'76f1U, // x = 0x1.32ede2p+85f
+      0x6c55'da58U, // x = 0x1.abb4bp+89f
+      0x6f79'be45U, // x = 0x1.f37c8ap+95f
+      0x7758'4625U, // x = 0x1.b08c4ap+111f
+  };
+
+  for (int i = 0; i < N; ++i) {
+    float x = float(FPBits(INPUTS[i]));
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+                                   __llvm_libc::sinf(x), 0.5);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, -x,
+                                   __llvm_libc::sinf(-x), 0.5);
+  }
 }
 
 // For small values, sin(x) is x.
 TEST(LlvmLibcSinfTest, SmallValues) {
-  float x = float(FPBits(uint32_t(0x17800000)));
-  float result = __llvm_libc::sinf(x);
-  EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0);
-  EXPECT_FP_EQ(x, result);
-
-  x = float(FPBits(uint32_t(0x00400000)));
-  result = __llvm_libc::sinf(x);
-  EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0);
-  EXPECT_FP_EQ(x, result);
+  float x = float(FPBits(0x1780'0000U));
+  EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+                                 0.5);
+
+  x = float(FPBits(0x0040'0000U));
+  EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+                                 0.5);
 }
 
 // SDCOMP-26094: check sinf in the cases for which the range reducer
@@ -78,6 +119,7 @@
 TEST(LlvmLibcSinfTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
     float x = float(FPBits((v)));
-    EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+                                   __llvm_libc::sinf(x), 0.5);
   }
 }
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -161,6 +161,7 @@
     "src/__support/FPUtil/NormalFloat.h",
     "src/__support/FPUtil/PlatformDefs.h",
     "src/__support/FPUtil/builtin_wrappers.h",
+    "src/__support/FPUtil/except_value_utils.h",
 ]
 
 fputil_hdrs = selects.with_or({
@@ -612,9 +613,14 @@
 
 libc_math_function(
     name = "sinf",
+    hdrs = [
+        "src/math/generic/range_reduction.h",
+        "src/math/generic/range_reduction_fma.h",
+    ],
     additional_deps = [
-        ":math_utils",
-        ":sincosf_utils",
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_polyeval",
     ],
 )