diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -34,6 +34,15 @@
     return 0x1.8dbe62p-3f;
   }
 
+#if !defined(LIBC_TARGET_HAS_FMA)
+  if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
+    int round_mode = fputil::get_round();
+    if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
+      return -0x1.71c884p-4f;
+    return -0x1.71c882p-4f;
+  }
+#endif // !LIBC_TARGET_HAS_FMA
+
   // When |x| > 25*log(2), or nan
   if (unlikely(x_abs >= 0x418a'a123U)) {
     // x < log(2^-25)
@@ -70,19 +79,30 @@
       // x = -0.0f
       if (unlikely(xbits.uintval() == 0x8000'0000U))
         return x;
-      // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
-      // is:
-      //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
-      //                               = |x|
-      //                               < 2^-25
-      //                               < epsilon(1)/2.
-      // So the correctly rounded values of expm1(x) are:
-      //   = x + eps(x) if rounding mode = FE_UPWARD,
-      //                   or (rounding mode = FE_TOWARDZERO and x is negative),
-      //   = x otherwise.
-      // To simplify the rounding decision and make it more efficient, we use
-      //   fma(x, x, x) ~ x + x^2 instead.
-      return fputil::multiply_add(x, x, x);
+        // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
+        // is:
+        //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
+        //                               = |x|
+        //                               < 2^-25
+        //                               < epsilon(1)/2.
+        // So the correctly rounded values of expm1(x) are:
+        //   = x + eps(x) if rounding mode = FE_UPWARD,
+        //                   or (rounding mode = FE_TOWARDZERO and x is
+        //                   negative),
+        //   = x otherwise.
+        // To simplify the rounding decision and make it more efficient, we use
+        //   fma(x, x, x) ~ x + x^2 instead.
+        // Note: to use the formula x + x^2 to decide the correct rounding, we
+        // need fma(x, x, x) so that x*x does not underflow when |x| < 2^-76.
+        // For targets without FMA instructions, we simply use double for
+        // intermediate results, as it is more efficient than using an
+        // emulated version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+      return fputil::fma(x, x, x);
+#else
+      double xd = x;
+      return static_cast<float>(fputil::multiply_add(xd, xd, xd));
+#endif // LIBC_TARGET_HAS_FMA
     }
 
     // 2^-25 <= |x| < 2^-4
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1201,8 +1201,6 @@
     libc.include.math
     libc.src.math.expm1f
     libc.src.__support.FPUtil.fputil
-  FLAGS
-    FMA_OPT__ONLY
 )
 
 add_fp_unittest(
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -92,7 +92,6 @@
   DEPENDS
     .exhaustive_test
     libc.include.math
-    libc.src.math.expf
     libc.src.math.expm1f
     libc.src.__support.FPUtil.fputil
   LINK_LIBRARIES
diff --git a/libc/test/src/math/exhaustive/expm1f_test.cpp b/libc/test/src/math/exhaustive/expm1f_test.cpp
--- a/libc/test/src/math/exhaustive/expm1f_test.cpp
+++ b/libc/test/src/math/exhaustive/expm1f_test.cpp
@@ -18,7 +18,7 @@
 
 namespace mpfr = __llvm_libc::testing::mpfr;
 
-struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
   bool check(uint32_t start, uint32_t stop,
              mpfr::RoundingMode rounding) override {
     mpfr::ForceRoundingMode r(rounding);
@@ -40,21 +40,21 @@
 static constexpr uint32_t POS_START = 0x0000'0000U;
 static constexpr uint32_t POS_STOP = 0x42b2'0000U;
 
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PositiveRangeRoundNearestTieToEven) {
   test_full_range(POS_START, POS_STOP, NUM_THREADS,
                   mpfr::RoundingMode::Nearest);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PositiveRangeRoundUp) {
   test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PositiveRangeRoundDown) {
   test_full_range(POS_START, POS_STOP, NUM_THREADS,
                   mpfr::RoundingMode::Downward);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PositiveRangeRoundTowardZero) {
   test_full_range(POS_START, POS_STOP, NUM_THREADS,
                   mpfr::RoundingMode::TowardZero);
 }
@@ -63,21 +63,21 @@
 static constexpr uint32_t NEG_START = 0x8000'0000U;
 static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
 
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
   test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
                   mpfr::RoundingMode::Nearest);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) {
   test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) {
   test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
                   mpfr::RoundingMode::Downward);
 }
 
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) {
   test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
                   mpfr::RoundingMode::TowardZero);
 }
diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp
--- a/libc/test/src/math/expm1f_test.cpp
+++ b/libc/test/src/math/expm1f_test.cpp
@@ -97,6 +97,16 @@
   ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
                                  __llvm_libc::expm1f(x), 0.5);
   EXPECT_MATH_ERRNO(0);
+
+  x = float(FPBits(0x942ed494U));
+  ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
+                                 __llvm_libc::expm1f(x), 0.5);
+  EXPECT_MATH_ERRNO(0);
+
+  x = float(FPBits(0xbdc1c6cbU));
+  ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
+                                 __llvm_libc::expm1f(x), 0.5);
+  EXPECT_MATH_ERRNO(0);
 }
 
 TEST(LlvmLibcExpm1fTest, InFloatRange) {