diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h --- a/libc/test/src/math/LdExpTest.h +++ b/libc/test/src/math/LdExpTest.h @@ -112,6 +112,30 @@ T x = NormalFloat(-FPBits::exponentBias + 1, 2 * NormalFloat::one - 1, 0); ASSERT_FP_EQ(func(x, -1), x / 2); ASSERT_FP_EQ(func(-x, -1), -x / 2); + + // Start with a normal high-exponent number but pass a very low number for + // exp. The result should be a subnormal number. + x = NormalFloat(FPBits::exponentBias, NormalFloat::one, 0); + int exp = -FPBits::maxExponent - 5; + T result = func(x, exp); + FPBits resultBits(result); + ASSERT_FALSE(resultBits.isZero()); + // Verify that the result is indeed subnormal. + ASSERT_EQ(resultBits.exponent, uint16_t(0)); + // But if the exp is so small that normalization leads to zero, then + // the result should be zero. + result = func(x, -FPBits::maxExponent - int(mantissaWidth) - 5); + ASSERT_TRUE(FPBits(result).isZero()); + + // Start with a subnormal number but pass a very high number for exponent. + // The result should not be infinity. + x = NormalFloat(-FPBits::exponentBias + 1, NormalFloat::one >> 10, 0); + exp = FPBits::maxExponent + 5; + ASSERT_EQ(isinf(func(x, exp)), 0); + // But if the exp is large enough to overcome the normalization shift, + // then it should result in infinity. + exp = FPBits::maxExponent + 15; + ASSERT_NE(isinf(func(x, exp)), 0); } }; diff --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h --- a/libc/utils/FPUtil/ManipulationFunctions.h +++ b/libc/utils/FPUtil/ManipulationFunctions.h @@ -124,14 +124,17 @@ return x; // NormalFloat uses int32_t to store the true exponent value. We should ensure - // that adding |exp| to it does not lead to integer rollover. But, we |exp| + // that adding |exp| to it does not lead to integer rollover. But, if |exp| // value is larger the exponent range for type T, then we can return infinity - // early. - if (exp > FPBits::maxExponent) + // early. 
Because the result of the ldexp operation can be a subnormal number, + // we need to accommodate the (mantissaWidth + 1) worth of shift in + // calculating the limit. + int expLimit = FPBits::maxExponent + MantissaWidth::value + 1; + if (exp > expLimit) return bits.sign ? FPBits::negInf() : FPBits::inf(); - // Similarly on the negative side. - if (exp < -FPBits::maxExponent) + // Similarly on the negative side we return zero early if |exp| is too small. + if (exp < -expLimit) return bits.sign ? FPBits::negZero() : FPBits::zero(); // For all other values, NormalFloat to T conversion handles it the right way.