diff --git a/libc/utils/FPUtil/CMakeLists.txt b/libc/utils/FPUtil/CMakeLists.txt --- a/libc/utils/FPUtil/CMakeLists.txt +++ b/libc/utils/FPUtil/CMakeLists.txt @@ -17,6 +17,7 @@ BasicOperations.h ManipulationFunctions.h NearestIntegerOperations.h + NormalFloat.h DEPENDS libc.utils.CPP.standalone_cpp ) diff --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h --- a/libc/utils/FPUtil/ManipulationFunctions.h +++ b/libc/utils/FPUtil/ManipulationFunctions.h @@ -6,37 +6,20 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H +#define LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H + #include "FPBits.h" #include "NearestIntegerOperations.h" +#include "NormalFloat.h" #include "utils/CPP/TypeTraits.h" -#ifndef LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H -#define LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H - namespace __llvm_libc { namespace fputil { -#if defined(__x86_64__) || defined(__i386__) -template struct Standard754Type { - static constexpr bool Value = - cpp::IsSame>::Value || - cpp::IsSame>::Value; -}; -#else -template struct Standard754Type { - static constexpr bool Value = cpp::IsFloatingPointType::Value; -}; -#endif - -template static inline T frexp_impl(FPBits &bits, int &exp) { - exp = bits.getExponent() + 1; - static constexpr uint16_t resultExponent = FPBits::exponentBias - 1; - bits.exponent = resultExponent; - return bits; -} - -template ::Value, int> = 0> +template ::Value, int> = 0> static inline T frexp(T x, int &exp) { FPBits bits(x); if (bits.isInfOrNaN()) @@ -46,42 +29,12 @@ return x; } - return frexp_impl(bits, exp); + NormalFloat normal(bits); + exp = normal.exponent + 1; + normal.exponent = -1; + return normal; } -#if defined(__x86_64__) || defined(__i386__) -static inline long double frexp(long double x, int &exp) { - FPBits bits(x); - if (bits.isInfOrNaN()) - return x; - if (bits.isZero()) { - exp = 0; - return x; - } - - if (bits.exponent != 0 || bits.implicitBit == 1) - return frexp_impl(bits, exp); - - exp = bits.getExponent(); - int shiftCount = 0; - uint64_t fullMantissa = *reinterpret_cast(&bits); - static constexpr uint64_t msBitMask = uint64_t(1) << 63; - for (; (fullMantissa & msBitMask) == uint64_t(0); - fullMantissa <<= 1, ++shiftCount) { - // This for loop will terminate as fullMantissa is != 0. If it were 0, - // then x will be NaN and handled before control reaches here. - // When the loop terminates, fullMantissa will represent the full mantissa - // of a normal long double value. That is, the implicit bit has the value - // of 1. - } - - exp = exp - shiftCount + 1; - *reinterpret_cast(&bits) = fullMantissa; - bits.exponent = FPBits::exponentBias - 1; - return bits; -} -#endif - template ::Value, int> = 0> static inline T modf(T x, T &iptr) { @@ -112,11 +65,8 @@ return xbits; } -template static inline T logb_impl(const FPBits &bits) { - return bits.getExponent(); -} - -template ::Value, int> = 0> +template ::Value, int> = 0> static inline T logb(T x) { FPBits bits(x); if (bits.isZero()) { @@ -130,42 +80,9 @@ return FPBits::inf(); } - return logb_impl(bits); -} - -#if defined(__x86_64__) || defined(__i386__) -static inline long double logb(long double x) { - FPBits bits(x); - if (bits.isZero()) { - // TODO(Floating point exception): Raise div-by-zero exception. - // TODO(errno): POSIX requires setting errno to ERANGE. - return FPBits::negInf(); - } else if (bits.isNaN()) { - return x; - } else if (bits.isInf()) { - // Return positive infinity. - return FPBits::inf(); - } - - if (bits.exponent != 0 || bits.implicitBit == 1) - return logb_impl(bits); - - int exp = bits.getExponent(); - int shiftCount = 0; - uint64_t fullMantissa = *reinterpret_cast(&bits); - static constexpr uint64_t msBitMask = uint64_t(1) << 63; - for (; (fullMantissa & msBitMask) == uint64_t(0); - fullMantissa <<= 1, ++shiftCount) { - // This for loop will terminate as fullMantissa is != 0. If it were 0, - // then x will be NaN and handled before control reaches here. - // When the loop terminates, fullMantissa will represent the full mantissa - // of a normal long double value. That is, the implicit bit has the value - // of 1. - } - - return exp - shiftCount; + NormalFloat normal(bits); + return normal.exponent; } -#endif } // namespace fputil } // namespace __llvm_libc diff --git a/libc/utils/FPUtil/NormalFloat.h b/libc/utils/FPUtil/NormalFloat.h new file mode 100644 --- /dev/null +++ b/libc/utils/FPUtil/NormalFloat.h @@ -0,0 +1,231 @@ +//===-- A class to store a normalized floating point number -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H +#define LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H + +#include "FPBits.h" + +#include "utils/CPP/TypeTraits.h" + +#include + +namespace __llvm_libc { +namespace fputil { + +// A class which stores the normalized form of a floating point value. +// The special IEEE-754 bits patterns of Zero, infinity and NaNs are +// are not handled by this class. +// +// A normalized floating point number is of this form: +// (-1)*sign * 2^exponent * +// where is of the form 1.<...>. +template struct NormalFloat { + static_assert( + cpp::IsFloatingPointType::Value, + "NormalFloat template parameter has to be a floating point type."); + + using UIntType = typename FPBits::UIntType; + static constexpr UIntType one = (UIntType(1) << MantissaWidth::value); + + // Unbiased exponent value. + int32_t exponent; + + UIntType mantissa; + // We want |UIntType| to have atleast one bit more than the actual mantissa + // bit width to accommodate the implicit 1 value. + static_assert(sizeof(UIntType) * 8 >= MantissaWidth::value + 1, + "Bad type for mantissa in NormalFloat."); + + bool sign; + + NormalFloat(int32_t e, UIntType m, bool s) + : exponent(e), mantissa(m), sign(s) { + if (mantissa >= (one << 1) || (mantissa & one)) + return; + + for (unsigned i = 0; + (one & mantissa) == 0 && (i <= MantissaWidth::value); ++i) { + mantissa <<= 1; + --exponent; + } + } + + explicit NormalFloat(T x) { initFromBits(FPBits(x)); } + + explicit NormalFloat(FPBits bits) { initFromBits(bits); } + + // Compares this normalized number with another normalized number. + // Returns -1 is this number is less than |other|, 0 if this number is equal + // to |other|, and 1 if this number is greater than |other|. + int cmp(const NormalFloat &other) const { + if (sign != other.sign) + return sign ? -1 : 1; + + if (exponent > other.exponent) { + return sign ? -1 : 1; + } else if (exponent == other.exponent) { + if (mantissa > other.mantissa) + return sign ? -1 : 1; + else if (mantissa == other.mantissa) + return 0; + else + return sign ? 1 : -1; + } else { + return sign ? 1 : -1; + } + } + + // Returns a new normalized floating point number which is equal in value + // to this number multiplied by 2^e. That is: + // new = this * 2^e + NormalFloat mul2(int e) const { + NormalFloat result = *this; + result.exponent += e; + return result; + } + + operator T() const { + int biasedExponent = exponent + FPBits::exponentBias; + // Max exponent is of the form 0xFF...E. That is why -2 and not -1. + constexpr int maxExponentValue = (1 << ExponentWidth::value) - 2; + if (biasedExponent > maxExponentValue) { + // TODO: Should infinity with the correct sign be returned? + return FPBits::buildNaN(1); + } + + FPBits result(T(0.0)); + + constexpr int subnormalExponent = -FPBits::exponentBias + 1; + if (exponent < subnormalExponent) { + unsigned shift = subnormalExponent - exponent; + if (shift <= MantissaWidth::value) { + // Generate a subnormal number. Might lead to loss of precision. + result.exponent = 0; + result.mantissa = mantissa >> shift; + result.sign = sign; + return result; + } else { + // TODO: Should zero with the correct sign be returned? + return FPBits::buildNaN(1); + } + } + + result.exponent = exponent + FPBits::exponentBias; + result.mantissa = mantissa; + result.sign = sign; + return result; + } + +private: + void initFromBits(FPBits bits) { + sign = bits.sign; + + if (bits.isInfOrNaN() || bits.isZero()) { + // Ignore special bit patterns. Implementations deal with them separately + // anyway so this should not be a problem. + exponent = 0; + mantissa = 0; + return; + } + + // Normalize subnormal numbers. + if (bits.exponent == 0) { + unsigned shift = evaluateNormalizationShift(bits.mantissa); + mantissa = UIntType(bits.mantissa) << shift; + exponent = 1 - FPBits::exponentBias - shift; + ; + } else { + exponent = bits.exponent - FPBits::exponentBias; + mantissa = one | bits.mantissa; + } + } + + unsigned evaluateNormalizationShift(UIntType m) { + unsigned shift = 0; + for (; (one & m) == 0 && (shift < MantissaWidth::value); + m <<= 1, ++shift) + ; + return shift; + } +}; + +#if defined(__x86_64__) || defined(__i386__) +template <> +inline void NormalFloat::initFromBits(FPBits bits) { + sign = bits.sign; + + if (bits.isInfOrNaN() || bits.isZero()) { + // Ignore special bit patterns. Implementations deal with them separately + // anyway so this should not be a problem. + exponent = 0; + mantissa = 0; + return; + } + + if (bits.exponent == 0) { + if (bits.implicitBit == 0) { + // Since we ignore zero value, the mantissa in this case is non-zero. + int normalizationShift = evaluateNormalizationShift(bits.mantissa); + exponent = -16382 - normalizationShift; + mantissa = (bits.mantissa << normalizationShift); + } else { + exponent = -16382; + mantissa = one | bits.mantissa; + } + } else { + if (bits.implicitBit == 0) { + // Invalid number so just store 0 similar to a NaN. + exponent = 0; + mantissa = 0; + } else { + exponent = bits.exponent - 16383; + mantissa = one | bits.mantissa; + } + } +} + +template <> inline NormalFloat::operator long double() const { + int biasedExponent = exponent + FPBits::exponentBias; + // Max exponent is of the form 0xFF...E. That is why -2 and not -1. + constexpr int maxExponentValue = (1 << ExponentWidth::value) - 2; + if (biasedExponent > maxExponentValue) { + // TODO: Should infinity with the correct sign be returned? + return FPBits::buildNaN(1); + } + + FPBits result(0.0l); + + constexpr int subnormalExponent = -FPBits::exponentBias + 1; + if (exponent < subnormalExponent) { + unsigned shift = subnormalExponent - exponent; + if (shift <= MantissaWidth::value) { + // Generate a subnormal number. Might lead to loss of precision. + result.exponent = 0; + result.mantissa = mantissa >> shift; + result.implicitBit = 0; + result.sign = sign; + return result; + } else { + // TODO: Should zero with the correct sign be returned? + return FPBits::buildNaN(1); + } + } + + result.exponent = biasedExponent; + result.mantissa = mantissa; + result.implicitBit = 1; + result.sign = sign; + return result; +} +#endif + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H