diff --git a/flang/include/flang/Common/uint128.h b/flang/include/flang/Common/uint128.h --- a/flang/include/flang/Common/uint128.h +++ b/flang/include/flang/Common/uint128.h @@ -12,8 +12,11 @@ #ifndef FORTRAN_COMMON_UINT128_H_ #define FORTRAN_COMMON_UINT128_H_ +// Define AVOID_NATIVE_UINT128_T to force the use of UnsignedInt128 below +// instead of the C++ compiler's native 128-bit unsigned integer type, if +// it has one. #ifndef AVOID_NATIVE_UINT128_T -#define AVOID_NATIVE_UINT128_T 1 // always use this code for now for testing +#define AVOID_NATIVE_UINT128_T 0 #endif #include "leading-zero-bit-count.h" diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h --- a/flang/include/flang/Decimal/binary-floating-point.h +++ b/flang/include/flang/Decimal/binary-floating-point.h @@ -22,9 +22,8 @@ namespace Fortran::decimal { template -struct BinaryFloatingPointNumber - : public common::RealDetails { - +class BinaryFloatingPointNumber : public common::RealDetails { +public: using Details = common::RealDetails; using Details::bits; using Details::decimalPrecision; @@ -50,21 +49,23 @@ constexpr BinaryFloatingPointNumber &operator=( BinaryFloatingPointNumber &&that) = default; + RawType raw() const { return raw_; } + template explicit constexpr BinaryFloatingPointNumber(A x) { - static_assert(sizeof raw <= sizeof x); - std::memcpy(reinterpret_cast(&raw), - reinterpret_cast(&x), sizeof raw); + static_assert(sizeof raw_ <= sizeof x); + std::memcpy(reinterpret_cast(&raw_), + reinterpret_cast(&x), sizeof raw_); } constexpr int BiasedExponent() const { return static_cast( - (raw >> significandBits) & ((1 << exponentBits) - 1)); + (raw_ >> significandBits) & ((1 << exponentBits) - 1)); } constexpr int UnbiasedExponent() const { int biased{BiasedExponent()}; return biased - exponentBias + (biased == 0); } - constexpr RawType Significand() const { return raw & significandMask; } + constexpr RawType Significand() const { return raw_ & significandMask; } constexpr RawType Fraction() const { RawType sig{Significand()}; if (isImplicitMSB && BiasedExponent() > 0) { @@ -74,7 +75,7 @@ } constexpr bool IsZero() const { - return (raw & ((RawType{1} << (bits - 1)) - 1)) == 0; + return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0; } constexpr bool IsNaN() const { return BiasedExponent() == maxExponent && Significand() != 0; @@ -86,11 +87,39 @@ return BiasedExponent() == maxExponent - 1 && Significand() == significandMask; } - constexpr bool IsNegative() const { return ((raw >> (bits - 1)) & 1) != 0; } + constexpr bool IsNegative() const { return ((raw_ >> (bits - 1)) & 1) != 0; } + + constexpr void Negate() { raw_ ^= RawType{1} << (bits - 1); } + + // For calculating the nearest neighbors of a floating-point value + constexpr void Previous() { + RemoveExplicitMSB(); + --raw_; + InsertExplicitMSB(); + } + constexpr void Next() { + RemoveExplicitMSB(); + ++raw_; + InsertExplicitMSB(); + } - constexpr void Negate() { raw ^= RawType{1} << (bits - 1); } +private: + constexpr void RemoveExplicitMSB() { + if constexpr (!isImplicitMSB) { + raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1); + } + } + constexpr void InsertExplicitMSB() { + if constexpr (!isImplicitMSB) { + constexpr RawType mask{significandMask >> 1}; + raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1); + if (BiasedExponent() > 0) { + raw_ |= RawType{1} << (significandBits - 1); + } + } + } - RawType raw{0}; + RawType raw_{0}; }; } // namespace Fortran::decimal #endif diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h --- a/flang/lib/Decimal/big-radix-floating-point.h +++ b/flang/lib/Decimal/big-radix-floating-point.h @@ -27,6 +27,7 @@ #include "flang/Common/unsigned-const-division.h" #include "flang/Decimal/binary-floating-point.h" #include "flang/Decimal/decimal.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -111,6 +112,8 @@ void Minimize( BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); + llvm::raw_ostream &Dump(llvm::raw_ostream &) const; + private: BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that) : digits_{that.digits_}, exponent_{that.exponent_}, @@ -283,14 +286,6 @@ } } - template void MultiplyByRounded() { - if (int carry{MultiplyBy()}) { - LoseLeastSignificantDigit(); - digit_[digits_ - 1] += carry; - exponent_ += log10Radix; - } - } - void LoseLeastSignificantDigit(); // with rounding void PushCarry(int carry) { diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp --- a/flang/lib/Decimal/binary-to-decimal.cpp +++ b/flang/lib/Decimal/binary-to-decimal.cpp @@ -8,6 +8,8 @@ #include "big-radix-floating-point.h" #include "flang/Decimal/decimal.h" +#include +#include namespace Fortran::decimal { @@ -54,17 +56,18 @@ ++exponent_; } + int overflow{0}; for (; twoPow >= 9; twoPow -= 9) { // D * 10.**E * 2.**twoPow -> (D*(2**9)) * 10.**E * 2.**(twoPow-9) - MultiplyByRounded<512>(); + overflow |= MultiplyBy<512>(); } for (; twoPow >= 3; twoPow -= 3) { // D * 10.**E * 2.**twoPow -> (D*(2**3)) * 10.**E * 2.**(twoPow-3) - MultiplyByRounded<8>(); + overflow |= MultiplyBy<8>(); } for (; twoPow > 0; --twoPow) { // D * 10.**E * 2.**twoPow -> (2*D) * 10.**E * 2.**(twoPow-1) - MultiplyByRounded<2>(); + overflow |= MultiplyBy<2>(); } while (twoPow < 0) { @@ -85,21 +88,23 @@ for (; twoPow <= -4; twoPow += 4) { // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4) - MultiplyByRounded<(5 * 5 * 5 * 5)>(); + overflow |= MultiplyBy<(5 * 5 * 5 * 5)>(); exponent_ -= 4; } if (twoPow <= -2) { // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2) - MultiplyByRounded<25>(); + overflow |= MultiplyBy<5 * 5>(); twoPow += 2; exponent_ -= 2; } for (; twoPow < 0; ++twoPow) { // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1) - MultiplyByRounded<5>(); + overflow |= MultiplyBy<5>(); --exponent_; } + assert(overflow == 0); + // twoPow == 0, the decimal encoding is complete. Normalize(); } @@ -299,37 +304,6 @@ Normalize(); } -template -void BigRadixFloatingPointNumber::LoseLeastSignificantDigit() { - Digit LSD{digit_[0]}; - for (int j{0}; j < digits_ - 1; ++j) { - digit_[j] = digit_[j + 1]; - } - digit_[digits_ - 1] = 0; - bool incr{false}; - switch (rounding_) { - case RoundNearest: - case RoundDefault: - incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); - break; - case RoundUp: - incr = LSD > 0 && !isNegative_; - break; - case RoundDown: - incr = LSD > 0 && isNegative_; - break; - case RoundToZero: - break; - case RoundCompatible: - incr = LSD >= radix / 2; - break; - } - for (int j{0}; (digit_[j] += incr) == radix; ++j) { - digit_[j] = 0; - } -} - template ConversionToDecimalResult ConvertToDecimal(char *buffer, std::size_t size, enum DecimalConversionFlags flags, int digits, @@ -358,12 +332,13 @@ // decimal sequence in that range. using Binary = typename Big::Real; Binary less{x}; - --less.raw; + less.Previous(); Binary more{x}; if (!x.IsMaximalFiniteMagnitude()) { - ++more.raw; + more.Next(); } number.Minimize(Big{less, rounding}, Big{more, rounding}); + } else { } return number.ConvertToDecimal(buffer, size, flags, digits); } @@ -412,4 +387,22 @@ } #endif } + +template +llvm::raw_ostream &BigRadixFloatingPointNumber::Dump( + llvm::raw_ostream &o) const { + if (isNegative_) { + o << '-'; + } + o << "10**(" << exponent_ << ") * ...\n"; + for (int j{digits_}; --j >= 0;) { + std::string str{std::to_string(digit_[j])}; + o << std::string(20 - str.size(), ' ') << str << " [" << j << ']'; + if (j + 1 == digitLimit_) { + o << " (limit)"; + } + o << '\n'; + } + return o; +} } // namespace Fortran::decimal diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp --- a/flang/lib/Decimal/decimal-to-binary.cpp +++ b/flang/lib/Decimal/decimal-to-binary.cpp @@ -139,6 +139,37 @@ return true; } +template +void BigRadixFloatingPointNumber::LoseLeastSignificantDigit() { + Digit LSD{digit_[0]}; + for (int j{0}; j < digits_ - 1; ++j) { + digit_[j] = digit_[j + 1]; + } + digit_[digits_ - 1] = 0; + bool incr{false}; + switch (rounding_) { + case RoundNearest: + case RoundDefault: + incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); + break; + case RoundUp: + incr = LSD > 0 && !isNegative_; + break; + case RoundDown: + incr = LSD > 0 && isNegative_; + break; + case RoundToZero: + break; + case RoundCompatible: + incr = LSD >= radix / 2; + break; + } + for (int j{0}; (digit_[j] += incr) == radix; ++j) { + digit_[j] = 0; + } +} + // This local utility class represents an unrounded nonnegative // binary floating-point value with an unbiased (i.e., signed) // binary exponent, an integer value (not a fraction) with an implied diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -396,8 +396,8 @@ case 'B': case 'O': case 'Z': - return EditIntegerOutput( - io_, edit, decimal::BinaryFloatingPointNumber{x_}.raw); + return EditIntegerOutput(io_, edit, + decimal::BinaryFloatingPointNumber{x_}.raw()); case 'G': return Edit(EditForGOutput(edit)); default: