Diff 496233

llvm/include/llvm/ADT/APFloat.h

Show First 20 Lines • Show All 152 Lines • ▼ Show 20 Lines	enum Semantics {
S_BFloat,		S_BFloat,
S_IEEEsingle,		S_IEEEsingle,
S_IEEEdouble,		S_IEEEdouble,
S_IEEEquad,		S_IEEEquad,
S_PPCDoubleDouble,		S_PPCDoubleDouble,
// 8-bit floating point number following IEEE-754 conventions with bit		// 8-bit floating point number following IEEE-754 conventions with bit
// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.		// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
S_Float8E5M2,		S_Float8E5M2,
		// 8-bit floating point number mostly following IEEE-754 conventions
		lattnerUnsubmitted Done Reply Inline Actions Typo follwing lattner: Typo follwing
		// and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915,
		// with expanded range and with no infinity or signed zero.
		// NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
		// This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
		reedwmUnsubmitted Done Reply Inline Actions You should document that the exponent bias is 16, since it differs from the IEEE standard exponent bias of 2(num_exponent_bits - 1) - 1. And same for S_Float8E4M3FNUZ. reedwm:** You should document that the exponent bias is 16, since it differs from the IEEE standard…
		// that IEEE precedent would imply.
		S_Float8E5M2FNUZ,
// 8-bit floating point number mostly following IEEE-754 conventions with		// 8-bit floating point number mostly following IEEE-754 conventions with
// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.		// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
// Unlike IEEE-754 types, there are no infinity values, and NaN is		// Unlike IEEE-754 types, there are no infinity values, and NaN is
// represented with the exponent and mantissa bits set to all 1s.		// represented with the exponent and mantissa bits set to all 1s.
S_Float8E4M3FN,		S_Float8E4M3FN,
		// 8-bit floating point number mostly following IEEE-754 conventions
		lattnerUnsubmitted Done Reply Inline Actions Typo follwing lattner: Typo follwing
		// and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915,
		// with expanded range and with no infinity or signed zero.
		// NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
		// This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1)
		// that IEEE precedent would imply.
		S_Float8E4M3FNUZ,

S_x87DoubleExtended,		S_x87DoubleExtended,
S_MaxSemantics = S_x87DoubleExtended,		S_MaxSemantics = S_x87DoubleExtended,
};		};

static const llvm::fltSemantics &EnumToSemantics(Semantics S);		static const llvm::fltSemantics &EnumToSemantics(Semantics S);
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);		static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);

static const fltSemantics &IEEEhalf() LLVM_READNONE;		static const fltSemantics &IEEEhalf() LLVM_READNONE;
static const fltSemantics &BFloat() LLVM_READNONE;		static const fltSemantics &BFloat() LLVM_READNONE;
static const fltSemantics &IEEEsingle() LLVM_READNONE;		static const fltSemantics &IEEEsingle() LLVM_READNONE;
static const fltSemantics &IEEEdouble() LLVM_READNONE;		static const fltSemantics &IEEEdouble() LLVM_READNONE;
static const fltSemantics &IEEEquad() LLVM_READNONE;		static const fltSemantics &IEEEquad() LLVM_READNONE;
static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;		static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
static const fltSemantics &Float8E5M2() LLVM_READNONE;		static const fltSemantics &Float8E5M2() LLVM_READNONE;
		static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE;
static const fltSemantics &Float8E4M3FN() LLVM_READNONE;		static const fltSemantics &Float8E4M3FN() LLVM_READNONE;
		static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE;
static const fltSemantics &x87DoubleExtended() LLVM_READNONE;		static const fltSemantics &x87DoubleExtended() LLVM_READNONE;

/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with		/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
/// anything real.		/// anything real.
static const fltSemantics &Bogus() LLVM_READNONE;		static const fltSemantics &Bogus() LLVM_READNONE;

/// @}		/// @}

▲ Show 20 Lines • Show All 376 Lines • ▼ Show 20 Lines	private:
APInt convertHalfAPFloatToAPInt() const;		APInt convertHalfAPFloatToAPInt() const;
APInt convertBFloatAPFloatToAPInt() const;		APInt convertBFloatAPFloatToAPInt() const;
APInt convertFloatAPFloatToAPInt() const;		APInt convertFloatAPFloatToAPInt() const;
APInt convertDoubleAPFloatToAPInt() const;		APInt convertDoubleAPFloatToAPInt() const;
APInt convertQuadrupleAPFloatToAPInt() const;		APInt convertQuadrupleAPFloatToAPInt() const;
APInt convertF80LongDoubleAPFloatToAPInt() const;		APInt convertF80LongDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;		APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
APInt convertFloat8E5M2APFloatToAPInt() const;		APInt convertFloat8E5M2APFloatToAPInt() const;
		APInt convertFloat8E5M2FNUZAPFloatToAPInt() const;
APInt convertFloat8E4M3FNAPFloatToAPInt() const;		APInt convertFloat8E4M3FNAPFloatToAPInt() const;
		APInt convertFloat8E4M3FNUZAPFloatToAPInt() const;
void initFromAPInt(const fltSemantics *Sem, const APInt &api);		void initFromAPInt(const fltSemantics *Sem, const APInt &api);
void initFromHalfAPInt(const APInt &api);		void initFromHalfAPInt(const APInt &api);
void initFromBFloatAPInt(const APInt &api);		void initFromBFloatAPInt(const APInt &api);
void initFromFloatAPInt(const APInt &api);		void initFromFloatAPInt(const APInt &api);
void initFromDoubleAPInt(const APInt &api);		void initFromDoubleAPInt(const APInt &api);
void initFromQuadrupleAPInt(const APInt &api);		void initFromQuadrupleAPInt(const APInt &api);
void initFromF80LongDoubleAPInt(const APInt &api);		void initFromF80LongDoubleAPInt(const APInt &api);
void initFromPPCDoubleDoubleAPInt(const APInt &api);		void initFromPPCDoubleDoubleAPInt(const APInt &api);
void initFromFloat8E5M2APInt(const APInt &api);		void initFromFloat8E5M2APInt(const APInt &api);
		void initFromFloat8E5M2FNUZAPInt(const APInt &api);
void initFromFloat8E4M3FNAPInt(const APInt &api);		void initFromFloat8E4M3FNAPInt(const APInt &api);
		void initFromFloat8E4M3FNUZAPInt(const APInt &api);

void assign(const IEEEFloat &);		void assign(const IEEEFloat &);
void copySignificand(const IEEEFloat &);		void copySignificand(const IEEEFloat &);
void freeSignificand();		void freeSignificand();

/// Note: this must be the first data member.		/// Note: this must be the first data member.
/// The semantics that this value obeys.		/// The semantics that this value obeys.
const fltSemantics *semantics;		const fltSemantics *semantics;
▲ Show 20 Lines • Show All 774 Lines • Show Last 20 Lines

llvm/lib/Support/APFloat.cpp

//===-- APFloat.cpp - Implement APFloat class -----------------------------===//		//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file implements a class to represent arbitrary precision floating		// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.		// point values and provide a variety of arithmetic operations on them.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"		#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"		#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"		#include "llvm/ADT/ArrayRef.h"
		#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"		#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"		#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"		#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"		#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"		#include "llvm/Support/MathExtras.h"
Show All 20 Lines
/// codebase, change this into a static inline function.		/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))		#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to		/* Assumed in hexadecimal significand parsing, and conversion to
hexadecimal strings. */		hexadecimal strings. */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");		static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {		namespace llvm {

// How the nonfinite values Inf and NaN are represented.		// How the nonfinite values Inf and NaN are represented.
		reedwmUnsubmitted Done Reply Inline Actions This comment should be updated to mention the new dtypes, and also the NaN description no longer is accurate for the new dtypes. Also, "Float8E5M2" should be replaced with "Float8E4M3FN". I accidentally put the wrong dtype in the comment when adding Float8E4M3FN. reedwm: This comment should be updated to mention the new dtypes, and also the NaN description no…
enum class fltNonfiniteBehavior {		enum class fltNonfiniteBehavior {
// Represents standard IEEE 754 behavior. A value is nonfinite if the		// Represents standard IEEE 754 behavior. A value is nonfinite if the
// exponent field is all 1s. In such cases, a value is Inf if the		// exponent field is all 1s. In such cases, a value is Inf if the
// significand bits are all zero, and NaN otherwise		// significand bits are all zero, and NaN otherwise
IEEE754,		IEEE754,

// Only the Float8E5M2 has this behavior. There is no Inf representation. A		// This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
// value is NaN if the exponent field and the mantissa field are all 1s.		// Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
		// and operations that would ordinarily produce Inf produce NaN instead.
		// The details of the NaN representation(s) in this form are determined by the
		reedwmUnsubmitted Done Reply Inline Actions fltNanEncoding::AllOnes technically has two NaN representations: a NaN with the sign bit set and a NaN with the sign bit unset. I would change this setence to "The details of the NaN representation(s) are determined by the `fltNanEncoding` enum reedwm: fltNanEncoding::AllOnes technically has two NaN representations: a NaN with the sign bit set…
		// `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
		// encodings do not distinguish between signalling and quiet NaN.
		NanOnly,
		};

		// How NaN values are represented. This is currently only used in combination
		// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
		// while having IEEE non-finite behavior is liable to lead to unexpected
		// results.
		enum class fltNanEncoding {
		// Represents the standard IEEE behavior where a value is NaN if its
		mehdi_aminiUnsubmitted Done Reply Inline Actions Typo: point mehdi_amini: Typo: point
		// exponent is all 1s and the significand is non-zero.
		mehdi_aminiUnsubmitted Done Reply Inline Actions typo: represented mehdi_amini: typo: represented
		reedwmUnsubmitted Done Reply Inline Actions The sign bit can still be zero. I would clarify that for NaNs the exponent and mantissa are all 1s. reedwm: The sign bit can still be zero. I would clarify that for NaNs the exponent and mantissa are all…
		IEEE,

		// Represents the behavior in the Float8E4M3FN floating point type where NaN is
		mehdi_aminiUnsubmitted Done Reply Inline Actions typo: behavior mehdi_amini: typo: behavior
		// represented by having the exponent and mantissa set to all 1s.
		reedwmUnsubmitted Done Reply Inline Actions Also mention that for NaNs, the mantissa is also all 0s (you only mentioned the exponent is all 0s). reedwm: Also mention that for NaNs, the mantissa is also all 0s (you only mentioned the exponent is all…
		reedwmUnsubmitted Done Reply Inline Actions typo: hhaving -> having reedwm: typo: hhaving -> having
// This behavior matches the FP8 E4M3 type described in		// This behavior matches the FP8 E4M3 type described in
// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs		// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
// as non-signalling, although the paper does not state whether the NaN		// as non-signalling, although the paper does not state whether the NaN
// values are signalling or not.		// values are signalling or not.
NanOnly,		AllOnes,

		// Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
		// where NaN is represented by a sign bit of 1 and all 0s in the exponent
		// and mantissa (i.e. the negative zero encoding in an IEEE float). Since
		// there is only one NaN value, it is treated as quiet NaN. This matches the
		// behavior described in https://arxiv.org/abs/2206.02915 .
		NegativeZero,
};		};

/* Represents floating point arithmetic semantics. */		/* Represents floating point arithmetic semantics. */
struct fltSemantics {		struct fltSemantics {
/* The largest E such that 2^E is representable; this matches the		/* The largest E such that 2^E is representable; this matches the
definition of IEEE 754. */		definition of IEEE 754. */
APFloatBase::ExponentType maxExponent;		APFloatBase::ExponentType maxExponent;

/* The smallest E such that 2^E is a normalized number; this		/* The smallest E such that 2^E is a normalized number; this
matches the definition of IEEE 754. */		matches the definition of IEEE 754. */
APFloatBase::ExponentType minExponent;		APFloatBase::ExponentType minExponent;

/* Number of bits in the significand. This includes the integer		/* Number of bits in the significand. This includes the integer
bit. */		bit. */
unsigned int precision;		unsigned int precision;

/* Number of bits actually used in the semantics. */		/* Number of bits actually used in the semantics. */
unsigned int sizeInBits;		unsigned int sizeInBits;

fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;		fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

		fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
// Returns true if any number described by this semantics can be precisely		// Returns true if any number described by this semantics can be precisely
// represented by the specified semantics. Does not take into account		// represented by the specified semantics. Does not take into account
// the value of fltNonfiniteBehavior.		// the value of fltNonfiniteBehavior.
bool isRepresentableBy(const fltSemantics &S) const {		bool isRepresentableBy(const fltSemantics &S) const {
return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&		return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
precision <= S.precision;		precision <= S.precision;
}		}
};		};

static const fltSemantics semIEEEhalf = {15, -14, 11, 16};		static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
static const fltSemantics semBFloat = {127, -126, 8, 16};		static const fltSemantics semBFloat = {127, -126, 8, 16};
static const fltSemantics semIEEEsingle = {127, -126, 24, 32};		static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};		static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};		static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};		static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,		static const fltSemantics semFloat8E5M2FNUZ = {15,
fltNonfiniteBehavior::NanOnly};		-15,
		3,
		8,
		fltNonfiniteBehavior::NanOnly,
		fltNanEncoding::NegativeZero};
		static const fltSemantics semFloat8E4M3FN = {
		8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
		static const fltSemantics semFloat8E4M3FNUZ = {
		7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};		static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static const fltSemantics semBogus = {0, 0, 0, 0};		static const fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE		/* The IBM double-double semantics. Such a number consists of a pair of IEEE
64-bit doubles (Hi, Lo), where \|Hi\| > \|Lo\|, and if normal,		64-bit doubles (Hi, Lo), where \|Hi\| > \|Lo\|, and if normal,
(double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.		(double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent		Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
to each other, and two 11-bit exponents.		to each other, and two 11-bit exponents.
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
case S_IEEEdouble:		case S_IEEEdouble:
return IEEEdouble();		return IEEEdouble();
case S_IEEEquad:		case S_IEEEquad:
return IEEEquad();		return IEEEquad();
case S_PPCDoubleDouble:		case S_PPCDoubleDouble:
return PPCDoubleDouble();		return PPCDoubleDouble();
case S_Float8E5M2:		case S_Float8E5M2:
return Float8E5M2();		return Float8E5M2();
		case S_Float8E5M2FNUZ:
		return Float8E5M2FNUZ();
case S_Float8E4M3FN:		case S_Float8E4M3FN:
return Float8E4M3FN();		return Float8E4M3FN();
		case S_Float8E4M3FNUZ:
		return Float8E4M3FNUZ();
case S_x87DoubleExtended:		case S_x87DoubleExtended:
return x87DoubleExtended();		return x87DoubleExtended();
}		}
llvm_unreachable("Unrecognised floating semantics");		llvm_unreachable("Unrecognised floating semantics");
}		}

APFloatBase::Semantics		APFloatBase::Semantics
APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {		APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
if (&Sem == &llvm::APFloat::IEEEhalf())		if (&Sem == &llvm::APFloat::IEEEhalf())
return S_IEEEhalf;		return S_IEEEhalf;
else if (&Sem == &llvm::APFloat::BFloat())		else if (&Sem == &llvm::APFloat::BFloat())
return S_BFloat;		return S_BFloat;
else if (&Sem == &llvm::APFloat::IEEEsingle())		else if (&Sem == &llvm::APFloat::IEEEsingle())
return S_IEEEsingle;		return S_IEEEsingle;
else if (&Sem == &llvm::APFloat::IEEEdouble())		else if (&Sem == &llvm::APFloat::IEEEdouble())
return S_IEEEdouble;		return S_IEEEdouble;
else if (&Sem == &llvm::APFloat::IEEEquad())		else if (&Sem == &llvm::APFloat::IEEEquad())
return S_IEEEquad;		return S_IEEEquad;
else if (&Sem == &llvm::APFloat::PPCDoubleDouble())		else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
return S_PPCDoubleDouble;		return S_PPCDoubleDouble;
else if (&Sem == &llvm::APFloat::Float8E5M2())		else if (&Sem == &llvm::APFloat::Float8E5M2())
return S_Float8E5M2;		return S_Float8E5M2;
		else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
		return S_Float8E5M2FNUZ;
else if (&Sem == &llvm::APFloat::Float8E4M3FN())		else if (&Sem == &llvm::APFloat::Float8E4M3FN())
return S_Float8E4M3FN;		return S_Float8E4M3FN;
		else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
		return S_Float8E4M3FNUZ;
else if (&Sem == &llvm::APFloat::x87DoubleExtended())		else if (&Sem == &llvm::APFloat::x87DoubleExtended())
return S_x87DoubleExtended;		return S_x87DoubleExtended;
else		else
llvm_unreachable("Unknown floating semantics");		llvm_unreachable("Unknown floating semantics");
}		}

const fltSemantics &APFloatBase::IEEEhalf() {		const fltSemantics &APFloatBase::IEEEhalf() {
return semIEEEhalf;		return semIEEEhalf;
}		}
const fltSemantics &APFloatBase::BFloat() {		const fltSemantics &APFloatBase::BFloat() {
return semBFloat;		return semBFloat;
}		}
const fltSemantics &APFloatBase::IEEEsingle() {		const fltSemantics &APFloatBase::IEEEsingle() {
return semIEEEsingle;		return semIEEEsingle;
}		}
const fltSemantics &APFloatBase::IEEEdouble() {		const fltSemantics &APFloatBase::IEEEdouble() {
return semIEEEdouble;		return semIEEEdouble;
}		}
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }		const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {		const fltSemantics &APFloatBase::PPCDoubleDouble() {
return semPPCDoubleDouble;		return semPPCDoubleDouble;
}		}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }		const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
		const fltSemantics &APFloatBase::Float8E5M2FNUZ() {
		return semFloat8E5M2FNUZ;
		}
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }		const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
		const fltSemantics &APFloatBase::Float8E4M3FNUZ() {
		return semFloat8E4M3FNUZ;
		}
const fltSemantics &APFloatBase::x87DoubleExtended() {		const fltSemantics &APFloatBase::x87DoubleExtended() {
return semX87DoubleExtended;		return semX87DoubleExtended;
}		}
const fltSemantics &APFloatBase::Bogus() { return semBogus; }		const fltSemantics &APFloatBase::Bogus() { return semBogus; }

constexpr RoundingMode APFloatBase::rmNearestTiesToEven;		constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;		constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;		constexpr RoundingMode APFloatBase::rmTowardNegative;
▲ Show 20 Lines • Show All 581 Lines • ▼ Show 20 Lines	void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
category = fcNaN;		category = fcNaN;
sign = Negative;		sign = Negative;
exponent = exponentNaN();		exponent = exponentNaN();

integerPart *significand = significandParts();		integerPart *significand = significandParts();
unsigned numParts = partCount();		unsigned numParts = partCount();

APInt fill_storage;		APInt fill_storage;
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
// The only NaN representation is where the mantissa is all 1s, which is		// Finite-only types do not distinguish signalling and quiet NaN, so
// non-signalling.		// make them all quiet (non-signalling).
		reedwmUnsubmitted Done Reply Inline Actions This comment should be moved to the "else" branch you added (or just delete the comment). reedwm: This comment should be moved to the "else" branch you added (or just delete the comment).
		reedwmUnsubmitted Done Reply Inline Actions typo: signallling -> signalling reedwm: typo: signallling -> signalling
SNaN = false;		SNaN = false;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
		sign = true;
		fill_storage = APInt::getZero(semantics->precision - 1);
		} else {
fill_storage = APInt::getAllOnes(semantics->precision - 1);		fill_storage = APInt::getAllOnes(semantics->precision - 1);
		}
fill = &fill_storage;		fill = &fill_storage;
}		}

// Set the significand bits to the fill.		// Set the significand bits to the fill.
if (!fill \|\| fill->getNumWords() < numParts)		if (!fill \|\| fill->getNumWords() < numParts)
APInt::tcSet(significand, 0, numParts);		APInt::tcSet(significand, 0, numParts);
if (fill) {		if (fill) {
APInt::tcAssign(significand, fill->getRawData(),		APInt::tcAssign(significand, fill->getRawData(),
Show All 14 Lines	if (SNaN) {
// We always have to clear the QNaN bit to make it an SNaN.		// We always have to clear the QNaN bit to make it an SNaN.
APInt::tcClearBit(significand, QNaNBit);		APInt::tcClearBit(significand, QNaNBit);

// If there are no bits set in the payload, we have to set		// If there are no bits set in the payload, we have to set
// something to make it a NaN instead of an infinity;		// something to make it a NaN instead of an infinity;
// conventionally, this is the next bit down from the QNaN bit.		// conventionally, this is the next bit down from the QNaN bit.
if (APInt::tcIsZero(significand, numParts))		if (APInt::tcIsZero(significand, numParts))
APInt::tcSetBit(significand, QNaNBit - 1);		APInt::tcSetBit(significand, QNaNBit - 1);
		} else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
		// The only NaN is a quiet NaN, and it has no bits set in the significand.
		// Do nothing.
} else {		} else {
// We always have to set the QNaN bit to make it a QNaN.		// We always have to set the QNaN bit to make it a QNaN.
APInt::tcSetBit(significand, QNaNBit);		APInt::tcSetBit(significand, QNaNBit);
}		}

// For x87 extended precision, we want to make a NaN, not a		// For x87 extended precision, we want to make a NaN, not a
// pseudo-NaN. Maybe we should expose the ability to make		// pseudo-NaN. Maybe we should expose the ability to make
// pseudo-NaNs?		// pseudo-NaNs?
▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines	bool IEEEFloat::isSignificandAllZerosExceptMSB() const {

const unsigned NumHighBits =		const unsigned NumHighBits =
PartCount * integerPartWidth - semantics->precision + 1;		PartCount * integerPartWidth - semantics->precision + 1;
return Parts[PartCount - 1] == integerPart(1)		return Parts[PartCount - 1] == integerPart(1)
<< (integerPartWidth - NumHighBits);		<< (integerPartWidth - NumHighBits);
}		}

bool IEEEFloat::isLargest() const {		bool IEEEFloat::isLargest() const {
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
		semantics->nanEncoding == fltNanEncoding::AllOnes) {
// The largest number by magnitude in our format will be the floating point		// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones except		// number with maximum exponent and with significand that is all ones except
// the LSB.		// the LSB.
return isFiniteNonZero() && exponent == semantics->maxExponent &&		return isFiniteNonZero() && exponent == semantics->maxExponent &&
isSignificandAllOnesExceptLSB();		isSignificandAllOnesExceptLSB();
} else {		} else {
// The largest number by magnitude in our format will be the floating point		// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones.		// number with maximum exponent and with significand that is all ones.
▲ Show 20 Lines • Show All 425 Lines • ▼ Show 20 Lines	if (rounding_mode == rmNearestTiesToEven \|\|
return (opStatus) (opOverflow \| opInexact);		return (opStatus) (opOverflow \| opInexact);
}		}

/* Otherwise we become the largest finite number. */		/* Otherwise we become the largest finite number. */
category = fcNormal;		category = fcNormal;
exponent = semantics->maxExponent;		exponent = semantics->maxExponent;
tcSetLeastSignificantBits(significandParts(), partCount(),		tcSetLeastSignificantBits(significandParts(), partCount(),
semantics->precision);		semantics->precision);
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
		semantics->nanEncoding == fltNanEncoding::AllOnes)
APInt::tcClearBit(significandParts(), 0);		APInt::tcClearBit(significandParts(), 0);

return opInexact;		return opInexact;
}		}

/* Returns TRUE if, when truncating the current number, with BIT the		/* Returns TRUE if, when truncating the current number, with BIT the
new LSB, with the given lost fraction and rounding mode, the result		new LSB, with the given lost fraction and rounding mode, the result
would need to be rounded away from zero (i.e., by increasing the		would need to be rounded away from zero (i.e., by increasing the
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines	if (exponentChange > 0) {
/* Keep OMSB up-to-date. */		/* Keep OMSB up-to-date. */
if (omsb > (unsigned) exponentChange)		if (omsb > (unsigned) exponentChange)
omsb -= exponentChange;		omsb -= exponentChange;
else		else
omsb = 0;		omsb = 0;
}		}
}		}

		// The all-ones value is an overflow if NaN is all ones. If NaN is
		// represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
		semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())		exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);		return handleOverflow(rounding_mode);

/* Now round the number according to rounding_mode given the lost		/* Now round the number according to rounding_mode given the lost
fraction. */		fraction. */

/* As specified in IEEE 754, since we do not trap we do not report		/* As specified in IEEE 754, since we do not trap we do not report
underflow for exact results. */		underflow for exact results. */
if (lost_fraction == lfExactlyZero) {		if (lost_fraction == lfExactlyZero) {
/* Canonicalize zeroes. */		/* Canonicalize zeroes. */
if (omsb == 0)		if (omsb == 0) {
category = fcZero;		category = fcZero;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
		}

return opOK;		return opOK;
}		}

/* Increment the significand if we're rounding away from zero. */		/* Increment the significand if we're rounding away from zero. */
if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {		if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
if (omsb == 0)		if (omsb == 0)
exponent = semantics->minExponent;		exponent = semantics->minExponent;

incrementSignificand();		incrementSignificand();
omsb = significandMSB() + 1;		omsb = significandMSB() + 1;

/* Did the significand increment overflow? */		/* Did the significand increment overflow? */
if (omsb == (unsigned) semantics->precision + 1) {		if (omsb == (unsigned) semantics->precision + 1) {
/* Renormalize by incrementing the exponent and shifting our		/* Renormalize by incrementing the exponent and shifting our
significand right one. However if we already have the		significand right one. However if we already have the
maximum exponent we overflow to infinity. */		maximum exponent we overflow to infinity. */
if (exponent == semantics->maxExponent) {		if (exponent == semantics->maxExponent)
category = fcInfinity;		// Invoke overflow handling with a rounding mode that will guarantee
		// that the result gets turned into the correct infinity representation.
return (opStatus) (opOverflow \| opInexact);		// This is needed instead of just setting the category to infinity to
}		// account for 8-bit floating point types that have no inf, only NaN.
		return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

shiftSignificandRight(1);		shiftSignificandRight(1);

return opInexact;		return opInexact;
}		}

		// The all-ones value is an overflow if NaN is all ones. If NaN is
		// represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
		semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())		exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);		return handleOverflow(rounding_mode);
}		}

/* The normal case - we were and are not denormal, and any		/* The normal case - we were and are not denormal, and any
significand increment above didn't overflow. */		significand increment above didn't overflow. */
if (omsb == semantics->precision)		if (omsb == semantics->precision)
return opInexact;		return opInexact;

/* We have a non-zero denormal. */		/* We have a non-zero denormal. */
assert(omsb < semantics->precision);		assert(omsb < semantics->precision);

/* Canonicalize zeroes. */		/* Canonicalize zeroes. */
if (omsb == 0)		if (omsb == 0) {
category = fcZero;		category = fcZero;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
		}

/* The fcZero case is a denormal that underflowed to zero. */		/* The fcZero case is a denormal that underflowed to zero. */
return (opStatus) (opUnderflow \| opInexact);		return (opStatus) (opUnderflow \| opInexact);
}		}

IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,		IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
bool subtract) {		bool subtract) {
switch (PackCategoriesIntoKey(category, rhs.category)) {		switch (PackCategoriesIntoKey(category, rhs.category)) {
▲ Show 20 Lines • Show All 285 Lines • ▼ Show 20 Lines	IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {

case PackCategoriesIntoKey(fcNormal, fcNormal):		case PackCategoriesIntoKey(fcNormal, fcNormal):
return opDivByZero; // fake status, indicating this is not a special case		return opDivByZero; // fake status, indicating this is not a special case
}		}
}		}

/* Change sign. */		/* Change sign. */
void IEEEFloat::changeSign() {		void IEEEFloat::changeSign() {
		// With NaN-as-negative-zero, neither NaN nor negative zero can change
		// sign.
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
		(isZero() \|\| isNaN()))
		return;
/* Look mummy, this one's easy. */		/* Look mummy, this one's easy. */
sign = !sign;		sign = !sign;
}		}

/* Normalized addition or subtraction. */		/* Normalized addition or subtraction. */
IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,		IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
roundingMode rounding_mode,		roundingMode rounding_mode,
bool subtract) {		bool subtract) {
Show All 13 Lines	IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
}		}

/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a		/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that		positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */		adding two like-signed zeroes gives that zero. */
if (category == fcZero) {		if (category == fcZero) {
if (rhs.category != fcZero \|\| (sign == rhs.sign) == subtract)		if (rhs.category != fcZero \|\| (sign == rhs.sign) == subtract)
sign = (rounding_mode == rmTowardNegative);		sign = (rounding_mode == rmTowardNegative);
		// NaN-in-negative-zero means zeros need to be normalized to +0.
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
}		}

return fs;		return fs;
}		}

/* Normalized addition. */		/* Normalized addition. */
IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,		IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
roundingMode rounding_mode) {		roundingMode rounding_mode) {
Show All 9 Lines
/* Normalized multiply. */		/* Normalized multiply. */
IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,		IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
roundingMode rounding_mode) {		roundingMode rounding_mode) {
opStatus fs;		opStatus fs;

sign ^= rhs.sign;		sign ^= rhs.sign;
fs = multiplySpecials(rhs);		fs = multiplySpecials(rhs);

		if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
if (isFiniteNonZero()) {		if (isFiniteNonZero()) {
lostFraction lost_fraction = multiplySignificand(rhs);		lostFraction lost_fraction = multiplySignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);		fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)		if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs \| opInexact);		fs = (opStatus) (fs \| opInexact);
}		}

return fs;		return fs;
}		}

/* Normalized divide. */		/* Normalized divide. */
IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,		IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
roundingMode rounding_mode) {		roundingMode rounding_mode) {
opStatus fs;		opStatus fs;

sign ^= rhs.sign;		sign ^= rhs.sign;
fs = divideSpecials(rhs);		fs = divideSpecials(rhs);

		if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
if (isFiniteNonZero()) {		if (isFiniteNonZero()) {
lostFraction lost_fraction = divideSignificand(rhs);		lostFraction lost_fraction = divideSignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);		fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)		if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs \| opInexact);		fs = (opStatus) (fs \| opInexact);
}		}

return fs;		return fs;
▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	if (VEx.compare(PEx) == cmpGreaterThan) {

cmpResult result = VEx.compare(PEx);		cmpResult result = VEx.compare(PEx);
if (result == cmpGreaterThan \|\| result == cmpEqual) {		if (result == cmpGreaterThan \|\| result == cmpEqual) {
fs = subtract(P, rmNearestTiesToEven);		fs = subtract(P, rmNearestTiesToEven);
assert(fs == opOK);		assert(fs == opOK);
}		}
}		}

if (isZero())		if (isZero()) {
sign = origSign; // IEEE754 requires this		sign = origSign; // IEEE754 requires this
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		// But some 8-bit floats only have positive 0.
		sign = false;
		}

else		else
sign ^= origSign;		sign ^= origSign;
return fs;		return fs;
}		}

/* Normalized llvm frem (C fmod). */		/* Normalized llvm frem (C fmod). */
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {		IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
opStatus fs;		opStatus fs;
fs = modSpecials(rhs);		fs = modSpecials(rhs);
unsigned int origSign = sign;		unsigned int origSign = sign;

while (isFiniteNonZero() && rhs.isFiniteNonZero() &&		while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
compareAbsoluteValue(rhs) != cmpLessThan) {		compareAbsoluteValue(rhs) != cmpLessThan) {
int Exp = ilogb(*this) - ilogb(rhs);		int Exp = ilogb(*this) - ilogb(rhs);
IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);		IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
// V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly		// V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
// check for it.		// check for it.
if (V.isNaN() \|\| compareAbsoluteValue(V) == cmpLessThan)		if (V.isNaN() \|\| compareAbsoluteValue(V) == cmpLessThan)
V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);		V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
V.sign = sign;		V.sign = sign;

fs = subtract(V, rmNearestTiesToEven);		fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);		assert(fs==opOK);
}		}
if (isZero())		if (isZero()) {
sign = origSign; // fmod requires this		sign = origSign; // fmod requires this
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
		}
return fs;		return fs;
}		}

/* Normalized fused-multiply-add. */		/* Normalized fused-multiply-add. */
IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,		IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
const IEEEFloat &addend,		const IEEEFloat &addend,
roundingMode rounding_mode) {		roundingMode rounding_mode) {
opStatus fs;		opStatus fs;
Show All 11 Lines	if (isFiniteNonZero() &&
lost_fraction = multiplySignificand(multiplicand, addend);		lost_fraction = multiplySignificand(multiplicand, addend);
fs = normalize(rounding_mode, lost_fraction);		fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)		if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs \| opInexact);		fs = (opStatus) (fs \| opInexact);

/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a		/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that		positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */		adding two like-signed zeroes gives that zero. */
if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)		if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
sign = (rounding_mode == rmTowardNegative);		sign = (rounding_mode == rmTowardNegative);
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
		}
} else {		} else {
fs = multiplySpecials(multiplicand);		fs = multiplySpecials(multiplicand);

/* FS can only be opOK or opInvalidOp. There is no more work		/* FS can only be opOK or opInvalidOp. There is no more work
to do in the latter case. The IEEE-754R standard says it is		to do in the latter case. The IEEE-754R standard says it is
implementation-defined in this case whether, if ADDEND is a		implementation-defined in this case whether, if ADDEND is a
quiet NaN, we raise invalid op; this implementation does so.		quiet NaN, we raise invalid op; this implementation does so.

▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines	IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
} else if (category == fcNaN) {		} else if (category == fcNaN) {
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
*losesInfo =		*losesInfo =
fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;		fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
makeNaN(false, sign);		makeNaN(false, sign);
return is_signaling ? opInvalidOp : opOK;		return is_signaling ? opInvalidOp : opOK;
}		}

		// If NaN is negative zero, we need to create a new NaN to avoid converting
		// NaN to -Inf.
		if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
		semantics->nanEncoding != fltNanEncoding::NegativeZero)
		makeNaN(false, false);

*losesInfo = lostFraction != lfExactlyZero \|\| X86SpecialNan;		*losesInfo = lostFraction != lfExactlyZero \|\| X86SpecialNan;

// For x87 extended precision, we want to make a NaN, not a special NaN if		// For x87 extended precision, we want to make a NaN, not a special NaN if
// the input wasn't special either.		// the input wasn't special either.
if (!X86SpecialNan && semantics == &semX87DoubleExtended)		if (!X86SpecialNan && semantics == &semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);		APInt::tcSetBit(significandParts(), semantics->precision - 1);

// Convert of sNaN creates qNaN and raises an exception (invalid op).		// Convert of sNaN creates qNaN and raises an exception (invalid op).
// This also guarantees that a sNaN does not become Inf on a truncation		// This also guarantees that a sNaN does not become Inf on a truncation
// that loses all payload bits.		// that loses all payload bits.
if (is_signaling) {		if (is_signaling) {
makeQuiet();		makeQuiet();
fs = opInvalidOp;		fs = opInvalidOp;
} else {		} else {
fs = opOK;		fs = opOK;
}		}
} else if (category == fcInfinity &&		} else if (category == fcInfinity &&
semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {		semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
makeNaN(false, sign);		makeNaN(false, sign);
*losesInfo = true;		*losesInfo = true;
fs = opInexact;		fs = opInexact;
		} else if (category == fcZero &&
		semantics->nanEncoding == fltNanEncoding::NegativeZero) {
		// Negative zero loses info, but positive zero doesn't.
		reedwmUnsubmitted Not Done Reply Inline Actions Don't we only lose info if the source of the conversion is -0? reedwm: Don't we only lose info if the source of the conversion is -0?
		krzysz00AuthorUnsubmitted Done Reply Inline Actions I don't think so? If I understand `losesInfo` correctly, it's for (non-rounding-related?) cases where, for `x : T`, `convert(T, convert(U, x)) != x` is possible. So since both +0 and -0 map to +0, we've lost information. krzysz00: I don't think so? If I understand `losesInfo` correctly, it's for (non-rounding-related?) cases…
		reedwmUnsubmitted Not Done Reply Inline Actions I don't think so? If I understand `losesInfo` correctly, it's for (non-rounding-related?) cases where, for `x : T`, `convert(T, convert(U, x)) != x` is possible. By that logic, `losesInfo` should be false if the source of the conversion is +0. E.g., in your equation `convert(T, convert(U, x)) != x`, if x is the FP32 value +0, T is FP32, and U is FP8, then the equation is false, since converting +0 to FP8 and back results in the FP32 value +0. So `losesInfo` should be false in that case. On the other hand, if x is the FP32 value -0, the equation is true, since converting -0 to FP8 and back results in the FP32 value +0, and so `losesInfo` should be true. reedwm: > I don't think so? If I understand `losesInfo` correctly, it's for (non-rounding-related?)…
		krzysz00AuthorUnsubmitted Done Reply Inline Actions Ok, to rephrase: we know that converting f32 `Inf` or `NaN` to any of the FN APFloat formats loses info, because there's two distinct concepts that get collapsed down to one value. Similarly, in our formats, putting in a 0 from a format that isn't unsigned-zero-only also loses info, as now +0 and -0 have been collapsed down to one value and you can't tell which one you had on the way back out. krzysz00: Ok, to rephrase: we know that converting f32 `Inf` or `NaN` to any of the FN APFloat formats…
		krzysz00AuthorUnsubmitted Done Reply Inline Actions Ok, having looked at the comment, on the function again you were right, +0 should not lose info but -0 should. krzysz00: Ok, having looked at the comment, on the function again you were right, +0 should not lose info…
		*losesInfo =
		fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
		fs = *losesInfo ? opInexact : opOK;
		// Because NaN is encoded as negative zero, -0 becomes +0, which can lose information.
		sign = false;
} else {		} else {
*losesInfo = false;		*losesInfo = false;
fs = opOK;		fs = opOK;
}		}

return fs;		return fs;
}		}

▲ Show 20 Lines • Show All 451 Lines • ▼ Show 20 Lines	IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
//		//
// We computed firstSigDigit by ignoring all zeros and dots. Thus if		// We computed firstSigDigit by ignoring all zeros and dots. Thus if
// D->firstSigDigit equals str.end(), every digit must be a zero and there can		// D->firstSigDigit equals str.end(), every digit must be a zero and there can
// be at most one dot. On the other hand, if we have a zero with a non-zero		// be at most one dot. On the other hand, if we have a zero with a non-zero
// exponent, then we know that D.firstSigDigit will be non-numeric.		// exponent, then we know that D.firstSigDigit will be non-numeric.
if (D.firstSigDigit == str.end() \|\| decDigitValue(*D.firstSigDigit) >= 10U) {		if (D.firstSigDigit == str.end() \|\| decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;		category = fcZero;
fs = opOK;		fs = opOK;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;

/* Check whether the normalized exponent is high enough to overflow		/* Check whether the normalized exponent is high enough to overflow
max during the log-rebasing in the max-exponent check below. */		max during the log-rebasing in the max-exponent check below. */
} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {		} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
fs = handleOverflow(rounding_mode);		fs = handleOverflow(rounding_mode);

/* If it wasn't, then it also wasn't high enough to overflow max		/* If it wasn't, then it also wasn't high enough to overflow max
during the log-rebasing in the min-exponent check. Check that it		during the log-rebasing in the min-exponent check. Check that it
won't overflow min in either check, then perform the min-exponent		won't overflow min in either check, then perform the min-exponent
check. */		check. */
} else if (D.normalizedExponent - 1 < INT_MIN / 42039 \|\|		} else if (D.normalizedExponent - 1 < INT_MIN / 42039 \|\|
▲ Show 20 Lines • Show All 611 Lines • ▼ Show 20 Lines	if (isFiniteNonZero()) {
myexponent = 0x1f;		myexponent = 0x1f;
mysignificand = (uint32_t)*significandParts();		mysignificand = (uint32_t)*significandParts();
}		}

return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0x1f) << 2) \|		return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0x1f) << 2) \|
(mysignificand & 0x3)));		(mysignificand & 0x3)));
}		}

		APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
		assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ);
		assert(partCount() == 1);

		uint32_t myexponent, mysignificand;

		if (isFiniteNonZero()) {
		myexponent = exponent + 16; // bias
		mysignificand = (uint32_t)*significandParts();
		if (myexponent == 1 && !(mysignificand & 0x4))
		myexponent = 0; // denormal
		} else if (category == fcZero) {
		myexponent = 0;
		mysignificand = 0;
		} else if (category == fcInfinity) {
		myexponent = 0;
		mysignificand = 0;
		} else {
		assert(category == fcNaN && "Unknown category!");
		myexponent = 0;
		mysignificand = (uint32_t)*significandParts();
		}

		return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0x1f) << 2) \|
		(mysignificand & 0x3)));
		}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {		APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);		assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
assert(partCount() == 1);		assert(partCount() == 1);

uint32_t myexponent, mysignificand;		uint32_t myexponent, mysignificand;

if (isFiniteNonZero()) {		if (isFiniteNonZero()) {
myexponent = exponent + 7; // bias		myexponent = exponent + 7; // bias
Show All 11 Lines	if (isFiniteNonZero()) {
myexponent = 0xf;		myexponent = 0xf;
mysignificand = (uint32_t)*significandParts();		mysignificand = (uint32_t)*significandParts();
}		}

return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0xf) << 3) \|		return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0xf) << 3) \|
(mysignificand & 0x7)));		(mysignificand & 0x7)));
}		}

		APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
		assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ);
		assert(partCount() == 1);

		uint32_t myexponent, mysignificand;

		if (isFiniteNonZero()) {
		myexponent = exponent + 8; // bias
		mysignificand = (uint32_t)*significandParts();
		if (myexponent == 1 && !(mysignificand & 0x8))
		myexponent = 0; // denormal
		} else if (category == fcZero) {
		myexponent = 0;
		mysignificand = 0;
		} else if (category == fcInfinity) {
		myexponent = 0;
		mysignificand = 0;
		} else {
		assert(category == fcNaN && "Unknown category!");
		myexponent = 0;
		mysignificand = (uint32_t)*significandParts();
		}

		return APInt(8, (((sign & 1) << 7) \| ((myexponent & 0xf) << 3) \|
		(mysignificand & 0x7)));
		}

// This function creates an APInt that is just a bit map of the floating		// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory. It is not a conversion,		// point constant as it would appear in memory. It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.		// and treating the result as a normal integer is unlikely to be useful.

APInt IEEEFloat::bitcastToAPInt() const {		APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)		if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
return convertHalfAPFloatToAPInt();		return convertHalfAPFloatToAPInt();

Show All 10 Lines	if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
return convertQuadrupleAPFloatToAPInt();		return convertQuadrupleAPFloatToAPInt();

if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)		if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
return convertPPCDoubleDoubleAPFloatToAPInt();		return convertPPCDoubleDoubleAPFloatToAPInt();

if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)		if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();		return convertFloat8E5M2APFloatToAPInt();

		if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
		return convertFloat8E5M2FNUZAPFloatToAPInt();

if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)		if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();		return convertFloat8E4M3FNAPFloatToAPInt();

		if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
		return convertFloat8E4M3FNUZAPFloatToAPInt();

assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&		assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
"unknown format!");		"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();		return convertF80LongDoubleAPFloatToAPInt();
}		}

float IEEEFloat::convertToFloat() const {		float IEEEFloat::convertToFloat() const {
assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&		assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
"Float semantics are not IEEEsingle");		"Float semantics are not IEEEsingle");
▲ Show 20 Lines • Show All 239 Lines • ▼ Show 20 Lines	if (myexponent == 0 && mysignificand == 0) {
*significandParts() = mysignificand;		*significandParts() = mysignificand;
if (myexponent == 0) // denormal		if (myexponent == 0) // denormal
exponent = -14;		exponent = -14;
else		else
*significandParts() \|= 0x4; // integer bit		*significandParts() \|= 0x4; // integer bit
}		}
}		}

		void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
		uint32_t i = (uint32_t)*api.getRawData();
		uint32_t myexponent = (i >> 2) & 0x1f;
		uint32_t mysignificand = i & 0x3;

		initialize(&semFloat8E5M2FNUZ);
		assert(partCount() == 1);

		sign = i >> 7;
		if (myexponent == 0 && mysignificand == 0 && sign == 0) {
		makeZero(sign);
		} else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
		category = fcNaN;
		exponent = exponentNaN();
		*significandParts() = mysignificand;
		} else {
		category = fcNormal;
		exponent = myexponent - 16; // bias
		*significandParts() = mysignificand;
		if (myexponent == 0) // denormal
		exponent = -15;
		else
		*significandParts() \|= 0x4; // integer bit
		}
		}

void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {		void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
uint32_t i = (uint32_t)*api.getRawData();		uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 3) & 0xf;		uint32_t myexponent = (i >> 3) & 0xf;
uint32_t mysignificand = i & 0x7;		uint32_t mysignificand = i & 0x7;

initialize(&semFloat8E4M3FN);		initialize(&semFloat8E4M3FN);
assert(partCount() == 1);		assert(partCount() == 1);

Show All 10 Lines	if (myexponent == 0 && mysignificand == 0) {
*significandParts() = mysignificand;		*significandParts() = mysignificand;
if (myexponent == 0) // denormal		if (myexponent == 0) // denormal
exponent = -6;		exponent = -6;
else		else
*significandParts() \|= 0x8; // integer bit		*significandParts() \|= 0x8; // integer bit
}		}
}		}

		void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
		uint32_t i = (uint32_t)*api.getRawData();
		uint32_t myexponent = (i >> 3) & 0xf;
		uint32_t mysignificand = i & 0x7;

		initialize(&semFloat8E4M3FNUZ);
		assert(partCount() == 1);

		sign = i >> 7;
		if (myexponent == 0 && mysignificand == 0 && sign == 0) {
		makeZero(sign);
		} else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
		category = fcNaN;
		exponent = exponentNaN();
		*significandParts() = mysignificand;
		} else {
		category = fcNormal;
		exponent = myexponent - 8; // bias
		*significandParts() = mysignificand;
		if (myexponent == 0) // denormal
		exponent = -7;
		else
		*significandParts() \|= 0x8; // integer bit
		}
		}

/// Treat api as containing the bits of a floating point number.		/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {		void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
assert(api.getBitWidth() == Sem->sizeInBits);		assert(api.getBitWidth() == Sem->sizeInBits);
if (Sem == &semIEEEhalf)		if (Sem == &semIEEEhalf)
return initFromHalfAPInt(api);		return initFromHalfAPInt(api);
if (Sem == &semBFloat)		if (Sem == &semBFloat)
return initFromBFloatAPInt(api);		return initFromBFloatAPInt(api);
if (Sem == &semIEEEsingle)		if (Sem == &semIEEEsingle)
return initFromFloatAPInt(api);		return initFromFloatAPInt(api);
if (Sem == &semIEEEdouble)		if (Sem == &semIEEEdouble)
return initFromDoubleAPInt(api);		return initFromDoubleAPInt(api);
if (Sem == &semX87DoubleExtended)		if (Sem == &semX87DoubleExtended)
return initFromF80LongDoubleAPInt(api);		return initFromF80LongDoubleAPInt(api);
if (Sem == &semIEEEquad)		if (Sem == &semIEEEquad)
return initFromQuadrupleAPInt(api);		return initFromQuadrupleAPInt(api);
if (Sem == &semPPCDoubleDoubleLegacy)		if (Sem == &semPPCDoubleDoubleLegacy)
return initFromPPCDoubleDoubleAPInt(api);		return initFromPPCDoubleDoubleAPInt(api);
if (Sem == &semFloat8E5M2)		if (Sem == &semFloat8E5M2)
return initFromFloat8E5M2APInt(api);		return initFromFloat8E5M2APInt(api);
		if (Sem == &semFloat8E5M2FNUZ)
		return initFromFloat8E5M2FNUZAPInt(api);
if (Sem == &semFloat8E4M3FN)		if (Sem == &semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);		return initFromFloat8E4M3FNAPInt(api);
		if (Sem == &semFloat8E4M3FNUZ)
		return initFromFloat8E4M3FNUZAPInt(api);

llvm_unreachable(nullptr);		llvm_unreachable(nullptr);
}		}

/// Make this number the largest magnitude normal number in the given		/// Make this number the largest magnitude normal number in the given
/// semantics.		/// semantics.
void IEEEFloat::makeLargest(bool Negative) {		void IEEEFloat::makeLargest(bool Negative) {
// We want (in interchange format):		// We want (in interchange format):
Show All 12 Lines	void IEEEFloat::makeLargest(bool Negative) {
// Set the high integerPart especially setting all unused top bits for		// Set the high integerPart especially setting all unused top bits for
// internal consistency.		// internal consistency.
const unsigned NumUnusedHighBits =		const unsigned NumUnusedHighBits =
PartCount*integerPartWidth - semantics->precision;		PartCount*integerPartWidth - semantics->precision;
significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)		significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
? (~integerPart(0) >> NumUnusedHighBits)		? (~integerPart(0) >> NumUnusedHighBits)
: 0;		: 0;

if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
		semantics->nanEncoding == fltNanEncoding::AllOnes)
significand[0] &= ~integerPart(1);		significand[0] &= ~integerPart(1);
}		}

/// Make this number the smallest magnitude denormal number in the given		/// Make this number the smallest magnitude denormal number in the given
/// semantics.		/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {		void IEEEFloat::makeSmallest(bool Negative) {
// We want (in interchange format):		// We want (in interchange format):
// sign = {Negative}		// sign = {Negative}
▲ Show 20 Lines • Show All 411 Lines • ▼ Show 20 Lines	case fcZero:
makeSmallest(false);		makeSmallest(false);
break;		break;
case fcNormal:		case fcNormal:
// nextUp(-getSmallest()) = -0		// nextUp(-getSmallest()) = -0
if (isSmallest() && isNegative()) {		if (isSmallest() && isNegative()) {
APInt::tcSet(significandParts(), 0, partCount());		APInt::tcSet(significandParts(), 0, partCount());
category = fcZero;		category = fcZero;
exponent = 0;		exponent = 0;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		sign = false;
break;		break;
}		}

if (isLargest() && !isNegative()) {		if (isLargest() && !isNegative()) {
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
// nextUp(getLargest()) == NAN		// nextUp(getLargest()) == NAN
makeNaN();		makeNaN();
break;		break;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
// If we are performing nextDown, swap sign so we have -nextUp(-x)		// If we are performing nextDown, swap sign so we have -nextUp(-x)
if (nextDown)		if (nextDown)
changeSign();		changeSign();

return result;		return result;
}		}

APFloatBase::ExponentType IEEEFloat::exponentNaN() const {		APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)		if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
		return semantics->minExponent;
return semantics->maxExponent;		return semantics->maxExponent;
		}
return semantics->maxExponent + 1;		return semantics->maxExponent + 1;
}		}

APFloatBase::ExponentType IEEEFloat::exponentInf() const {		APFloatBase::ExponentType IEEEFloat::exponentInf() const {
return semantics->maxExponent + 1;		return semantics->maxExponent + 1;
}		}

APFloatBase::ExponentType IEEEFloat::exponentZero() const {		APFloatBase::ExponentType IEEEFloat::exponentZero() const {
Show All 10 Lines	void IEEEFloat::makeInf(bool Negative) {
sign = Negative;		sign = Negative;
exponent = exponentInf();		exponent = exponentInf();
APInt::tcSet(significandParts(), 0, partCount());		APInt::tcSet(significandParts(), 0, partCount());
}		}

void IEEEFloat::makeZero(bool Negative) {		void IEEEFloat::makeZero(bool Negative) {
category = fcZero;		category = fcZero;
sign = Negative;		sign = Negative;
		if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
		// Merge negative zero to positive because 0b10000...000 is used for NaN
		sign = false;
		}
exponent = exponentZero();		exponent = exponentZero();
APInt::tcSet(significandParts(), 0, partCount());		APInt::tcSet(significandParts(), 0, partCount());
}		}

void IEEEFloat::makeQuiet() {		void IEEEFloat::makeQuiet() {
assert(isNaN());		assert(isNaN());
if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)		if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
APInt::tcSetBit(significandParts(), semantics->precision - 2);		APInt::tcSetBit(significandParts(), semantics->precision - 2);
▲ Show 20 Lines • Show All 759 Lines • Show Last 20 Lines

llvm/unittests/ADT/APFloatTest.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

//===- llvm/unittest/ADT/APFloat.cpp - APFloat unit tests ---------------------===//		//===- llvm/unittest/ADT/APFloat.cpp - APFloat unit tests ---------------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"		#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"		#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Hashing.h"		#include "llvm/ADT/Hashing.h"
		#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Error.h"		#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"		#include "llvm/Support/FormatVariadic.h"
#include "gtest/gtest.h"		#include "gtest/gtest.h"
#include <cmath>		#include <cmath>
#include <ostream>		#include <ostream>
#include <string>		#include <string>
#include <tuple>		#include <tuple>
▲ Show 20 Lines • Show All 1,266 Lines • ▼ Show 20 Lines
TEST(APFloatTest, makeNaN) {		TEST(APFloatTest, makeNaN) {
const struct {		const struct {
uint64_t expected;		uint64_t expected;
const fltSemantics &semantics;		const fltSemantics &semantics;
bool SNaN;		bool SNaN;
bool Negative;		bool Negative;
uint64_t payload;		uint64_t payload;
} tests[] = {		} tests[] = {
		// clang-format off
/* expected semantics SNaN Neg payload */		/* expected semantics SNaN Neg payload */
{ 0x7fc00000ULL, APFloat::IEEEsingle(), false, false, 0x00000000ULL },		{ 0x7fc00000ULL, APFloat::IEEEsingle(), false, false, 0x00000000ULL },
{ 0xffc00000ULL, APFloat::IEEEsingle(), false, true, 0x00000000ULL },		{ 0xffc00000ULL, APFloat::IEEEsingle(), false, true, 0x00000000ULL },
{ 0x7fc0ae72ULL, APFloat::IEEEsingle(), false, false, 0x0000ae72ULL },		{ 0x7fc0ae72ULL, APFloat::IEEEsingle(), false, false, 0x0000ae72ULL },
{ 0x7fffae72ULL, APFloat::IEEEsingle(), false, false, 0xffffae72ULL },		{ 0x7fffae72ULL, APFloat::IEEEsingle(), false, false, 0xffffae72ULL },
{ 0x7fdaae72ULL, APFloat::IEEEsingle(), false, false, 0x00daae72ULL },		{ 0x7fdaae72ULL, APFloat::IEEEsingle(), false, false, 0x00daae72ULL },
{ 0x7fa00000ULL, APFloat::IEEEsingle(), true, false, 0x00000000ULL },		{ 0x7fa00000ULL, APFloat::IEEEsingle(), true, false, 0x00000000ULL },
{ 0xffa00000ULL, APFloat::IEEEsingle(), true, true, 0x00000000ULL },		{ 0xffa00000ULL, APFloat::IEEEsingle(), true, true, 0x00000000ULL },
{ 0x7f80ae72ULL, APFloat::IEEEsingle(), true, false, 0x0000ae72ULL },		{ 0x7f80ae72ULL, APFloat::IEEEsingle(), true, false, 0x0000ae72ULL },
{ 0x7fbfae72ULL, APFloat::IEEEsingle(), true, false, 0xffffae72ULL },		{ 0x7fbfae72ULL, APFloat::IEEEsingle(), true, false, 0xffffae72ULL },
{ 0x7f9aae72ULL, APFloat::IEEEsingle(), true, false, 0x001aae72ULL },		{ 0x7f9aae72ULL, APFloat::IEEEsingle(), true, false, 0x001aae72ULL },
{ 0x7ff8000000000000ULL, APFloat::IEEEdouble(), false, false, 0x0000000000000000ULL },		{ 0x7ff8000000000000ULL, APFloat::IEEEdouble(), false, false, 0x0000000000000000ULL },
{ 0xfff8000000000000ULL, APFloat::IEEEdouble(), false, true, 0x0000000000000000ULL },		{ 0xfff8000000000000ULL, APFloat::IEEEdouble(), false, true, 0x0000000000000000ULL },
{ 0x7ff800000000ae72ULL, APFloat::IEEEdouble(), false, false, 0x000000000000ae72ULL },		{ 0x7ff800000000ae72ULL, APFloat::IEEEdouble(), false, false, 0x000000000000ae72ULL },
{ 0x7fffffffffffae72ULL, APFloat::IEEEdouble(), false, false, 0xffffffffffffae72ULL },		{ 0x7fffffffffffae72ULL, APFloat::IEEEdouble(), false, false, 0xffffffffffffae72ULL },
{ 0x7ffdaaaaaaaaae72ULL, APFloat::IEEEdouble(), false, false, 0x000daaaaaaaaae72ULL },		{ 0x7ffdaaaaaaaaae72ULL, APFloat::IEEEdouble(), false, false, 0x000daaaaaaaaae72ULL },
{ 0x7ff4000000000000ULL, APFloat::IEEEdouble(), true, false, 0x0000000000000000ULL },		{ 0x7ff4000000000000ULL, APFloat::IEEEdouble(), true, false, 0x0000000000000000ULL },
{ 0xfff4000000000000ULL, APFloat::IEEEdouble(), true, true, 0x0000000000000000ULL },		{ 0xfff4000000000000ULL, APFloat::IEEEdouble(), true, true, 0x0000000000000000ULL },
{ 0x7ff000000000ae72ULL, APFloat::IEEEdouble(), true, false, 0x000000000000ae72ULL },		{ 0x7ff000000000ae72ULL, APFloat::IEEEdouble(), true, false, 0x000000000000ae72ULL },
{ 0x7ff7ffffffffae72ULL, APFloat::IEEEdouble(), true, false, 0xffffffffffffae72ULL },		{ 0x7ff7ffffffffae72ULL, APFloat::IEEEdouble(), true, false, 0xffffffffffffae72ULL },
{ 0x7ff1aaaaaaaaae72ULL, APFloat::IEEEdouble(), true, false, 0x0001aaaaaaaaae72ULL },		{ 0x7ff1aaaaaaaaae72ULL, APFloat::IEEEdouble(), true, false, 0x0001aaaaaaaaae72ULL },
		{ 0x80ULL, APFloat::Float8E5M2FNUZ(), false, false, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E5M2FNUZ(), false, true, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E5M2FNUZ(), true, false, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E5M2FNUZ(), true, true, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E4M3FNUZ(), false, false, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E4M3FNUZ(), false, true, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E4M3FNUZ(), true, false, 0xaaULL },
		{ 0x80ULL, APFloat::Float8E4M3FNUZ(), true, true, 0xaaULL },
		// clang-format on
};		};

for (const auto &t : tests) {		for (const auto &t : tests) {
ASSERT_EQ(t.expected, nanbitsFromAPInt(t.semantics, t.SNaN, t.Negative, t.payload));		ASSERT_EQ(t.expected, nanbitsFromAPInt(t.semantics, t.SNaN, t.Negative, t.payload));
}		}
}		}

#ifdef GTEST_HAS_DEATH_TEST		#ifdef GTEST_HAS_DEATH_TEST
▲ Show 20 Lines • Show All 407 Lines • ▼ Show 20 Lines	llvm::detail::DoubleAPFloat T3(APFloat::PPCDoubleDouble(), std::move(F5),
std::move(F6));		std::move(F6));
EXPECT_FALSE(T3.isInteger());		EXPECT_FALSE(T3.isInteger());
}		}

TEST(APFloatTest, getLargest) {		TEST(APFloatTest, getLargest) {
EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat());		EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat());
EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble());		EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble());
EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble());		EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble());
		EXPECT_EQ(240,
		APFloat::getLargest(APFloat::Float8E4M3FNUZ()).convertToDouble());
		EXPECT_EQ(57344,
		APFloat::getLargest(APFloat::Float8E5M2FNUZ()).convertToDouble());
}		}

TEST(APFloatTest, getSmallest) {		TEST(APFloatTest, getSmallest) {
APFloat test = APFloat::getSmallest(APFloat::IEEEsingle(), false);		APFloat test = APFloat::getSmallest(APFloat::IEEEsingle(), false);
APFloat expected = APFloat(APFloat::IEEEsingle(), "0x0.000002p-126");		APFloat expected = APFloat(APFloat::IEEEsingle(), "0x0.000002p-126");
EXPECT_FALSE(test.isNegative());		EXPECT_FALSE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());		EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_TRUE(test.isDenormal());		EXPECT_TRUE(test.isDenormal());
Show All 14 Lines	TEST(APFloatTest, getSmallest) {
EXPECT_TRUE(test.bitwiseIsEqual(expected));		EXPECT_TRUE(test.bitwiseIsEqual(expected));

test = APFloat::getSmallest(APFloat::IEEEquad(), true);		test = APFloat::getSmallest(APFloat::IEEEquad(), true);
expected = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382");		expected = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382");
EXPECT_TRUE(test.isNegative());		EXPECT_TRUE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());		EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_TRUE(test.isDenormal());		EXPECT_TRUE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), false);
		expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x0.4p-15");
		EXPECT_FALSE(test.isNegative());
		EXPECT_TRUE(test.isFiniteNonZero());
		EXPECT_TRUE(test.isDenormal());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), false);
		expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x0.2p-7");
		EXPECT_FALSE(test.isNegative());
		EXPECT_TRUE(test.isFiniteNonZero());
		EXPECT_TRUE(test.isDenormal());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));
}		}

TEST(APFloatTest, getSmallestNormalized) {		TEST(APFloatTest, getSmallestNormalized) {
APFloat test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);		APFloat test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
APFloat expected = APFloat(APFloat::IEEEsingle(), "0x1p-126");		APFloat expected = APFloat(APFloat::IEEEsingle(), "0x1p-126");
EXPECT_FALSE(test.isNegative());		EXPECT_FALSE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());		EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_FALSE(test.isDenormal());		EXPECT_FALSE(test.isDenormal());
Show All 34 Lines	TEST(APFloatTest, getSmallestNormalized) {

test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), true);		test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), true);
expected = APFloat(APFloat::IEEEquad(), "-0x1p-16382");		expected = APFloat(APFloat::IEEEquad(), "-0x1p-16382");
EXPECT_TRUE(test.isNegative());		EXPECT_TRUE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());		EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_FALSE(test.isDenormal());		EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));		EXPECT_TRUE(test.bitwiseIsEqual(expected));
EXPECT_TRUE(test.isSmallestNormalized());		EXPECT_TRUE(test.isSmallestNormalized());

		test = APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), false);
		expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-15");
		EXPECT_FALSE(test.isNegative());
		EXPECT_TRUE(test.isFiniteNonZero());
		EXPECT_FALSE(test.isDenormal());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));
		EXPECT_TRUE(test.isSmallestNormalized());

		test = APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), false);
		expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.0p-7");
		EXPECT_FALSE(test.isNegative());
		EXPECT_TRUE(test.isFiniteNonZero());
		EXPECT_FALSE(test.isDenormal());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));
		EXPECT_TRUE(test.isSmallestNormalized());
}		}

TEST(APFloatTest, getZero) {		TEST(APFloatTest, getZero) {
struct {		struct {
const fltSemantics *semantics;		const fltSemantics *semantics;
const bool sign;		const bool sign;
		const bool signedZero;
const unsigned long long bitPattern[2];		const unsigned long long bitPattern[2];
const unsigned bitPatternLength;		const unsigned bitPatternLength;
		reedwmUnsubmitted Done Reply Inline Actions What does this TODO mean? reedwm: What does this TODO mean?
		krzysz00AuthorUnsubmitted Done Reply Inline Actions Stray comment that was a note about adding what's currently `const bool signedZero;` above. krzysz00: Stray comment that was a note about adding what's currently `const bool signedZero;` above.
} const GetZeroTest[] = {		} const GetZeroTest[] = {
{&APFloat::IEEEhalf(), false, {0, 0}, 1},		{&APFloat::IEEEhalf(), false, true, {0, 0}, 1},
{&APFloat::IEEEhalf(), true, {0x8000ULL, 0}, 1},		{&APFloat::IEEEhalf(), true, true, {0x8000ULL, 0}, 1},
{&APFloat::IEEEsingle(), false, {0, 0}, 1},		{&APFloat::IEEEsingle(), false, true, {0, 0}, 1},
{&APFloat::IEEEsingle(), true, {0x80000000ULL, 0}, 1},		{&APFloat::IEEEsingle(), true, true, {0x80000000ULL, 0}, 1},
{&APFloat::IEEEdouble(), false, {0, 0}, 1},		{&APFloat::IEEEdouble(), false, true, {0, 0}, 1},
{&APFloat::IEEEdouble(), true, {0x8000000000000000ULL, 0}, 1},		{&APFloat::IEEEdouble(), true, true, {0x8000000000000000ULL, 0}, 1},
{&APFloat::IEEEquad(), false, {0, 0}, 2},		{&APFloat::IEEEquad(), false, true, {0, 0}, 2},
{&APFloat::IEEEquad(), true, {0, 0x8000000000000000ULL}, 2},		{&APFloat::IEEEquad(), true, true, {0, 0x8000000000000000ULL}, 2},
{&APFloat::PPCDoubleDouble(), false, {0, 0}, 2},		{&APFloat::PPCDoubleDouble(), false, true, {0, 0}, 2},
{&APFloat::PPCDoubleDouble(), true, {0x8000000000000000ULL, 0}, 2},		{&APFloat::PPCDoubleDouble(), true, true, {0x8000000000000000ULL, 0}, 2},
{&APFloat::x87DoubleExtended(), false, {0, 0}, 2},		{&APFloat::x87DoubleExtended(), false, true, {0, 0}, 2},
{&APFloat::x87DoubleExtended(), true, {0, 0x8000ULL}, 2},		{&APFloat::x87DoubleExtended(), true, true, {0, 0x8000ULL}, 2},
{&APFloat::Float8E5M2(), false, {0, 0}, 1},		{&APFloat::Float8E5M2(), false, true, {0, 0}, 1},
{&APFloat::Float8E5M2(), true, {0x80ULL, 0}, 1},		{&APFloat::Float8E5M2(), true, true, {0x80ULL, 0}, 1},
{&APFloat::Float8E4M3FN(), false, {0, 0}, 1},		{&APFloat::Float8E5M2FNUZ(), false, false, {0, 0}, 1},
{&APFloat::Float8E4M3FN(), true, {0x80ULL, 0}, 1},		{&APFloat::Float8E5M2FNUZ(), true, false, {0, 0}, 1},
};		{&APFloat::Float8E4M3FN(), false, true, {0, 0}, 1},
const unsigned NumGetZeroTests = 12;		{&APFloat::Float8E4M3FN(), true, true, {0x80ULL, 0}, 1},
		{&APFloat::Float8E4M3FNUZ(), false, false, {0, 0}, 1},
		{&APFloat::Float8E4M3FNUZ(), true, false, {0, 0}, 1}};
		const unsigned NumGetZeroTests = std::size(GetZeroTest);
		jakeh-gcUnsubmitted Done Reply Inline Actions There are now 20 test cases, not 16. 12 wasn't a correct value for this before. My preference is to use `std::size` here to avoid these issues in future. jakeh-gc: There are now 20 test cases, not 16. 12 wasn't a correct value for this before. My preference…
for (unsigned i = 0; i < NumGetZeroTests; ++i) {		for (unsigned i = 0; i < NumGetZeroTests; ++i) {
APFloat test = APFloat::getZero(*GetZeroTest[i].semantics,		APFloat test = APFloat::getZero(*GetZeroTest[i].semantics,
GetZeroTest[i].sign);		GetZeroTest[i].sign);
const char *pattern = GetZeroTest[i].sign? "-0x0p+0" : "0x0p+0";		const char *pattern = GetZeroTest[i].sign? "-0x0p+0" : "0x0p+0";
APFloat expected = APFloat(*GetZeroTest[i].semantics,		APFloat expected = APFloat(*GetZeroTest[i].semantics,
pattern);		pattern);
EXPECT_TRUE(test.isZero());		EXPECT_TRUE(test.isZero());
		if (GetZeroTest[i].signedZero)
EXPECT_TRUE(GetZeroTest[i].sign? test.isNegative() : !test.isNegative());		EXPECT_TRUE(GetZeroTest[i].sign ? test.isNegative() : !test.isNegative());
		else
		EXPECT_TRUE(!test.isNegative());
EXPECT_TRUE(test.bitwiseIsEqual(expected));		EXPECT_TRUE(test.bitwiseIsEqual(expected));
for (unsigned j = 0, je = GetZeroTest[i].bitPatternLength; j < je; ++j) {		for (unsigned j = 0, je = GetZeroTest[i].bitPatternLength; j < je; ++j) {
EXPECT_EQ(GetZeroTest[i].bitPattern[j],		EXPECT_EQ(GetZeroTest[i].bitPattern[j],
test.bitcastToAPInt().getRawData()[j]);		test.bitcastToAPInt().getRawData()[j]);
}		}
}		}
}		}

TEST(APFloatTest, copySign) {		TEST(APFloatTest, copySign) {
EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(		EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(
APFloat::copySign(APFloat(42.0), APFloat(-1.0))));		APFloat::copySign(APFloat(42.0), APFloat(-1.0))));
EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(		EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(
APFloat::copySign(APFloat(-42.0), APFloat(1.0))));		APFloat::copySign(APFloat(-42.0), APFloat(1.0))));
EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(		EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(
APFloat::copySign(APFloat(-42.0), APFloat(-1.0))));		APFloat::copySign(APFloat(-42.0), APFloat(-1.0))));
EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(		EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(
APFloat::copySign(APFloat(42.0), APFloat(1.0))));		APFloat::copySign(APFloat(42.0), APFloat(1.0))));
		// For floating-point formats with unsigned 0, copySign() to a zero or a NaN is a no-op
		EXPECT_TRUE(
		APFloat::getZero(APFloat::Float8E4M3FNUZ())
		.bitwiseIsEqual(APFloat::copySign(
		APFloat::getZero(APFloat::Float8E4M3FNUZ()), APFloat(-1.0))));
		EXPECT_TRUE(
		APFloat::getNaN(APFloat::Float8E4M3FNUZ(), true)
		.bitwiseIsEqual(APFloat::copySign(
		APFloat::getNaN(APFloat::Float8E4M3FNUZ(), true), APFloat(1.0))));
}		}

TEST(APFloatTest, convert) {		TEST(APFloatTest, convert) {
bool losesInfo;		bool losesInfo;
APFloat test(APFloat::IEEEdouble(), "1.0");		APFloat test(APFloat::IEEEdouble(), "1.0");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);		test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(1.0f, test.convertToFloat());		EXPECT_EQ(1.0f, test.convertToFloat());
EXPECT_FALSE(losesInfo);		EXPECT_FALSE(losesInfo);
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	TEST(APFloatTest, convert) {
EXPECT_FALSE(losesInfo);		EXPECT_FALSE(losesInfo);

test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");		test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);		test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
EXPECT_EQ(0x01, test.bitcastToAPInt());		EXPECT_EQ(0x01, test.bitcastToAPInt());
EXPECT_TRUE(losesInfo);		EXPECT_TRUE(losesInfo);
}		}

		TEST(APFloatTest, Float8UZConvert) {
		bool losesInfo = false;
		std::pair<APFloat, APFloat::opStatus> toNaNTests[] = {
		{APFloat::getQNaN(APFloat::IEEEsingle(), false), APFloat::opOK},
		{APFloat::getQNaN(APFloat::IEEEsingle(), true), APFloat::opOK},
		{APFloat::getSNaN(APFloat::IEEEsingle(), false), APFloat::opInvalidOp},
		{APFloat::getSNaN(APFloat::IEEEsingle(), true), APFloat::opInvalidOp},
		{APFloat::getInf(APFloat::IEEEsingle(), false), APFloat::opInexact},
		{APFloat::getInf(APFloat::IEEEsingle(), true), APFloat::opInexact}};
		for (auto [toTest, expectedRes] : toNaNTests) {
		llvm::SmallString<16> value;
		toTest.toString(value);
		SCOPED_TRACE("toTest = " + value);
		for (const fltSemantics *sem :
		{&APFloat::Float8E4M3FNUZ(), &APFloat::Float8E5M2FNUZ()}) {
		SCOPED_TRACE("Semantics = " +
		std::to_string(APFloat::SemanticsToEnum(*sem)));
		losesInfo = false;
		APFloat test = toTest;
		EXPECT_EQ(test.convert(*sem, APFloat::rmNearestTiesToAway, &losesInfo),
		expectedRes);
		EXPECT_TRUE(test.isNaN());
		EXPECT_TRUE(test.isNegative());
		EXPECT_FALSE(test.isSignaling());
		EXPECT_FALSE(test.isInfinity());
		EXPECT_EQ(0x80, test.bitcastToAPInt());
		EXPECT_TRUE(losesInfo);
		}
		}

		// Negative zero conversions are information losing.
		losesInfo = false;
		APFloat test = APFloat::getZero(APFloat::IEEEsingle(), true);
		EXPECT_EQ(test.convert(APFloat::Float8E5M2FNUZ(),
		APFloat::rmNearestTiesToAway, &losesInfo),
		APFloat::opInexact);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());
		EXPECT_TRUE(losesInfo);
		EXPECT_EQ(0x0, test.bitcastToAPInt());

		losesInfo = true;
		test = APFloat::getZero(APFloat::IEEEsingle(), false);
		EXPECT_EQ(test.convert(APFloat::Float8E5M2FNUZ(),
		APFloat::rmNearestTiesToAway, &losesInfo),
		APFloat::opOK);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());
		EXPECT_FALSE(losesInfo);
		EXPECT_EQ(0x0, test.bitcastToAPInt());

		// Except in casts between the unsigned-zero (FNUZ) formats themselves.
		reedwmUnsubmitted Done Reply Inline Actions Typo: castss -> casts reedwm: Typo: castss -> casts
		losesInfo = true;
		test = APFloat::getZero(APFloat::Float8E5M2FNUZ());
		EXPECT_EQ(test.convert(APFloat::Float8E4M3FNUZ(),
		APFloat::rmNearestTiesToAway, &losesInfo),
		APFloat::opOK);
		EXPECT_FALSE(losesInfo);
		EXPECT_EQ(0x0, test.bitcastToAPInt());
		}

TEST(APFloatTest, PPCDoubleDouble) {		TEST(APFloatTest, PPCDoubleDouble) {
APFloat test(APFloat::PPCDoubleDouble(), "1.0");		APFloat test(APFloat::PPCDoubleDouble(), "1.0");
EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);		EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);		EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);

// LDBL_MAX		// LDBL_MAX
test = APFloat(APFloat::PPCDoubleDouble(), "1.79769313486231580793728971405301e+308");		test = APFloat(APFloat::PPCDoubleDouble(), "1.79769313486231580793728971405301e+308");
EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]);		EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]);
▲ Show 20 Lines • Show All 2,855 Lines • ▼ Show 20 Lines
}		}

TEST(APFloatTest, x87Next) {		TEST(APFloatTest, x87Next) {
APFloat F(APFloat::x87DoubleExtended(), "-1.0");		APFloat F(APFloat::x87DoubleExtended(), "-1.0");
F.next(false);		F.next(false);
EXPECT_TRUE(ilogb(F) == -1);		EXPECT_TRUE(ilogb(F) == -1);
}		}

		TEST(APFloatTest, Float8ExhaustivePair) {
		// Test each pair of 8-bit floats with non-standard semantics
		for (APFloat::Semantics Sem :
		{APFloat::S_Float8E4M3FN, APFloat::S_Float8E5M2FNUZ,
		APFloat::S_Float8E4M3FNUZ}) {
		const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
		for (int i = 0; i < 256; i++) {
		for (int j = 0; j < 256; j++) {
		SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
		",j=" + std::to_string(j));
		APFloat x(S, APInt(8, i));
		APFloat y(S, APInt(8, j));

		bool losesInfo;
		APFloat x16 = x;
		x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
		&losesInfo);
		EXPECT_FALSE(losesInfo);
		APFloat y16 = y;
		y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
		&losesInfo);
		EXPECT_FALSE(losesInfo);

		// Add
		APFloat z = x;
		z.add(y, APFloat::rmNearestTiesToEven);
		APFloat z16 = x16;
		z16.add(y16, APFloat::rmNearestTiesToEven);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;

		// Subtract
		z = x;
		z.subtract(y, APFloat::rmNearestTiesToEven);
		z16 = x16;
		z16.subtract(y16, APFloat::rmNearestTiesToEven);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;

		// Multiply
		z = x;
		z.multiply(y, APFloat::rmNearestTiesToEven);
		z16 = x16;
		z16.multiply(y16, APFloat::rmNearestTiesToEven);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;

		// Divide
		z = x;
		z.divide(y, APFloat::rmNearestTiesToEven);
		z16 = x16;
		z16.divide(y16, APFloat::rmNearestTiesToEven);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;

		// Mod
		z = x;
		z.mod(y);
		z16 = x16;
		z16.mod(y16);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;

		// Remainder
		z = x;
		z.remainder(y);
		z16 = x16;
		z16.remainder(y16);
		z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
		EXPECT_TRUE(z.bitwiseIsEqual(z16))
		<< "sem=" << Sem << ", i=" << i << ", j=" << j;
		}
		}
		}
		}

TEST(APFloatTest, ConvertE4M3FNToE5M2) {		TEST(APFloatTest, ConvertE4M3FNToE5M2) {
bool losesInfo;		bool losesInfo;
APFloat test(APFloat::Float8E4M3FN(), "1.0");		APFloat test(APFloat::Float8E4M3FN(), "1.0");
APFloat::opStatus status = test.convert(		APFloat::opStatus status = test.convert(
APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo);		APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(1.0f, test.convertToFloat());		EXPECT_EQ(1.0f, test.convertToFloat());
EXPECT_FALSE(losesInfo);		EXPECT_FALSE(losesInfo);
EXPECT_EQ(status, APFloat::opOK);		EXPECT_EQ(status, APFloat::opOK);
▲ Show 20 Lines • Show All 277 Lines • ▼ Show 20 Lines	if (i == 1 \|\| i == 129) {
EXPECT_TRUE(test.isSmallest());		EXPECT_TRUE(test.isSmallest());
EXPECT_EQ(abs(test).convertToDouble(), 0x1p-9);		EXPECT_EQ(abs(test).convertToDouble(), 0x1p-9);
} else {		} else {
EXPECT_FALSE(test.isSmallest());		EXPECT_FALSE(test.isSmallest());
}		}

// convert to BFloat		// convert to BFloat
APFloat test2 = test;		APFloat test2 = test;
bool loses_info;		bool losesInfo;
APFloat::opStatus status = test2.convert(		APFloat::opStatus status = test2.convert(
APFloat::BFloat(), APFloat::rmNearestTiesToEven, &loses_info);		APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(status, APFloat::opOK);		EXPECT_EQ(status, APFloat::opOK);
EXPECT_FALSE(loses_info);		EXPECT_FALSE(losesInfo);
if (i == 127 \|\| i == 255)		if (i == 127 \|\| i == 255)
EXPECT_TRUE(test2.isNaN());		EXPECT_TRUE(test2.isNaN());
else		else
EXPECT_EQ(test.convertToFloat(), test2.convertToFloat());		EXPECT_EQ(test.convertToFloat(), test2.convertToFloat());

// bitcastToAPInt		// bitcastToAPInt
EXPECT_EQ(i, test.bitcastToAPInt());		EXPECT_EQ(i, test.bitcastToAPInt());
}		}
}		}

TEST(APFloatTest, Float8E4M3FNExhaustivePair) {		TEST(APFloatTest, Float8E5M2FNUZNext) {
// Test each pair of Float8E4M3FN values.		APFloat test(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized);
		APFloat expected(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized);

		// 1. NextUp of largest bit pattern is NaN
		test = APFloat::getLargest(APFloat::Float8E5M2FNUZ());
		expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
		EXPECT_EQ(test.next(false), APFloat::opOK);
		EXPECT_FALSE(test.isInfinity());
		EXPECT_FALSE(test.isZero());
		EXPECT_TRUE(test.isNaN());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		reedwmUnsubmitted Done Reply Inline Actions Add the word "negative". And similarly in Float8E4M3FNUZNext reedwm: Add the word "negative". And similarly in Float8E4M3FNUZNext
		// 2. NextUp of smallest negative denormal is +0
		reedwmUnsubmitted Done Reply Inline Actions Typo: smalest -> smallest reedwm: Typo: smalest -> smallest
		test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true);
		expected = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false);
		EXPECT_EQ(test.next(false), APFloat::opOK);
		EXPECT_FALSE(test.isNegZero());
		EXPECT_TRUE(test.isPosZero());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		// 3. nextDown of negative of largest value is NaN
		test = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), true);
		expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
		EXPECT_EQ(test.next(true), APFloat::opOK);
		EXPECT_FALSE(test.isInfinity());
		EXPECT_FALSE(test.isZero());
		EXPECT_TRUE(test.isNaN());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		// 4. nextDown of +0 is smallest negative denormal
		test = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false);
		expected = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true);
		EXPECT_EQ(test.next(true), APFloat::opOK);
		EXPECT_FALSE(test.isZero());
		EXPECT_TRUE(test.isDenormal());
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		// 5. nextUp of NaN is NaN
		test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);
		expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true);
		EXPECT_EQ(test.next(false), APFloat::opOK);
		EXPECT_TRUE(test.isNaN());

		// 6. nextDown of NaN is NaN
		test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);
		expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true);
		EXPECT_EQ(test.next(true), APFloat::opOK);
		EXPECT_TRUE(test.isNaN());
		}

		TEST(APFloatTest, Float8E5M2FNUZChangeSign) {
		APFloat test = APFloat(APFloat::Float8E5M2FNUZ(), "1.0");
		APFloat expected = APFloat(APFloat::Float8E5M2FNUZ(), "-1.0");
		test.changeSign();
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		test = APFloat::getZero(APFloat::Float8E5M2FNUZ());
		expected = test;
		test.changeSign();
		EXPECT_TRUE(test.bitwiseIsEqual(expected));

		test = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
		expected = test;
		test.changeSign();
		EXPECT_TRUE(test.bitwiseIsEqual(expected));
		}

		TEST(APFloatTest, Float8E5M2FNUZFromString) {
		// Exactly representable
		EXPECT_EQ(57344,
		APFloat(APFloat::Float8E5M2FNUZ(), "57344").convertToDouble());
		// Round down to maximum value
		EXPECT_EQ(57344,
		APFloat(APFloat::Float8E5M2FNUZ(), "59392").convertToDouble());
		// Round up, causing overflow to NaN
		reedwmUnsubmitted Done Reply Inline Actions This is the same as the next test case reedwm: This is the same as the next test case
		EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "61440").isNaN());
		// Overflow without rounding
		EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "131072").isNaN());
		// Inf converted to NaN
		EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "inf").isNaN());
		// NaN converted to NaN
		EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "nan").isNaN());
		// Negative zero converted to positive zero
		EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "-0").isPosZero());
		}

		TEST(APFloatTest, UnsignedZeroArithmeticSpecial) {
		// Float semantics with only unsigned zero (e.g. Float8E4M3FNUZ) violate the
		// IEEE rules about signs in arithmetic operations when producing zeros,
		// because they only have one zero. Most of the rest of the complexities of
		// arithmetic on these values are covered by the other Float8 types' test
		// cases and so are not repeated here.

		// The IEEE round towards negative rule doesn't apply
		APFloat test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ());
		APFloat rhs = test;
		EXPECT_EQ(test.subtract(rhs, APFloat::rmTowardNegative), APFloat::opOK);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());

		// Multiplication of (small) * (-small) is +0
		test = APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ());
		rhs = -test;
		EXPECT_EQ(test.multiply(rhs, APFloat::rmNearestTiesToAway),
		APFloat::opInexact \| APFloat::opUnderflow);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());

		reedwmUnsubmitted Done Reply Inline Actions typo: hives -> gives reedwm: typo: hives -> gives
		// Dividing the negative float_min by anything gives +0
		test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), true);
		rhs = APFloat(APFloat::Float8E4M3FNUZ(), "2.0");
		EXPECT_EQ(test.divide(rhs, APFloat::rmNearestTiesToEven),
		APFloat::opInexact \| APFloat::opUnderflow);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());

		// Remainder can't copy sign because there's only one zero
		test = APFloat(APFloat::Float8E4M3FNUZ(), "-4.0");
		rhs = APFloat(APFloat::Float8E4M3FNUZ(), "2.0");
		EXPECT_EQ(test.remainder(rhs), APFloat::opOK);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());

		// And same for mod
		test = APFloat(APFloat::Float8E4M3FNUZ(), "-4.0");
		rhs = APFloat(APFloat::Float8E4M3FNUZ(), "2.0");
		EXPECT_EQ(test.mod(rhs), APFloat::opOK);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());

		// FMA correctly handles both the multiply and add parts of all this
		test = APFloat(APFloat::Float8E4M3FNUZ(), "2.0");
		rhs = test;
		APFloat addend = APFloat(APFloat::Float8E4M3FNUZ(), "-4.0");
		EXPECT_EQ(test.fusedMultiplyAdd(rhs, addend, APFloat::rmTowardNegative),
		APFloat::opOK);
		EXPECT_TRUE(test.isZero());
		EXPECT_FALSE(test.isNegative());
		}

		TEST(APFloatTest, Float8E5M2FNUZAdd) {
		APFloat QNaN = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);

		auto FromStr = [](StringRef S) {
		return APFloat(APFloat::Float8E5M2FNUZ(), S);
		};

		struct {
		APFloat x;
		APFloat y;
		const char *result;
		int status;
		int category;
		APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
		} AdditionTests[] = {
		// Test addition operations involving NaN, overflow, and the max E5M2FNUZ
		// value (57344) because E5M2FNUZ differs from IEEE-754 types in these
		// regards
		{FromStr("57344"), FromStr("2048"), "57344", APFloat::opInexact,
		APFloat::fcNormal},
		{FromStr("57344"), FromStr("4096"), "NaN",
		APFloat::opOverflow \| APFloat::opInexact, APFloat::fcNaN},
		{FromStr("-57344"), FromStr("-4096"), "NaN",
		APFloat::opOverflow \| APFloat::opInexact, APFloat::fcNaN},
		{QNaN, FromStr("-57344"), "NaN", APFloat::opOK, APFloat::fcNaN},
		{FromStr("57344"), FromStr("-8192"), "49152", APFloat::opOK,
		APFloat::fcNormal},
		{FromStr("57344"), FromStr("0"), "57344", APFloat::opOK,
		APFloat::fcNormal},
		{FromStr("57344"), FromStr("4096"), "57344", APFloat::opInexact,
		APFloat::fcNormal, APFloat::rmTowardZero},
		{FromStr("57344"), FromStr("57344"), "57344", APFloat::opInexact,
		APFloat::fcNormal, APFloat::rmTowardZero},
		};

		for (size_t i = 0; i < std::size(AdditionTests); ++i) {
		APFloat x(AdditionTests[i].x);
		APFloat y(AdditionTests[i].y);
		APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode);

		APFloat result(APFloat::Float8E5M2FNUZ(), AdditionTests[i].result);

		EXPECT_TRUE(result.bitwiseIsEqual(x));
		EXPECT_EQ(AdditionTests[i].status, (int)status);
		EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory());
		}
		}

		TEST(APFloatTest, Float8E5M2FNUZDivideByZero) {
		APFloat x(APFloat::Float8E5M2FNUZ(), "1");
		APFloat zero(APFloat::Float8E5M2FNUZ(), "0");
		EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero);
		EXPECT_TRUE(x.isNaN());
		}

		// Walks all 256 bit patterns of each "FNUZ" 8-bit format and checks, per
		// pattern: isLargest/isSmallest classification, lossless conversion to
		// BFloat, and that bitcastToAPInt round-trips the original bits.
		TEST(APFloatTest, Float8UnsignedZeroExhaustive) {
		  struct {
		    const fltSemantics *semantics;
		    const double largest;  // magnitude expected for patterns 127 and 255
		    const double smallest; // magnitude expected for patterns 1 and 129
		  } const exhaustiveTests[] = {{&APFloat::Float8E5M2FNUZ(), 57344., 0x1.0p-17},
		                               {&APFloat::Float8E4M3FNUZ(), 240., 0x1.0p-10}};
		  for (const auto &testInfo : exhaustiveTests) {
		    const fltSemantics &sem = *testInfo.semantics;
		    SCOPED_TRACE("Semantics=" + std::to_string(APFloat::SemanticsToEnum(sem)));
		    // Test each of the 256 values.
		    for (int i = 0; i < 256; i++) {
		      SCOPED_TRACE("i=" + std::to_string(i));
		      APFloat test(sem, APInt(8, i));

		      // isLargest
		      if (i == 127 || i == 255) {
		        EXPECT_TRUE(test.isLargest());
		        EXPECT_EQ(abs(test).convertToDouble(), testInfo.largest);
		      } else {
		        EXPECT_FALSE(test.isLargest());
		      }

		      // isSmallest
		      if (i == 1 || i == 129) {
		        EXPECT_TRUE(test.isSmallest());
		        EXPECT_EQ(abs(test).convertToDouble(), testInfo.smallest);
		      } else {
		        EXPECT_FALSE(test.isSmallest());
		      }

		      // convert to BFloat
		      APFloat test2 = test;
		      bool losesInfo;
		      APFloat::opStatus status = test2.convert(
		          APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
		      EXPECT_EQ(status, APFloat::opOK);
		      EXPECT_FALSE(losesInfo);
		      // Pattern 128 is the one checked as NaN; NaN cannot be compared by
		      // value, so only its category is verified.
		      if (i == 128)
		        EXPECT_TRUE(test2.isNaN());
		      else
		        EXPECT_EQ(test.convertToFloat(), test2.convertToFloat());

		      // bitcastToAPInt
		      EXPECT_EQ(i, test.bitcastToAPInt());
		    }
		  }
		}

		// Exercises APFloat::next() at the edges of Float8E4M3FNUZ: stepping past
		// the largest magnitude in either direction, stepping between zero and the
		// smallest negative denormal, and stepping from NaN.
		TEST(APFloatTest, Float8E4M3FNUZNext) {
		  APFloat test(APFloat::Float8E4M3FNUZ(), APFloat::uninitialized);
		  APFloat expected(APFloat::Float8E4M3FNUZ(), APFloat::uninitialized);

		  // 1. NextUp of largest bit pattern is nan
		  test = APFloat::getLargest(APFloat::Float8E4M3FNUZ());
		  expected = APFloat::getNaN(APFloat::Float8E4M3FNUZ());
		  EXPECT_EQ(test.next(false), APFloat::opOK);
		  EXPECT_FALSE(test.isInfinity());
		  EXPECT_FALSE(test.isZero());
		  EXPECT_TRUE(test.isNaN());
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // 2. NextUp of smallest negative denormal is +0
		  test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), true);
		  expected = APFloat::getZero(APFloat::Float8E4M3FNUZ(), false);
		  EXPECT_EQ(test.next(false), APFloat::opOK);
		  EXPECT_FALSE(test.isNegZero());
		  EXPECT_TRUE(test.isPosZero());
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // 3. nextDown of negative of largest value is NaN
		  test = APFloat::getLargest(APFloat::Float8E4M3FNUZ(), true);
		  expected = APFloat::getNaN(APFloat::Float8E4M3FNUZ());
		  EXPECT_EQ(test.next(true), APFloat::opOK);
		  EXPECT_FALSE(test.isInfinity());
		  EXPECT_FALSE(test.isZero());
		  EXPECT_TRUE(test.isNaN());
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // 4. nextDown of +0 is smallest negative denormal
		  test = APFloat::getZero(APFloat::Float8E4M3FNUZ(), false);
		  expected = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), true);
		  EXPECT_EQ(test.next(true), APFloat::opOK);
		  EXPECT_FALSE(test.isZero());
		  EXPECT_TRUE(test.isDenormal());
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // 5. nextUp of NaN is NaN
		  // NOTE(review): this case used to assign `expected` without comparing
		  // against it; only the category is checked. Confirm whether a
		  // bitwiseIsEqual check was intended before adding one.
		  test = APFloat::getNaN(APFloat::Float8E4M3FNUZ(), false);
		  EXPECT_EQ(test.next(false), APFloat::opOK);
		  EXPECT_TRUE(test.isNaN());

		  // 6. nextDown of NaN is NaN
		  test = APFloat::getNaN(APFloat::Float8E4M3FNUZ(), false);
		  EXPECT_EQ(test.next(true), APFloat::opOK);
		  EXPECT_TRUE(test.isNaN());
		}

		// changeSign() on Float8E4M3FNUZ: an ordinary value flips sign, while zero
		// and NaN are expected to keep exactly the same bit pattern.
		TEST(APFloatTest, Float8E4M3FNUZChangeSign) {
		  // NOTE(review): inline review remark captured at this position by the page
		  // scrape (reedwm, marked done): "This looks pretty similar to
		  // Float8E5M2FNUZExhaustive. Maybe merge them." It presumably refers to an
		  // exhaustive test rather than to this one -- confirm against the review.
		  APFloat test = APFloat(APFloat::Float8E4M3FNUZ(), "1.0");
		  APFloat expected = APFloat(APFloat::Float8E4M3FNUZ(), "-1.0");
		  test.changeSign();
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // Zero: the snapshot taken before changeSign() must still match after.
		  test = APFloat::getZero(APFloat::Float8E4M3FNUZ());
		  expected = test;
		  test.changeSign();
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));

		  // NaN: likewise unchanged by changeSign().
		  test = APFloat::getNaN(APFloat::Float8E4M3FNUZ());
		  expected = test;
		  test.changeSign();
		  EXPECT_TRUE(test.bitwiseIsEqual(expected));
		}

		// Parsing decimal strings into Float8E4M3FNUZ: values at and just above the
		// maximum (240), plus the special spellings "inf", "nan" and "-0".
		TEST(APFloatTest, Float8E4M3FNUZFromString) {
		  // Builds a Float8E4M3FNUZ value from its textual form.
		  auto Parse = [](StringRef Text) {
		    return APFloat(APFloat::Float8E4M3FNUZ(), Text);
		  };

		  // 240 is exactly representable.
		  EXPECT_EQ(240, Parse("240").convertToDouble());
		  // 247 rounds down to the maximum value.
		  EXPECT_EQ(240, Parse("247").convertToDouble());
		  // 248 rounds up, which overflows to NaN.
		  EXPECT_TRUE(Parse("248").isNaN());
		  // 480 overflows without any rounding involved.
		  EXPECT_TRUE(Parse("480").isNaN());
		  // Infinity becomes NaN.
		  EXPECT_TRUE(Parse("inf").isNaN());
		  // NaN stays NaN.
		  EXPECT_TRUE(Parse("nan").isNaN());
		  // Negative zero becomes positive zero.
		  EXPECT_TRUE(Parse("-0").isPosZero());
		}

		// Table-driven check of Float8E4M3FNUZ addition around the maximum value
		// (240): each row gives the operands, the expected result as a string, the
		// expected status flags, the expected category and the rounding mode.
		TEST(APFloatTest, Float8E4M3FNUZAdd) {
		  APFloat QNaN = APFloat::getNaN(APFloat::Float8E4M3FNUZ(), false);

		  auto FromStr = [](StringRef S) {
		    return APFloat(APFloat::Float8E4M3FNUZ(), S);
		  };

		  struct {
		    APFloat x;
		    APFloat y;
		    const char *result;
		    int status;
		    int category;
		    APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
		  } AdditionTests[] = {
		      // Test addition operations involving NaN, overflow, and the max E4M3FNUZ
		      // value (240) because E4M3FNUZ differs from IEEE-754 types in these
		      // regards
		      {FromStr("240"), FromStr("4"), "240", APFloat::opInexact,
		       APFloat::fcNormal},
		      {FromStr("240"), FromStr("8"), "NaN",
		       APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
		      {FromStr("240"), FromStr("16"), "NaN",
		       APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
		      {FromStr("-240"), FromStr("-16"), "NaN",
		       APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
		      {QNaN, FromStr("-240"), "NaN", APFloat::opOK, APFloat::fcNaN},
		      {FromStr("240"), FromStr("-16"), "224", APFloat::opOK, APFloat::fcNormal},
		      {FromStr("240"), FromStr("0"), "240", APFloat::opOK, APFloat::fcNormal},
		      {FromStr("240"), FromStr("32"), "240", APFloat::opInexact,
		       APFloat::fcNormal, APFloat::rmTowardZero},
		      {FromStr("240"), FromStr("240"), "240", APFloat::opInexact,
		       APFloat::fcNormal, APFloat::rmTowardZero},
		  };

		  for (size_t i = 0; i < std::size(AdditionTests); ++i) {
		    // add() works in place, so operate on copies of the table entries.
		    APFloat x(AdditionTests[i].x);
		    APFloat y(AdditionTests[i].y);
		    APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode);

		    APFloat result(APFloat::Float8E4M3FNUZ(), AdditionTests[i].result);

		    EXPECT_TRUE(result.bitwiseIsEqual(x));
		    EXPECT_EQ(AdditionTests[i].status, (int)status);
		    EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory());
		  }
		}

		// Dividing 1 by 0 in Float8E4M3FNUZ is expected to report opDivByZero and to
		// leave NaN in the dividend.
		TEST(APFloatTest, Float8E4M3FNUZDivideByZero) {
		  const fltSemantics &Sem = APFloat::Float8E4M3FNUZ();
		  APFloat Quotient(Sem, "1");
		  const APFloat Divisor(Sem, "0");

		  // divide() updates the receiver in place and returns the status flags.
		  const APFloat::opStatus Status =
		      Quotient.divide(Divisor, APFloat::rmNearestTiesToEven);
		  EXPECT_EQ(Status, APFloat::opDivByZero);
		  EXPECT_TRUE(Quotient.isNaN());
		}

		// Converts Float8E5M2FNUZ values to Float8E4M3FNUZ with round-to-nearest-even
		// and checks the resulting value, the losesInfo flag and the status for
		// exact, overflowing and underflowing inputs.
		//
		// losesInfo is preset to the opposite of the expected outcome before each
		// conversion (after the first) so a conversion that fails to write the flag
		// is caught.
		TEST(APFloatTest, ConvertE5M2FNUZToE4M3FNUZ) {
		  bool losesInfo;
		  APFloat test(APFloat::Float8E5M2FNUZ(), "1.0");
		  APFloat::opStatus status = test.convert(
		      APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo);
		  EXPECT_EQ(1.0f, test.convertToFloat());
		  EXPECT_FALSE(losesInfo);
		  EXPECT_EQ(status, APFloat::opOK);

		  losesInfo = true;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0.0");
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_EQ(0.0f, test.convertToFloat());
		  EXPECT_FALSE(losesInfo);
		  EXPECT_EQ(status, APFloat::opOK);

		  losesInfo = true;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.Cp7"); // 224
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_EQ(0x1.Cp7 /* 224 */, test.convertToFloat());
		  EXPECT_FALSE(losesInfo);
		  EXPECT_EQ(status, APFloat::opOK);

		  // Test overflow
		  losesInfo = false;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p8"); // 256
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_TRUE(std::isnan(test.convertToFloat()));
		  EXPECT_TRUE(losesInfo);
		  EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact);

		  // Test underflow
		  // Reset the flag: it is still true from the overflow case above, which
		  // would otherwise satisfy the EXPECT_TRUE below on its own.
		  losesInfo = false;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-11");
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_EQ(0., test.convertToFloat());
		  EXPECT_TRUE(losesInfo);
		  EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);

		  // Test rounding up to smallest denormal number
		  losesInfo = false;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-11");
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_EQ(0x1.0p-10, test.convertToFloat());
		  EXPECT_TRUE(losesInfo);
		  EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);

		  // Testing inexact rounding to denormal number
		  losesInfo = false;
		  test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-10");
		  status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
		                        &losesInfo);
		  EXPECT_EQ(0x1.0p-9, test.convertToFloat());
		  EXPECT_TRUE(losesInfo);
		  EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
		}

// Converts Float8E4M3FNUZ values to Float8E5M2FNUZ with round-to-nearest-even
// and checks the resulting value, the losesInfo flag and the status for exact
// inputs, inputs that must be rounded, and a denormal source value.
//
// losesInfo is preset to the opposite of the expected outcome before each
// conversion (after the first) so a conversion that fails to write the flag is
// caught.
TEST(APFloatTest, ConvertE4M3FNUZToE5M2FNUZ) {
  bool losesInfo;
  APFloat test(APFloat::Float8E4M3FNUZ(), "1.0");
  APFloat::opStatus status = test.convert(
      APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo);
  EXPECT_EQ(1.0f, test.convertToFloat());
  EXPECT_FALSE(losesInfo);
  EXPECT_EQ(status, APFloat::opOK);

  losesInfo = true;
  test = APFloat(APFloat::Float8E4M3FNUZ(), "0.0");
  status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
                        &losesInfo);
  EXPECT_EQ(0.0f, test.convertToFloat());
  EXPECT_FALSE(losesInfo);
  EXPECT_EQ(status, APFloat::opOK);

  losesInfo = false;
  test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.2p0"); // 1.125
  status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
                        &losesInfo);
  EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat());
  EXPECT_TRUE(losesInfo);
  EXPECT_EQ(status, APFloat::opInexact);

  losesInfo = false;
  test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.6p0"); // 1.375
  status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
                        &losesInfo);
  EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat());
  EXPECT_TRUE(losesInfo);
  EXPECT_EQ(status, APFloat::opInexact);

  // Convert E4M3 denormal to E5M2 normal. Should not be truncated, despite the
  // destination format having one fewer significand bit
  losesInfo = true;
  test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.Cp-8");
  status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
                        &losesInfo);
  EXPECT_EQ(0x1.Cp-8, test.convertToFloat());
  EXPECT_FALSE(losesInfo);
  EXPECT_EQ(status, APFloat::opOK);
}

// For each listed 8-bit semantics, prints every one of the 256 bit patterns
// with toString() and checks that NaN prints as "NaN" and that every other
// value parses back to a bitwise-identical APFloat.
TEST(APFloatTest, F8ToString) {
  for (APFloat::Semantics S :
       {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN,
        APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ}) {
    SCOPED_TRACE("Semantics=" + std::to_string(S));
    for (int i = 0; i < 256; i++) {
      SCOPED_TRACE("i=" + std::to_string(i));
      APFloat test(APFloat::EnumToSemantics(S), APInt(8, i));
      llvm::SmallString<128> str;
      test.toString(str);

      if (test.isNaN()) {
        EXPECT_EQ(str, "NaN");
      } else {
        // Round trip: the printed text must reproduce the same bits.
        APFloat test2(APFloat::EnumToSemantics(S), str);
        EXPECT_TRUE(test.bitwiseIsEqual(test2));
      }
    }
  }
}

TEST(APFloatTest, IEEEdoubleToDouble) {		TEST(APFloatTest, IEEEdoubleToDouble) {
APFloat DPosZero(0.0);		APFloat DPosZero(0.0);
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines	TEST(APFloatTest, Float8E4M3FNToDouble) {
APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3FN(), false);		APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3FN(), false);
EXPECT_TRUE(SmallestDenorm.isDenormal());		EXPECT_TRUE(SmallestDenorm.isDenormal());
EXPECT_EQ(0x1p-9, SmallestDenorm.convertToDouble());		EXPECT_EQ(0x1p-9, SmallestDenorm.convertToDouble());

APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FN());		APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FN());
EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));		EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
}		}

		// convertToDouble() on Float8E5M2FNUZ for small exact values, the extreme
		// finite values, the smallest normal and denormal values, and NaN.
		TEST(APFloatTest, Float8E5M2FNUZToDouble) {
		  const fltSemantics &Sem = APFloat::Float8E5M2FNUZ();

		  // Values parsed from strings.
		  EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble());
		  EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble());

		  // Largest finite magnitude, positive then negative.
		  EXPECT_EQ(57344., APFloat::getLargest(Sem, false).convertToDouble());
		  EXPECT_EQ(-57344., APFloat::getLargest(Sem, true).convertToDouble());

		  // Smallest normalized magnitude, positive then negative.
		  EXPECT_EQ(0x1.p-15,
		            APFloat::getSmallestNormalized(Sem, false).convertToDouble());
		  EXPECT_EQ(-0x1.p-15,
		            APFloat::getSmallestNormalized(Sem, true).convertToDouble());

		  // Smallest positive value overall; it must be a denormal.
		  const APFloat TinyDenormal = APFloat::getSmallest(Sem, false);
		  EXPECT_TRUE(TinyDenormal.isDenormal());
		  EXPECT_EQ(0x1p-17, TinyDenormal.convertToDouble());

		  // A quiet NaN must still be NaN as a double.
		  EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble()));
		}

		// convertToDouble() on Float8E4M3FNUZ for small exact values, the extreme
		// finite values, the smallest normal and denormal values, and NaN.
		TEST(APFloatTest, Float8E4M3FNUZToDouble) {
		  const fltSemantics &Sem = APFloat::Float8E4M3FNUZ();

		  // Values parsed from strings.
		  EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble());
		  EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble());

		  // Largest finite magnitude, positive then negative.
		  EXPECT_EQ(240., APFloat::getLargest(Sem, false).convertToDouble());
		  EXPECT_EQ(-240., APFloat::getLargest(Sem, true).convertToDouble());

		  // Smallest normalized magnitude, positive then negative.
		  EXPECT_EQ(0x1.p-7,
		            APFloat::getSmallestNormalized(Sem, false).convertToDouble());
		  EXPECT_EQ(-0x1.p-7,
		            APFloat::getSmallestNormalized(Sem, true).convertToDouble());

		  // Smallest positive value overall; it must be a denormal.
		  const APFloat TinyDenormal = APFloat::getSmallest(Sem, false);
		  EXPECT_TRUE(TinyDenormal.isDenormal());
		  EXPECT_EQ(0x1p-10, TinyDenormal.convertToDouble());

		  // A quiet NaN must still be NaN as a double.
		  EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble()));
		}

		// convertToFloat() on Float8E5M2FNUZ for zero (including a requested
		// negative zero), small exact values, the extreme finite values, the
		// smallest normal and denormal values, and NaN.
		TEST(APFloatTest, Float8E5M2FNUZToFloat) {
		  const fltSemantics &Sem = APFloat::Float8E5M2FNUZ();

		  // Zero converts to a positive float zero.
		  const float ZeroAsFloat = APFloat::getZero(Sem).convertToFloat();
		  EXPECT_TRUE(APFloat(ZeroAsFloat).isPosZero());
		  // Negative zero is not supported, so asking for one still yields +0.
		  const float NegZeroAsFloat = APFloat::getZero(Sem, true).convertToFloat();
		  EXPECT_TRUE(APFloat(NegZeroAsFloat).isPosZero());

		  // Values parsed from strings.
		  EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat());
		  EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat());

		  // Largest finite magnitude, positive then negative.
		  EXPECT_EQ(57344.F, APFloat::getLargest(Sem, false).convertToFloat());
		  EXPECT_EQ(-57344.F, APFloat::getLargest(Sem, true).convertToFloat());

		  // Smallest normalized magnitude, positive then negative.
		  EXPECT_EQ(0x1.p-15F,
		            APFloat::getSmallestNormalized(Sem, false).convertToFloat());
		  EXPECT_EQ(-0x1.p-15F,
		            APFloat::getSmallestNormalized(Sem, true).convertToFloat());

		  // Smallest positive value overall; it must be a denormal.
		  const APFloat TinyDenormal = APFloat::getSmallest(Sem, false);
		  EXPECT_TRUE(TinyDenormal.isDenormal());
		  EXPECT_EQ(0x1p-17F, TinyDenormal.convertToFloat());

		  // A quiet NaN must still be NaN as a float.
		  EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToFloat()));
		}

		// convertToFloat() on Float8E4M3FNUZ for zero (including a requested
		// negative zero), small exact values, the extreme finite values, the
		// smallest normal and denormal values, and NaN.
		TEST(APFloatTest, Float8E4M3FNUZToFloat) {
		  const fltSemantics &Sem = APFloat::Float8E4M3FNUZ();

		  // Zero converts to a positive float zero.
		  const float ZeroAsFloat = APFloat::getZero(Sem).convertToFloat();
		  EXPECT_TRUE(APFloat(ZeroAsFloat).isPosZero());
		  // Negative zero is not supported, so asking for one still yields +0.
		  const float NegZeroAsFloat = APFloat::getZero(Sem, true).convertToFloat();
		  EXPECT_TRUE(APFloat(NegZeroAsFloat).isPosZero());

		  // Values parsed from strings.
		  EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat());
		  EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat());

		  // Largest finite magnitude, positive then negative.
		  EXPECT_EQ(240.F, APFloat::getLargest(Sem, false).convertToFloat());
		  EXPECT_EQ(-240.F, APFloat::getLargest(Sem, true).convertToFloat());

		  // Smallest normalized magnitude, positive then negative.
		  EXPECT_EQ(0x1.p-7F,
		            APFloat::getSmallestNormalized(Sem, false).convertToFloat());
		  EXPECT_EQ(-0x1.p-7F,
		            APFloat::getSmallestNormalized(Sem, true).convertToFloat());

		  // Smallest positive value overall; it must be a denormal.
		  const APFloat TinyDenormal = APFloat::getSmallest(Sem, false);
		  EXPECT_TRUE(TinyDenormal.isDenormal());
		  EXPECT_EQ(0x1p-10F, TinyDenormal.convertToFloat());

		  // A quiet NaN must still be NaN as a float.
		  EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToFloat()));
		}

TEST(APFloatTest, IEEEsingleToFloat) {		TEST(APFloatTest, IEEEsingleToFloat) {
APFloat FPosZero(0.0F);		APFloat FPosZero(0.0F);
APFloat FPosZeroToFloat(FPosZero.convertToFloat());		APFloat FPosZeroToFloat(FPosZero.convertToFloat());
EXPECT_TRUE(FPosZeroToFloat.isPosZero());		EXPECT_TRUE(FPosZeroToFloat.isPosZero());
APFloat FNegZero(-0.0F);		APFloat FNegZero(-0.0F);
APFloat FNegZeroToFloat(FNegZero.convertToFloat());		APFloat FNegZeroToFloat(FNegZero.convertToFloat());
EXPECT_TRUE(FNegZeroToFloat.isNegZero());		EXPECT_TRUE(FNegZeroToFloat.isNegZero());

▲ Show 20 Lines • Show All 173 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[llvm][APFloat] Add NaN-in-negative-zero formats by AMD and GraphCore
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 496233

llvm/include/llvm/ADT/APFloat.h

llvm/lib/Support/APFloat.cpp

llvm/unittests/ADT/APFloatTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[llvm][APFloat] Add NaN-in-negative-zero formats by AMD and GraphCore
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 496233

llvm/include/llvm/ADT/APFloat.h

llvm/lib/Support/APFloat.cpp

llvm/unittests/ADT/APFloatTest.cpp

[llvm][APFloat] Add NaN-in-negative-zero formats by AMD and GraphCore
ClosedPublic