Diff 436086

libc/src/__support/FPUtil/FMA.h

	Show All 21 Lines
	#else			#else
	// FMA instructions are not available			// FMA instructions are not available
	#include "generic/FMA.h"			#include "generic/FMA.h"
	#include "src/__support/CPP/TypeTraits.h"			#include "src/__support/CPP/TypeTraits.h"

	namespace __llvm_libc {			namespace __llvm_libc {
	namespace fputil {			namespace fputil {

	// We have a generic implementation available only for single precision fma as			template <typename T> static inline T fma(T x, T y, T z) {
	// we restrict it to float values for now.
	template <typename T>
	static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
	T z) {
	return generic::fma(x, y, z);			return generic::fma(x, y, z);
	}			}

	} // namespace fputil			} // namespace fputil
	} // namespace __llvm_libc			} // namespace __llvm_libc

	#endif			#endif

	#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_FMA_H			#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_FMA_H

libc/src/__support/FPUtil/generic/FMA.h

//===-- Common header for FMA implementations -------------------- C++ --===//		//===-- Common header for FMA implementations -------------------- C++ --===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H		#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H		#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H

		#include "src/__support/CPP/Bit.h"
#include "src/__support/CPP/TypeTraits.h"		#include "src/__support/CPP/TypeTraits.h"
		#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"		#include "src/__support/FPUtil/FPBits.h"
		#include "src/__support/FPUtil/FloatProperties.h"
		#include "src/__support/FPUtil/builtin_wrappers.h"
		#include "src/__support/common.h"

namespace __llvm_libc {		namespace __llvm_libc {
namespace fputil {		namespace fputil {
namespace generic {		namespace generic {

template <typename T>		template <typename T> static inline T fma(T x, T y, T z);
static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
T z) {		// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
		// The implementation below only is only correct for the default rounding mode,
		// round-to-nearest tie-to-even.
		template <> inline float fma<float>(float x, float y, float z) {
// Product is exact.		// Product is exact.
double prod = static_cast<double>(x) * static_cast<double>(y);		double prod = static_cast<double>(x) * static_cast<double>(y);
double z_d = static_cast<double>(z);		double z_d = static_cast<double>(z);
double sum = prod + z_d;		double sum = prod + z_d;
fputil::FPBits<double> bit_prod(prod), bitz(z_d), bit_sum(sum);		fputil::FPBits<double> bit_prod(prod), bitz(z_d), bit_sum(sum);

if (!(bit_sum.is_inf_or_nan() \|\| bit_sum.is_zero())) {		if (!(bit_sum.is_inf_or_nan() \|\| bit_sum.is_zero())) {
// Since the sum is computed in double precision, rounding might happen		// Since the sum is computed in double precision, rounding might happen
Show All 31 Lines	if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) {
bit_sum.set_mantissa(bit_sum.get_mantissa() - 1);		bit_sum.set_mantissa(bit_sum.get_mantissa() - 1);
}		}
}		}
}		}

return static_cast<float>(static_cast<double>(bit_sum));		return static_cast<float>(static_cast<double>(bit_sum));
}		}

		namespace internal {

		// Extract the sticky bits and shift the `mantissa` to the right by
		// `shift_length`.
		static inline bool shift_mantissa(int shift_length, __uint128_t &mant) {
		if (shift_length >= 128) {
		mant = 0;
		return true; // prod_mant is non-zero.
		}
		__uint128_t mask = (__uint128_t(1) << shift_length) - 1;
		bool sticky_bits = (mant & mask) != 0;
		mant >>= shift_length;
		return sticky_bits;
		}

		} // namespace internal

		template <> inline double fma<double>(double x, double y, double z) {
		using FPBits = fputil::FPBits<double>;
		using FloatProp = fputil::FloatProperties<double>;

		if (unlikely(x == 0 \|\| y == 0 \|\| z == 0)) {
		return x * y + z;
		}

		int x_exp = 0;
		int y_exp = 0;
		int z_exp = 0;

		// Normalize denormal inputs.
		if (unlikely(FPBits(x).get_unbiased_exponent() == 0)) {
		x_exp -= 52;
		x *= 0x1.0p+52;
		}
		if (unlikely(FPBits(y).get_unbiased_exponent() == 0)) {
		y_exp -= 52;
		y *= 0x1.0p+52;
		}
		if (unlikely(FPBits(z).get_unbiased_exponent() == 0)) {
		z_exp -= 52;
		z *= 0x1.0p+52;
		}

		FPBits x_bits(x), y_bits(y), z_bits(z);
		bool x_sign = x_bits.get_sign();
		bool y_sign = y_bits.get_sign();
		bool z_sign = z_bits.get_sign();
		bool prod_sign = x_sign != y_sign;
		x_exp += x_bits.get_unbiased_exponent();
		y_exp += y_bits.get_unbiased_exponent();
		z_exp += z_bits.get_unbiased_exponent();

		if (unlikely(x_exp == FPBits::MAX_EXPONENT \|\| y_exp == FPBits::MAX_EXPONENT \|\|
		z_exp == FPBits::MAX_EXPONENT))
		return x * y + z;

		// Extract mantissa and append hidden leading bits.
		__uint128_t x_mant = x_bits.get_mantissa() \| FPBits::MIN_NORMAL;
		__uint128_t y_mant = y_bits.get_mantissa() \| FPBits::MIN_NORMAL;
		__uint128_t z_mant = z_bits.get_mantissa() \| FPBits::MIN_NORMAL;

		// If the exponent of the product x*y > the exponent of z, then no extra
		// precision beside the entire product x*y is needed. On the other hand, when
		// the exponent of z >= the exponent of the product x*y, the worst-case that
		// we need extra precision is when there is cancellation and the most
		// significant bit of the product is aligned exactly with the second most
		// significant bit of z:
		// z : 10aa...a
		// - prod : 1bb...bb....b
		// In that case, in order to store the exact result, we need at least
		// (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
		// Overall, before aligning the mantissas and exponents, we can simply left-
		// shift the mantissa of z by at least 54, and left-shift the product of x*y
		// by (that amount - 52). After that, it is enough to align the least
		// significant bit, given that we keep track of the round and sticky bits
		// after the least significant bit.
		// We pick shifting z_mant by 64 bits so that technically we can simply use
		// the original mantissa as high part when constructing 128-bit z_mant. So the
		// mantissa of prod will be left-shifted by 64 - 54 = 10 initially.

		michaelrjUnsubmitted Done Reply Inline Actions nit: can you add something explaining why this number is 10? michaelrj: nit: can you add something explaining why this number is 10?
		__uint128_t prod_mant = x_mant * y_mant << 10;
		int prod_lsb_exp =
		x_exp + y_exp -
		(FPBits::EXPONENT_BIAS + 2 * MantissaWidth<double>::VALUE + 10);

		z_mant <<= 64;
		int z_lsb_exp = z_exp - (MantissaWidth<double>::VALUE + 64);
		bool round_bit = false;
		bool sticky_bits = false;
		bool z_shifted = false;

		// Align exponents.
		if (prod_lsb_exp < z_lsb_exp) {
		sticky_bits = internal::shift_mantissa(z_lsb_exp - prod_lsb_exp, prod_mant);
		prod_lsb_exp = z_lsb_exp;
		} else if (z_lsb_exp < prod_lsb_exp) {
		z_shifted = true;
		sticky_bits = internal::shift_mantissa(prod_lsb_exp - z_lsb_exp, z_mant);
		}

		// Perform the addition:
		// (-1)^prod_sign * prod_mant + (-1)^z_sign * z_mant.
		// The final result will be stored in prod_sign and prod_mant.
		if (prod_sign == z_sign) {
		// Effectively an addition.
		prod_mant += z_mant;
		} else {
		// Subtraction cases.
		if (prod_mant >= z_mant) {
		if (z_shifted && sticky_bits) {
		michaelrjUnsubmitted Done Reply Inline Actions nit: it should either be `the sticky bits remain` or `the sticky bit remains` michaelrj: nit: it should either be `the sticky bits remain` or `the sticky bit remains`
		// Add 1 more to the subtrahend so that the sticky bits remain
		// positive. This would simplify the rounding logic.
		++z_mant;
		}
		prod_mant -= z_mant;
		} else {
		if (!z_shifted && sticky_bits) {
		// Add 1 more to the subtrahend so that the sticky bits remain
		// positive. This would simplify the rounding logic.
		++prod_mant;
		}
		prod_mant = z_mant - prod_mant;
		prod_sign = z_sign;
		}
		}

		uint64_t result = 0;
		int r_exp = 0; // Unbiased exponent of the result

		// Normalize the result.
		if (prod_mant != 0) {
		uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
		int lead_zeros =
		prod_hi ? clz(prod_hi) : 64 + clz(static_cast<uint64_t>(prod_mant));
		// Move the leading 1 to the most significant bit.
		prod_mant <<= lead_zeros;
		// The lower 64 bits are always sticky bits after moving the leading 1 to
		// the most significant bit.
		sticky_bits \|= (static_cast<uint64_t>(prod_mant) != 0);
		result = static_cast<uint64_t>(prod_mant >> 64);
		// Change prod_lsb_exp the be the exponent of the least significant bit of
		// the result.
		prod_lsb_exp += 64 - lead_zeros;
		r_exp = prod_lsb_exp + 63;

		if (r_exp > 0) {
		// The result is normal. We will shift the mantissa to the right by
		// 63 - 52 = 11 bits (from the locations of the most significant bit).
		// Then the rounding bit will correspond the the 11th bit, and the lowest
		// 10 bits are merged into sticky bits.
		round_bit = (result & 0x0400ULL) != 0;
		sticky_bits \|= (result & 0x03ffULL) != 0;
		result >>= 11;
		} else {
		if (r_exp < -52) {
		// The result is smaller than 1/2 of the smallest denormal number.
		sticky_bits = true; // since the result is non-zero.
		result = 0;
		} else {
		// The result is denormal.
		uint64_t mask = 1ULL << (11 - r_exp);
		round_bit = (result & mask) != 0;
		sticky_bits \|= (result & (mask - 1)) != 0;
		if (r_exp > -52)
		result >>= 12 - r_exp;
		else
		result = 0;
		}

		r_exp = 0;
		}
		} else {
		// Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly.
		prod_sign = false;
		}

		// Finalize the result.
		int round_mode = fputil::get_round();
		if (unlikely(r_exp >= FPBits::MAX_EXPONENT)) {
		if ((round_mode == FE_TOWARDZERO) \|\|
		(round_mode == FE_UPWARD && prod_sign) \|\|
		(round_mode == FE_DOWNWARD && !prod_sign)) {
		result = FPBits::MAX_NORMAL;
		return prod_sign ? -bit_cast<double>(result) : bit_cast<double>(result);
		}
		return prod_sign ? static_cast<double>(FPBits::neg_inf())
		: static_cast<double>(FPBits::inf());
		}

		// Remove hidden bit and append the exponent field and sign bit.
		result = (result & FloatProp::MANTISSA_MASK) \|
		(static_cast<uint64_t>(r_exp) << FloatProp::MANTISSA_WIDTH);
		if (prod_sign) {
		result \|= FloatProp::SIGN_MASK;
		}

		// Rounding.
		if (round_mode == FE_TONEAREST) {
		if (round_bit && (sticky_bits \|\| ((result & 1) != 0)))
		++result;
		} else if ((round_mode == FE_UPWARD && !prod_sign) \|\|
		(round_mode == FE_DOWNWARD && prod_sign)) {
		if (round_bit \|\| sticky_bits)
		++result;
		}

		return bit_cast<double>(result);
		}

} // namespace generic		} // namespace generic
} // namespace fputil		} // namespace fputil
} // namespace __llvm_libc		} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H		#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H

libc/src/math/CMakeLists.txt

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	SRCS
fma.cpp		fma.cpp
HDRS		HDRS
fma.h		fma.h
DEPENDS		DEPENDS
libc.src.__support.FPUtil.fputil		libc.src.__support.FPUtil.fputil
libc.src.__support.FPUtil.fma		libc.src.__support.FPUtil.fma
COMPILE_OPTIONS		COMPILE_OPTIONS
-O3		-O3
FLAGS
FMA_OPT__ONLY
)		)

add_math_entrypoint_object(ceil)		add_math_entrypoint_object(ceil)
add_math_entrypoint_object(ceilf)		add_math_entrypoint_object(ceilf)
add_math_entrypoint_object(ceill)		add_math_entrypoint_object(ceill)

add_math_entrypoint_object(copysign)		add_math_entrypoint_object(copysign)
add_math_entrypoint_object(copysignf)		add_math_entrypoint_object(copysignf)
▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines

libc/test/src/math/CMakeLists.txt

Show First 20 Lines • Show All 1,145 Lines • ▼ Show 20 Lines	add_fp_unittest(
HDRS		HDRS
NextAfterTest.h		NextAfterTest.h
DEPENDS		DEPENDS
libc.include.math		libc.include.math
libc.src.math.nextafterl		libc.src.math.nextafterl
libc.src.__support.FPUtil.fputil		libc.src.__support.FPUtil.fputil
)		)

		# TODO(lntue): The current implementation of fputil::general::fma<float> is only
		# correctly rounded for the default rounding mode round-to-nearest tie-to-even.
add_fp_unittest(		add_fp_unittest(
fmaf_test		fmaf_test
NEED_MPFR		NEED_MPFR
SUITE		SUITE
libc_math_unittests		libc_math_unittests
SRCS		SRCS
fmaf_test.cpp		fmaf_test.cpp
DEPENDS		DEPENDS
libc.include.math		libc.include.math
libc.src.math.fmaf		libc.src.math.fmaf
libc.src.__support.FPUtil.fputil		libc.src.__support.FPUtil.fputil
		FLAGS
		FMA_OPT__ONLY
)		)

add_fp_unittest(		add_fp_unittest(
fma_test		fma_test
NEED_MPFR		NEED_MPFR
SUITE		SUITE
libc_math_unittests		libc_math_unittests
SRCS		SRCS
▲ Show 20 Lines • Show All 100 Lines • Show Last 20 Lines

libc/test/src/math/FmaTest.h

Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	void test_special_numbers(Func func) {
// Test underflow rounding down.		// Test underflow rounding down.
T v = T(FPBits(FPBits::MIN_NORMAL + UIntType(1)));		T v = T(FPBits(FPBits::MIN_NORMAL + UIntType(1)));
EXPECT_FP_EQ(func(T(1) / T(FPBits::MIN_NORMAL << 1), v,		EXPECT_FP_EQ(func(T(1) / T(FPBits::MIN_NORMAL << 1), v,
T(FPBits(FPBits::MIN_NORMAL))),		T(FPBits(FPBits::MIN_NORMAL))),
v);		v);
// Test overflow.		// Test overflow.
T z = T(FPBits(FPBits::MAX_NORMAL));		T z = T(FPBits(FPBits::MAX_NORMAL));
EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);		EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);
		// Exact cancellation.
		EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0));
		EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0));
}		}

void test_subnormal_range(Func func) {		void test_subnormal_range(Func func) {
constexpr UIntType COUNT = 1000001;		constexpr UIntType COUNT = 1000001;
constexpr UIntType STEP =		constexpr UIntType STEP =
(FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT;		(FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT;
for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL;		for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL;
v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL;		v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL;
v += STEP, w -= STEP) {		v += STEP, w -= STEP) {
T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)),		T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)),
z = T(FPBits(w));		z = T(FPBits(w));
T result = func(x, y, z);
mpfr::TernaryInput<T> input{x, y, z};		mpfr::TernaryInput<T> input{x, y, z};
ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);		ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
		0.5);
}		}
}		}

void test_normal_range(Func func) {		void test_normal_range(Func func) {
constexpr UIntType COUNT = 1000001;		constexpr UIntType COUNT = 1000001;
constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT;		constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT;
for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL;		for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL;
v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL;		v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL;
v += STEP, w -= STEP) {		v += STEP, w -= STEP) {
T x = T(FPBits(v)), y = T(FPBits(w)),		T x = T(FPBits(v)), y = T(FPBits(w)),
z = T(FPBits(get_random_bit_pattern()));		z = T(FPBits(get_random_bit_pattern()));
T result = func(x, y, z);
mpfr::TernaryInput<T> input{x, y, z};		mpfr::TernaryInput<T> input{x, y, z};
ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);		ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
		0.5);
}		}
}		}
};		};

#endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H		#endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H

libc/test/src/math/fma_test.cpp

	//===-- Unittests for fma ------------------------------------------------===//			//===-- Unittests for fma ------------------------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "FmaTest.h"			#include "FmaTest.h"

	#include "src/math/fma.h"			#include "src/math/fma.h"

	using LlvmLibcFmaTest = FmaTestTemplate<double>;			struct Inputs {
				double a, b, c;
				};

				struct LlvmLibcFmaTest : public FmaTestTemplate<double> {
				void test_more_values() {
				constexpr int N = 236;
				constexpr Inputs INPUTS[N] = {
				{0x1p+0, 0x2p+0, 0x3p+0},
				{0x1.4p+0, 0xcp-4, 0x1p-4},
				{0x0p+0, 0x0p+0, 0x0p+0},
				{0x1p+0, 0x0p+0, 0x0p+0},
				{0x0p+0, 0x1p+0, 0x0p+0},
				{0x1p+0, 0x1p+0, 0x1p+0},
				{0x0p+0, 0x0p+0, 0x1p+0},
				{0x0p+0, 0x0p+0, 0x2p+0},
				{0x0p+0, 0x0p+0, 0xf.fffffp+124},
				{0x0p+0, 0x0p+0, 0xf.ffffffffffff8p+1020},
				{0x0p+0, 0x1p+0, 0x1p+0},
				{0x1p+0, 0x0p+0, 0x1p+0},
				{0x0p+0, 0x1p+0, 0x2p+0},
				{0x1p+0, 0x0p+0, 0x2p+0},
				{0x0p+0, 0x1p+0, 0xf.fffffp+124},
				{0x0p+0, 0x1p+0, 0xf.ffffffffffff8p+1020},
				{0x1p+0, 0x0p+0, 0xf.fffffp+124},
				{0x1p+0, 0x0p+0, 0xf.ffffffffffff8p+1020},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0x4p-128, 0x4p-128, 0x0p+0},
				{0x4p-128, 0x4p-1024, 0x0p+0},
				{0x4p-128, 0x8p-972, 0x0p+0},
				{0x4p-1024, 0x4p-128, 0x0p+0},
				{0x4p-1024, 0x4p-1024, 0x0p+0},
				{0x4p-1024, 0x8p-972, 0x0p+0},
				{0x8p-972, 0x4p-128, 0x0p+0},
				{0x8p-972, 0x4p-1024, 0x0p+0},
				{0x8p-972, 0x8p-972, 0x0p+0},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
				{0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
				{0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
				{0x2.fffp+12, 0x1.000002p+0, 0x1.ffffp-24},
				{0x1.fffp+0, 0x1.00001p+0, 0x1.fffp+0},
				{0xc.d5e6fp+124, 0x2.6af378p-128, 0x1.f08948p+0},
				{0x1.9abcdep+100, 0x2.6af378p-128, 0x3.e1129p-28},
				{0xf.fffffp+124, 0x1.001p+0, 0xf.fffffp+124},
				{0xf.fffffp+124, 0x1.fffffep+0, 0xf.fffffp+124},
				{0xf.fffffp+124, 0x2p+0, 0xf.fffffp+124},
				{0x5p-128, 0x8.00002p-4, 0x1p-128},
				{0x7.ffffep-128, 0x8.00001p-4, 0x8p-152},
				{0x8p-152, 0x8p-4, 0x3.fffff8p-128},
				{0x8p-152, 0x8.8p-4, 0x3.fffff8p-128},
				{0x8p-152, 0x8p-152, 0x8p+124},
				{0x8p-152, 0x8p-152, 0x4p-128},
				{0x8p-152, 0x8p-152, 0x3.fffff8p-128},
				{0x8p-152, 0x8p-152, 0x8p-152},
				{0xf.ffp-4, 0xf.ffp-4, 0xf.fep-4},
				{0x4.000008p-128, 0x4.000008p-28, 0x8p+124},
				{0x4.000008p-128, 0x4.000008p-28, 0x8p+100},
				{0x2.fep+12, 0x1.0000000000001p+0, 0x1.ffep-48},
				{0x1.fffp+0, 0x1.0000000000001p+0, 0x1.fffp+0},
				{0x1.0000002p+0, 0xf.fffffep-4, 0x1p-300},
				{0xe.f56df7797f768p+1020, 0x3.7ab6fbbcbfbb4p-1024,
				0x3.40bf1803497f6p+0},
				{0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024,
				0x6.817e300692fecp-124},
				{0xf.ffffffffffff8p+1020, 0x1.001p+0, 0xf.ffffffffffff8p+1020},
				{0xf.ffffffffffff8p+1020, 0x1.fffffffffffffp+0,
				0xf.ffffffffffff8p+1020},
				{0xf.ffffffffffff8p+1020, 0x2p+0, 0xf.ffffffffffff8p+1020},
				{0x5.a827999fcef3p-540, 0x5.a827999fcef3p-540, 0x0p+0},
				{0x3.bd5b7dde5fddap-496, 0x3.bd5b7dde5fddap-496, 0xd.fc352bc352bap-992},
				{0x3.bd5b7dde5fddap-504, 0x3.bd5b7dde5fddap-504,
				0xd.fc352bc352bap-1008},
				{0x8p-540, 0x4p-540, 0x4p-1076},
				{0x1.7fffff8p-968, 0x4p-108, 0x4p-1048},
				{0x2.8000008p-968, 0x4p-108, 0x4p-1048},
				{0x2.8p-968, 0x4p-108, 0x4p-1048},
				{0x2.33956cdae7c2ep-960, 0x3.8e211518bfea2p-108,
				0x2.02c2b59766d9p-1024},
				{0x3.a5d5dadd1d3a6p-980, 0x2.9c0cd8c5593bap-64, 0x2.49179ac00d15p-1024},
				{0x2.2a7aca1773e0cp-908, 0x9.6809186a42038p-128, 0x2.c9e356b3f0fp-1024},
				{0x3.ffffffffffffep-712, 0x3.ffffffffffffep-276,
				0x3.fffffc0000ffep-984},
				{0x5p-1024, 0x8.000000000001p-4, 0x1p-1024},
				{0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, 0x4p-1076},
				{0x4p-1076, 0x8p-4, 0x3.ffffffffffffcp-1024},
				{0x4p-1076, 0x8.8p-4, 0x3.ffffffffffffcp-1024},
				{0x4p-1076, 0x4p-1076, 0x8p+1020},
				{0x4p-1076, 0x4p-1076, 0x4p-1024},
				{0x4p-1076, 0x4p-1076, 0x3.ffffffffffffcp-1024},
				{0x4p-1076, 0x4p-1076, 0x4p-1076},
				{0xf.ffffffffffff8p-4, 0xf.ffffffffffff8p-4, 0xf.ffffffffffffp-4},
				{0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020},
				{0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968},
				{0x7.fffff8p-128, 0x3.fffffcp+24, 0xf.fffffp+124},
				{0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52,
				0xf.ffffffffffff8p+1020},
				};

				for (int i = 0; i < N; ++i) {
				for (int signs = 0; signs < 7; ++signs) {
				double a = (signs & 4) ? -INPUTS[i].a : INPUTS[i].a;
				double b = (signs & 2) ? -INPUTS[i].b : INPUTS[i].b;
				double c = (signs & 1) ? -INPUTS[i].c : INPUTS[i].c;
				mpfr::TernaryInput<double> input{a, b, c};
				ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input,
				__llvm_libc::fma(a, b, c), 0.5);
				}
				}
				}
				};

	TEST_F(LlvmLibcFmaTest, SpecialNumbers) {			TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
	test_special_numbers(&__llvm_libc::fma);			test_special_numbers(&__llvm_libc::fma);
	}			}

	TEST_F(LlvmLibcFmaTest, SubnormalRange) {			TEST_F(LlvmLibcFmaTest, SubnormalRange) {
	test_subnormal_range(&__llvm_libc::fma);			test_subnormal_range(&__llvm_libc::fma);
	}			}

	TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); }			TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); }

				TEST_F(LlvmLibcFmaTest, ExtraValues) { test_more_values(); }

libc/test/src/math/fmaf_test.cpp

	//===-- Unittests for fmaf ------------------------------------------------===//			//===-- Unittests for fmaf ------------------------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "FmaTest.h"			#include "FmaTest.h"

	#include "src/math/fmaf.h"			#include "src/math/fmaf.h"

	using LlvmLibcFmaTest = FmaTestTemplate<float>;			using LlvmLibcFmafTest = FmaTestTemplate<float>;

	TEST_F(LlvmLibcFmaTest, SpecialNumbers) {			TEST_F(LlvmLibcFmafTest, SpecialNumbers) {
	test_special_numbers(&__llvm_libc::fmaf);			test_special_numbers(&__llvm_libc::fmaf);
	}			}

	TEST_F(LlvmLibcFmaTest, SubnormalRange) {			TEST_F(LlvmLibcFmafTest, SubnormalRange) {
	test_subnormal_range(&__llvm_libc::fmaf);			test_subnormal_range(&__llvm_libc::fmaf);
	}			}

	TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); }			TEST_F(LlvmLibcFmafTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); }

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Implement double precision FMA for targets without FMA instructions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 436086

libc/src/__support/FPUtil/FMA.h

libc/src/__support/FPUtil/generic/FMA.h

libc/src/math/CMakeLists.txt

libc/test/src/math/CMakeLists.txt

libc/test/src/math/FmaTest.h

libc/test/src/math/fma_test.cpp

libc/test/src/math/fmaf_test.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Implement double precision FMA for targets without FMA instructions.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 436086

libc/src/__support/FPUtil/FMA.h

libc/src/__support/FPUtil/generic/FMA.h

libc/src/math/CMakeLists.txt

libc/test/src/math/CMakeLists.txt

libc/test/src/math/FmaTest.h

libc/test/src/math/fma_test.cpp

libc/test/src/math/fmaf_test.cpp

[libc] Implement double precision FMA for targets without FMA instructions.
ClosedPublic