Index: compiler-rt/lib/builtins/CMakeLists.txt =================================================================== --- compiler-rt/lib/builtins/CMakeLists.txt +++ compiler-rt/lib/builtins/CMakeLists.txt @@ -292,6 +292,7 @@ # long double is not 80 bits on Android or MSVC. set(x86_80_BIT_SOURCES divxc3.c + extendhfxf2.c fixxfdi.c fixxfti.c fixunsxfdi.c @@ -303,6 +304,7 @@ floatuntixf.c mulxc3.c powixf2.c + truncxfhf2.c ) if (NOT MSVC) Index: compiler-rt/lib/builtins/extendhfxf2.c =================================================================== --- /dev/null +++ compiler-rt/lib/builtins/extendhfxf2.c @@ -0,0 +1,24 @@ +//===-- lib/extendhfxf2.c - half -> x86 FP80 conversion -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define FP80_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_80BIT) + +#define SRC_HALF +#define DST_FP80 +#include "fp_extend_impl.inc" + +// Out-of-line forwarding definition of the public entry point; no alias is +// defined in this file, the conversion itself is done by __extendXfYf2__.
+COMPILER_RT_ABI NOINLINE long double __extendhfxf2(src_t a) { + return __extendXfYf2__(a); +} + +#endif Index: compiler-rt/lib/builtins/fp_extend.h =================================================================== --- compiler-rt/lib/builtins/fp_extend.h +++ compiler-rt/lib/builtins/fp_extend.h @@ -58,22 +58,37 @@ typedef float dst_t; typedef uint32_t dst_rep_t; #define DST_REP_C UINT32_C +static const int dstBits = 32; static const int dstSigBits = 23; +static const int dstIntBits = 0; #elif defined DST_DOUBLE typedef double dst_t; typedef uint64_t dst_rep_t; #define DST_REP_C UINT64_C +static const int dstBits = 64; static const int dstSigBits = 52; +static const int dstIntBits = 0; + +#elif defined DST_FP80 +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstBits = 80; +static const int dstSigBits = 64; +static const int dstIntBits = 1; + #elif defined DST_QUAD typedef long double dst_t; typedef __uint128_t dst_rep_t; #define DST_REP_C (__uint128_t) +static const int dstBits = 128; static const int dstSigBits = 112; +static const int dstIntBits = 0; #else -#error Destination should be single, double, or quad precision! +#error Destination should be single, double, fp80, or quad precision! #endif // end destination precision // End of specialization parameters.
Two helper routines for conversion to and Index: compiler-rt/lib/builtins/fp_extend_impl.inc =================================================================== --- compiler-rt/lib/builtins/fp_extend_impl.inc +++ compiler-rt/lib/builtins/fp_extend_impl.inc @@ -52,12 +52,12 @@ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); const src_rep_t srcNaNCode = srcQNaN - 1; - const int dstBits = sizeof(dst_t) * CHAR_BIT; const int dstExpBits = dstBits - dstSigBits - 1; const int dstInfExp = (1 << dstExpBits) - 1; const int dstExpBias = dstInfExp >> 1; const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + const dst_rep_t dstSignificandMask = dstMinNormal - 1; // Break a into a sign and representation of the absolute value. const src_rep_t aRep = srcToRep(a); @@ -72,6 +72,19 @@ // Extend to the destination type by shifting the significand and // exponent into the proper position and rebiasing the exponent. absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + + if (dstIntBits) { + // x86_fp80 has an explicit instead of implicit integer bit at the top of + // the significand. Canonical values (except denormals & zero) set it to + // 1. + dst_rep_t absSignificand = absResult & dstSignificandMask; + absSignificand >>= 1; + absSignificand |= (dst_rep_t)1 << (dstSigBits - 1); + + absResult &= ~dstSignificandMask; + absResult |= absSignificand; + } + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; } @@ -81,8 +94,10 @@ // bit (if needed) and right-aligning the rest of the trailing NaN // payload field.
absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); - absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + if (dstIntBits) + absResult |= (dst_rep_t)1 << (dstSigBits - 1); + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - dstIntBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - dstIntBits - srcSigBits); } else if (aAbs) { @@ -90,7 +105,10 @@ // renormalize the significand and clear the leading bit, then insert // the correct adjusted exponent in the destination type. const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult = (dst_rep_t)aAbs << (dstSigBits - dstIntBits - srcSigBits + scale); - absResult ^= dstMinNormal; + // With an explicit integer bit the leading 1 has been shifted into that bit + // and must stay set; only an implicit leading bit is cleared here. + if (!dstIntBits) + absResult ^= dstMinNormal; const int resultExponent = dstExpBias - srcExpBias - scale + 1; absResult |= (dst_rep_t)resultExponent << dstSigBits; Index: compiler-rt/lib/builtins/fp_lib.h =================================================================== --- compiler-rt/lib/builtins/fp_lib.h +++ compiler-rt/lib/builtins/fp_lib.h @@ -104,6 +104,12 @@ COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); +#elif defined FP80_PRECISION +#if __LDBL_MANT_DIG__ == 64 +#define CRT_LDBL_80BIT +// Only x86 does 80-bit floats, only support extend/trunc. +#endif + #elif defined QUAD_PRECISION #if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__) #define CRT_LDBL_128BIT @@ -202,7 +208,7 @@ #undef Word_FullMask #endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__ #else -#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#error SINGLE_PRECISION, DOUBLE_PRECISION, FP80_PRECISION, or QUAD_PRECISION must be defined.
#endif #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ Index: compiler-rt/lib/builtins/fp_trunc.h =================================================================== --- compiler-rt/lib/builtins/fp_trunc.h +++ compiler-rt/lib/builtins/fp_trunc.h @@ -19,22 +19,36 @@ typedef float src_t; typedef uint32_t src_rep_t; #define SRC_REP_C UINT32_C +static const int srcBits = 32; static const int srcSigBits = 23; +static const int srcIntBits = 0; #elif defined SRC_DOUBLE typedef double src_t; typedef uint64_t src_rep_t; #define SRC_REP_C UINT64_C +static const int srcBits = 64; static const int srcSigBits = 52; +static const int srcIntBits = 0; + +#elif defined SRC_FP80 +typedef long double src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +static const int srcBits = 80; +static const int srcSigBits = 64; +static const int srcIntBits = 1; #elif defined SRC_QUAD typedef long double src_t; typedef __uint128_t src_rep_t; #define SRC_REP_C (__uint128_t) +static const int srcBits = 128; static const int srcSigBits = 112; +static const int srcIntBits = 0; #else -#error Source should be double precision or quad precision! +#error Source should be double precision, fp80 precision, or quad precision! #endif // end source precision #if defined DST_DOUBLE @@ -77,7 +91,13 @@ src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; + src_rep_t res = rep.i; + + // Zero out any bits above the format's width (padding from the union).
+ if (srcBits < (int)(sizeof(src_rep_t) * CHAR_BIT)) + res &= (((src_rep_t)1 << srcBits) - 1); + + return res; } static __inline dst_t dstFromRep(dst_rep_t x) { Index: compiler-rt/lib/builtins/fp_trunc_impl.inc =================================================================== --- compiler-rt/lib/builtins/fp_trunc_impl.inc +++ compiler-rt/lib/builtins/fp_trunc_impl.inc @@ -38,10 +38,28 @@ #include "fp_trunc.h" +// x86_fp80 has an explicit integer bit at the top of the significand. +// This allowed more weird denormals, infinities and NaNs in 8087 & 80287; +// but from 387 onwards those are treated as invalid and we can just +// ignore the issue by converting to a canonical "usual-format" IEEE-754. + +static __inline src_rep_t removeExplicitIntBit(src_rep_t in) { + if (!srcIntBits) + return in; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcSignificandMask = srcMinNormal - 1; + + src_rep_t inSig = in & srcSignificandMask; + in &= ~srcSignificandMask; + in |= (inSig << 1) & srcSignificandMask; + + return in; +} + static __inline dst_t __truncXfYf2__(src_t a) { // Various constants whose values follow from the type parameters. // Any reasonable optimizer will fold and propagate all of these. - const int srcBits = sizeof(src_t) * CHAR_BIT; const int srcExpBits = srcBits - srcSigBits - 1; const int srcInfExp = (1 << srcExpBits) - 1; const int srcExpBias = srcInfExp >> 1; @@ -71,7 +89,9 @@ // Break a into a sign and representation of the absolute value.
const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; + // Strip the explicit integer bit (if any) up front so that all the range + // checks below, including the infinity/NaN test, see an IEEE-754 layout. + const src_rep_t aAbs = removeExplicitIntBit(aRep & srcAbsMask); const src_rep_t sign = aRep & srcSignMask; dst_rep_t absResult; @@ -104,10 +124,10 @@ // a underflows on conversion to the destination type or is an exact // zero. The result may be a denormal or zero. Extract the exponent // to get the shift amount for the denormalization. const int aExp = aAbs >> srcSigBits; const int shift = srcExpBias - dstExpBias - aExp + 1; - const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + const src_rep_t significand = (aAbs & srcSignificandMask) | srcMinNormal; // Right shift by the denormalization amount with sticky. if (shift > srcSigBits) { Index: compiler-rt/lib/builtins/truncxfhf2.c =================================================================== --- /dev/null +++ compiler-rt/lib/builtins/truncxfhf2.c @@ -0,0 +1,22 @@ +//===-- lib/truncxfhf2.c - x86 FP80 -> half conversion ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// +//===----------------------------------------------------------------------===// + +#define FP80_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_80BIT) + +#define SRC_FP80 +#define DST_HALF +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI _Float16 __truncxfhf2(long double a) { + return __truncXfYf2__(a); +} + +#endif Index: compiler-rt/test/builtins/Unit/extendhfxf2_test.c =================================================================== --- /dev/null +++ compiler-rt/test/builtins/Unit/extendhfxf2_test.c @@ -0,0 +1,96 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_extendhfxf2 + +#include <stdio.h> + +#include "fp_test.h" + +long double __extendhfxf2(TYPE_FP16 a); + +int test__extendhfxf2(TYPE_FP16 a, uint64_t expectedHi, uint64_t expectedLo) +{ + long double x = __extendhfxf2(a); + int ret = compareResultLD(x, expectedHi, expectedLo); + + if (ret){ + printf("error in test__extendhfxf2(%#.4x) = %Lf, " + "expected %Lf\n", toRep16(a), x, fromRep80(expectedHi, expectedLo)); + + } + return ret; +} + +char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0}; + +int main() +{ + // qNaN + if (test__extendhfxf2(fromRep16(0x7e00), + UINT64_C(0x7fff), + UINT64_C(0xc000000000000000))) + return 1; + // NaN + if (test__extendhfxf2(fromRep16(0x7f80), + UINT64_C(0x7fff), + UINT64_C(0xf000000000000000))) + return 1; + // inf + if (test__extendhfxf2(fromRep16(0x7c00), + UINT64_C(0x7fff), + UINT64_C(0x8000000000000000))) + return 1; + // -inf + if (test__extendhfxf2(fromRep16(0xfc00), + UINT64_C(0xffff), + UINT64_C(0x8000000000000000))) + return 1; + // zero + if (test__extendhfxf2(fromRep16(0x0), + UINT64_C(0x0000), + UINT64_C(0x0000000000000000))) + return 1; + // -zero + if (test__extendhfxf2(fromRep16(0x8000), + UINT64_C(0x8000), + UINT64_C(0x0000000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x4248), + UINT64_C(0x4000), + UINT64_C(0xc900000000000000))) + return 1; + if
(test__extendhfxf2(fromRep16(0xc248), + UINT64_C(0xc000), + UINT64_C(0xc900000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x6e62), + UINT64_C(0x400b), + UINT64_C(0xcc40000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x3c00), + UINT64_C(0x3fff), + UINT64_C(0x8000000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x0400), + UINT64_C(0x3ff1), + UINT64_C(0x8000000000000000))) + return 1; + // denormal + if (test__extendhfxf2(fromRep16(0x0010), + UINT64_C(0x3feb), + UINT64_C(0x8000000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x0001), + UINT64_C(0x3fe7), + UINT64_C(0x8000000000000000))) + return 1; + if (test__extendhfxf2(fromRep16(0x8001), + UINT64_C(0xbfe7), + UINT64_C(0x8000000000000000))) + return 1; + // max (precise) + if (test__extendhfxf2(fromRep16(0x7bff), + UINT64_C(0x400e), + UINT64_C(0xffe0000000000000))) + return 1; + return 0; +} Index: compiler-rt/test/builtins/Unit/fp_test.h =================================================================== --- compiler-rt/test/builtins/Unit/fp_test.h +++ compiler-rt/test/builtins/Unit/fp_test.h @@ -38,6 +38,16 @@ return ret; } +#if __LDBL_MANT_DIG__ == 64 +static inline long double fromRep80(uint64_t hi, uint64_t lo) +{ + __uint128_t x = ((__uint128_t)hi << 64) + lo; + long double ret = 0; + memcpy(&ret, &x, 10); // an x87 extended value occupies the low 10 bytes + return ret; +} +#endif + #if __LDBL_MANT_DIG__ == 113 static inline long double fromRep128(uint64_t hi, uint64_t lo) { @@ -73,6 +83,15 @@ return ret; } +#if __LDBL_MANT_DIG__ == 64 +static inline __uint128_t toRep80(long double x) +{ + __uint128_t ret = 0; + memcpy(&ret, &x, 10); // skip the padding bytes of the long double + return ret; +} +#endif + #if __LDBL_MANT_DIG__ == 113 static inline __uint128_t toRep128(long double x) { @@ -136,6 +155,30 @@ return 1; } +#if __LDBL_MANT_DIG__ == 64 +// return 0 if equal +// use two 64-bit integers instead of one 80-bit integer +static inline int compareResultLD(long double result, + uint64_t expectedHi, + uint64_t expectedLo) +{ + __uint128_t rep =
toRep80(result); + uint64_t hi = rep >> 64; + uint64_t lo = rep; + + if (hi == expectedHi && lo == expectedLo) { + return 0; + } + // an expected quiet NaN matches any other NaN representation (signaling NaN) + else if (expectedHi == 0x7fffUL && expectedLo == 0xc000000000000000UL) { + if ((hi & 0x7fffUL) == 0x7fffUL && (lo << 1) != 0) { + return 0; + } + } + return 1; +} +#endif + #if __LDBL_MANT_DIG__ == 113 // return 0 if equal // use two 64-bit integers instead of one 128-bit integer @@ -232,6 +275,13 @@ return fromRep64(0x7ff8000000000000UL); } +#if __LDBL_MANT_DIG__ == 64 +static inline long double makeQNaN80(void) +{ + return fromRep80(0x7fffUL, 0xc000000000000000UL); +} +#endif + #if __LDBL_MANT_DIG__ == 113 static inline long double makeQNaN128(void) { @@ -254,6 +304,13 @@ return fromRep64(0x7ff0000000000000UL | (rand & 0xfffffffffffffUL)); } +#if __LDBL_MANT_DIG__ == 64 +static inline long double makeNaN80(uint64_t rand) +{ + return fromRep80(0x7fffUL, 0x8000000000000000 | (rand & 0x7fffffffffffffffUL)); +} +#endif + #if __LDBL_MANT_DIG__ == 113 static inline long double makeNaN128(uint64_t rand) { @@ -286,6 +343,18 @@ return fromRep64(0xfff0000000000000UL); } +#if __LDBL_MANT_DIG__ == 64 +static inline long double makeInf80(void) +{ + return fromRep80(0x7fffUL, 0x8000000000000000UL); // integer bit is set for infinity +} + +static inline long double makeNegativeInf80(void) +{ + return fromRep80(0xffffUL, 0x8000000000000000UL); // integer bit is set for infinity +} +#endif + #if __LDBL_MANT_DIG__ == 113 static inline long double makeInf128(void) { Index: compiler-rt/test/builtins/Unit/truncxfhf2_test.c =================================================================== --- /dev/null +++ compiler-rt/test/builtins/Unit/truncxfhf2_test.c @@ -0,0 +1,104 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_truncxfhf2 + +#include <stdio.h> + +#include "fp_test.h" + +TYPE_FP16 __truncxfhf2(long double a); + +int test__truncxfhf2(long double a, uint16_t expected) +{ + TYPE_FP16 x = __truncxfhf2(a); + int ret = compareResultH(x, expected); + + if (ret){ + printf("error in
test__truncxfhf2(%Lf) = %#.4x, " + "expected %#.4x\n", a, toRep16(x), expected); + } + return ret; +} + +char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0}; + +int main() +{ + // qNaN + if (test__truncxfhf2(makeQNaN80(), + UINT16_C(0x7e00))) + return 1; + // NaN + if (test__truncxfhf2(makeNaN80(UINT64_C(0x8000)), + UINT16_C(0x7e00))) + return 1; + // inf + if (test__truncxfhf2(makeInf80(), + UINT16_C(0x7c00))) + return 1; + if (test__truncxfhf2(makeNegativeInf80(), + UINT16_C(0xfc00))) + return 1; + // zero + if (test__truncxfhf2(0.0, UINT16_C(0x0))) + return 1; + if (test__truncxfhf2(-0.0, UINT16_C(0x8000))) + return 1; + + if (test__truncxfhf2(3.1415926535, + UINT16_C(0x4248))) + return 1; + if (test__truncxfhf2(-3.1415926535, + UINT16_C(0xc248))) + return 1; + if (test__truncxfhf2(0x1.987124876876324p+1000, + UINT16_C(0x7c00))) + return 1; + if (test__truncxfhf2(0x1.987124876876324p+12, + UINT16_C(0x6e62))) + return 1; + if (test__truncxfhf2(0x1.0p+0, + UINT16_C(0x3c00))) + return 1; + if (test__truncxfhf2(0x1.0p-14, + UINT16_C(0x0400))) + return 1; + // denormal + if (test__truncxfhf2(0x1.0p-20, + UINT16_C(0x0010))) + return 1; + if (test__truncxfhf2(0x1.0p-24, + UINT16_C(0x0001))) + return 1; + if (test__truncxfhf2(-0x1.0p-24, + UINT16_C(0x8001))) + return 1; + if (test__truncxfhf2(0x1.5p-25, + UINT16_C(0x0001))) + return 1; + // and back to zero + if (test__truncxfhf2(0x1.0p-25, + UINT16_C(0x0000))) + return 1; + if (test__truncxfhf2(-0x1.0p-25, + UINT16_C(0x8000))) + return 1; + // max (precise) + if (test__truncxfhf2(65504.0, + UINT16_C(0x7bff))) + return 1; + // max (rounded) + if (test__truncxfhf2(65519.0, + UINT16_C(0x7bff))) + return 1; + // max (to +inf) + if (test__truncxfhf2(65520.0, + UINT16_C(0x7c00))) + return 1; + if (test__truncxfhf2(-65520.0, + UINT16_C(0xfc00))) + return 1; + if (test__truncxfhf2(65536.0, + UINT16_C(0x7c00))) + return 1; + return 0; +}