Index: lib/builtins/fp_trunc.h
===================================================================
--- /dev/null
+++ lib/builtins/fp_trunc.h
@@ -0,0 +1,64 @@
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Selects the source and destination types from the SRC_* / DST_* macros.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t; // integer type that holds the bits of src_t
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52; // stored significand bits of src_t
+
+#elif defined SRC_QUAD
+typedef long double src_t; // NOTE(review): assumes a 128-bit long double
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t) // a cast: SRC_REP_C(1) -> (__uint128_t)(1)
+static const int srcSigBits = 112;
+
+#else
+#error Source should be double precision or quad precision!
+#endif //end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t; // integer type that holds the bits of dst_t
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52; // stored significand bits of dst_t
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#else
+#error Destination should be single precision or double precision!
+#endif //end destination precision
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) { // bits of x as an integer
+    const union { src_t f; src_rep_t i; } rep = {.f = x};
+    return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) { // integer bits x as a dst_t
+    const union { dst_t f; dst_rep_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+#endif // FP_TRUNC_HEADER
Index: lib/builtins/fp_trunc_impl.inc
===================================================================
--- /dev/null
+++ lib/builtins/fp_trunc_impl.inc
@@ -0,0 +1,135 @@
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode. The constants and types defined in fp_trunc.h (chosen by the
+// includer's SRC_* / DST_* macros) parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision or from quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+// target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_trunc.h"
+
+static inline dst_t __truncXfYf2__(src_t a) {
+    /* Various constants whose values follow from the type parameters.
+       Any reasonable optimizer will fold and propagate all of these. */
+    const int srcBits = sizeof(src_t)*CHAR_BIT;
+    const int srcExpBits = srcBits - srcSigBits - 1;
+    const int srcInfExp = (1 << srcExpBits) - 1; /* all-ones exponent field */
+    const int srcExpBias = srcInfExp >> 1;
+
+    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+    const src_rep_t significandMask = srcMinNormal - 1;
+    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+    const src_rep_t srcAbsMask = srcSignMask - 1;
+    const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
+    const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
+    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+    const src_rep_t srcNaNCode = srcQNaN - 1;
+
+    const int dstBits = sizeof(dst_t)*CHAR_BIT;
+    const int dstExpBits = dstBits - dstSigBits - 1;
+    const int dstInfExp = (1 << dstExpBits) - 1; /* all-ones exponent field */
+    const int dstExpBias = dstInfExp >> 1;
+
+    const int underflowExponent = srcExpBias + 1 - dstExpBias;
+    const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+    const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
+    const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
+
+    const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
+    const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+    /* Break a into a sign and representation of the absolute value. */
+    const src_rep_t aRep = srcToRep(a);
+    const src_rep_t aAbs = aRep & srcAbsMask;
+    const src_rep_t sign = aRep & srcSignMask;
+    dst_rep_t absResult;
+
+    if (aAbs - underflow < aAbs - overflow) {
+        /* underflow <= aAbs < overflow (one unsigned compare that relies on
+           wrap-around): the exponent of a is normal in the destination
+           format, so right-shift with rounding and rebias the exponent. */
+        absResult = aAbs >> (srcSigBits - dstSigBits);
+        absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+        const src_rep_t roundBits = aAbs & roundMask;
+        /* Round to nearest. */
+        if (roundBits > halfway)
+            absResult++;
+        /* Ties to even. */
+        else if (roundBits == halfway)
+            absResult += absResult & 1;
+    }
+    else if (aAbs > srcInfinity) {
+        /* a is NaN.
+           Conjure the result by beginning with infinity, setting the qNaN
+           bit and inserting the (truncated) trailing NaN field. */
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+        absResult |= dstQNaN;
+        absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
+    }
+    else if (aAbs > overflow) {
+        /* a overflows to infinity. */
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+    }
+    else {
+        /* a underflows on conversion to the destination type or is an exact
+           zero. The result may be a denormal or zero. Extract the exponent
+           to get the shift amount for the denormalization. */
+        const int aExp = aAbs >> srcSigBits;
+        const int shift = srcExpBias - dstExpBias - aExp + 1;
+
+        const src_rep_t significand = (aRep & significandMask) | srcMinNormal;
+
+        /* Right shift by the denormalization amount with sticky. */
+        if (shift > srcSigBits) {
+            absResult = 0;
+        } else {
+            const bool sticky = significand << (srcBits - shift); /* any bit lost? */
+            src_rep_t denormalizedSignificand = significand >> shift | sticky;
+            absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
+            const src_rep_t roundBits = denormalizedSignificand & roundMask;
+            /* Round to nearest. */
+            if (roundBits > halfway)
+                absResult++;
+            /* Ties to even. */
+            else if (roundBits == halfway)
+                absResult += absResult & 1;
+        }
+    }
+
+    /* Apply the signbit to (dst_t)abs(a). */
+    const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
+    return dstFromRep(result);
+}
Index: lib/builtins/truncdfsf2.c
===================================================================
--- lib/builtins/truncdfsf2.c
+++ lib/builtins/truncdfsf2.c
@@ -6,163 +6,13 @@
 // Source Licenses. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements a fairly generic conversion from a wider to a narrower
-// IEEE-754 floating-point type in the default (round to nearest, ties to even)
-// rounding mode. The constants and types defined following the includes below
-// parameterize the conversion.
-//
-// This routine can be trivially adapted to support conversions to
-// half-precision or from quad-precision. It does not support types that don't
-// use the usual IEEE-754 interchange formats; specifically, some work would be
-// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
-// double-double format.
-// -// Note please, however, that this implementation is only intended to support -// *narrowing* operations; if you need to convert to a *wider* floating-point -// type (e.g. float -> double), then this routine will not do what you want it -// to. -// -// It also requires that integer types at least as large as both formats -// are available on the target platform; this may pose a problem when trying -// to add support for quad on some 32-bit systems, for example. -// -// Finally, the following assumptions are made: -// -// 1. floating-point types and integer types have the same endianness on the -// target platform -// -// 2. quiet NaNs, if supported, are indicated by the leading bit of the -// significand field being set -// -//===----------------------------------------------------------------------===// -#include "int_lib.h" - -typedef double src_t; -typedef uint64_t src_rep_t; -#define SRC_REP_C UINT64_C -static const int srcSigBits = 52; - -typedef float dst_t; -typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C -static const int dstSigBits = 23; - -// End of specialization parameters. Two helper routines for conversion to and -// from the representation of floating-point data as integer values follow. - -static inline src_rep_t srcToRep(src_t x) { - const union { src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; -} - -static inline dst_t dstFromRep(dst_rep_t x) { - const union { dst_t f; dst_rep_t i; } rep = {.i = x}; - return rep.f; -} - -// End helper routines. Conversion implementation follows. +#define SRC_DOUBLE +#define DST_SINGLE +#include "fp_trunc_impl.inc" ARM_EABI_FNALIAS(d2f, truncdfsf2) -COMPILER_RT_ABI dst_t -__truncdfsf2(src_t a) { - - // Various constants whose values follow from the type parameters. - // Any reasonable optimizer will fold and propagate all of these. 
- const int srcBits = sizeof(src_t)*CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; - const int srcInfExp = (1 << srcExpBits) - 1; - const int srcExpBias = srcInfExp >> 1; - - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t significandMask = srcMinNormal - 1; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; - const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); - - const int dstBits = sizeof(dst_t)*CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; - const int dstInfExp = (1 << dstExpBits) - 1; - const int dstExpBias = dstInfExp >> 1; - - const int underflowExponent = srcExpBias + 1 - dstExpBias; - const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; - const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; - const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; - - const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); - const dst_rep_t dstNaNCode = dstQNaN - 1; - - // Break a into a sign and representation of the absolute value - const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; - - if (aAbs - underflow < aAbs - overflow) { - // The exponent of a is within the range of normal numbers in the - // destination format. We can convert by simply right-shifting with - // rounding and adjusting the exponent. 
- absResult = aAbs >> (srcSigBits - dstSigBits); - absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; - - const src_rep_t roundBits = aAbs & roundMask; - - // Round to nearest - if (roundBits > halfway) - absResult++; - - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } - - else if (aAbs > srcInfinity) { - // a is NaN. - // Conjure the result by beginning with infinity, setting the qNaN - // bit and inserting the (truncated) trailing NaN field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= dstQNaN; - absResult |= aAbs & dstNaNCode; - } - - else if (aAbs > overflow) { - // a overflows to infinity. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - } - - else { - // a underflows on conversion to the destination type or is an exact - // zero. The result may be a denormal or zero. Extract the exponent - // to get the shift amount for the denormalization. - const int aExp = aAbs >> srcSigBits; - const int shift = srcExpBias - dstExpBias - aExp + 1; - - const src_rep_t significand = (aRep & significandMask) | srcMinNormal; - - // Right shift by the denormalization amount with sticky. - if (shift > srcSigBits) { - absResult = 0; - } else { - const bool sticky = significand << (srcBits - shift); - src_rep_t denormalizedSignificand = significand >> shift | sticky; - absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); - const src_rep_t roundBits = denormalizedSignificand & roundMask; - // Round to nearest - if (roundBits > halfway) - absResult++; - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } - } - - // Apply the signbit to (dst_t)abs(a). 
-    const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
-    return dstFromRep(result);
-
+COMPILER_RT_ABI float __truncdfsf2(double a) { // double -> float
+    return __truncXfYf2__(a);
 }
Index: lib/builtins/trunctfdf2.c
===================================================================
--- /dev/null
+++ lib/builtins/trunctfdf2.c
@@ -0,0 +1,21 @@
+//===-- lib/trunctfdf2.c - quad -> double conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#ifdef CRT_HAS_128BIT
+
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI double __trunctfdf2(long double a) { // quad -> double
+    return __truncXfYf2__(a);
+}
+
+#endif
Index: lib/builtins/trunctfsf2.c
===================================================================
--- /dev/null
+++ lib/builtins/trunctfsf2.c
@@ -0,0 +1,21 @@
+//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#ifdef CRT_HAS_128BIT
+
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __trunctfsf2(long double a) { // quad -> float
+    return __truncXfYf2__(a);
+}
+
+#endif
Index: test/builtins/Unit/trunctfdf2_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/trunctfdf2_test.c
@@ -0,0 +1,120 @@
+//===-------------- trunctfdf2_test.c - Test __trunctfdf2 -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __trunctfdf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LP64__
+
+#include <limits.h>
+#include <stdint.h>
+
+static inline double fromRep64(uint64_t x)
+{
+    const union {double f; uint64_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline long double fromRep128(uint64_t hi, uint64_t lo)
+{
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    const union {long double f; __uint128_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline uint64_t toRep(double x)
+{
+    const union {double f; uint64_t i;} rep = {.f = x};
+    return rep.i;
+}
+
+// Returns 0 if result has exactly the expected bit pattern, 1 otherwise.
+// (Quad inputs are built by fromRep128 from two 64-bit integers instead of
+// one 128-bit integer because a 128-bit constant can't be written directly.)
+static inline int compareResult(double result,
+                                uint64_t expected)
+{
+    uint64_t rep = toRep(result);
+
+    if (rep == expected){
+        return 0;
+    }
+    // expected is the default qNaN: accept any NaN encoding (e.g. signaling)
+    else if (expected == 0x7ff8000000000000UL){
+        if ((rep & 0x7ff0000000000000UL) == 0x7ff0000000000000UL &&
+            (rep & 0xfffffffffffffUL) > 0){
+            return 0;
+        }
+    }
+    return 1;
+}
+
+double __trunctfdf2(long double a);
+
+int test__trunctfdf2(long double a, uint64_t expected)
+{
+    double x = __trunctfdf2(a);
+    int ret = compareResult(x, expected);
+
+    if (ret)
+    {
+        printf("error in test__trunctfdf2(%.20Lf) = %lf, "
+               "expected %lf\n", a, x, fromRep64(expected));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LP64__
+    // qNaN
+    if (test__trunctfdf2(fromRep128(0x7fff800000000000UL, 0x0UL),
+                         0x7ff8000000000000UL))
+        return 1;
+    // NaN
+    if (test__trunctfdf2(fromRep128(0x7fff810000000000UL, 0x0UL),
+                         0x7ff8100000000000UL))
+        return 1;
+    if (test__trunctfdf2(fromRep128(0x7fff700000000000UL, 0x0UL),
+                         0x7fff000000000000UL))
+        return 1;
+    // inf
+    if (test__trunctfdf2(fromRep128(0x7fff000000000000UL, 0x0UL),
+                         0x7ff0000000000000UL))
+        return 1;
+    // zero
+    if (test__trunctfdf2(0.0L, 0x0UL))
+        return 1;
+
+    if (test__trunctfdf2(0x1.af23456789bbaaab347645365cdep+5L,
+                         0x404af23456789bbbUL))
+        return 1;
+    if (test__trunctfdf2(0x1.dedafcff354b6ae9758763545432p-9L,
+                         0x3f6dedafcff354b7UL))
+        return 1;
+    if (test__trunctfdf2(0x1.2f34dd5f437e849b4baab754cdefp+4534L,
+                         0x7ff0000000000000UL))
+        return 1;
+    if (test__trunctfdf2(0x1.edcbff8ad76ab5bf46463233214fp-435L,
+                         0x24cedcbff8ad76abUL))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
Index: test/builtins/Unit/trunctfsf2_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/trunctfsf2_test.c
@@ -0,0 +1,118 @@
+//===--------------- trunctfsf2_test.c - Test __trunctfsf2 ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __trunctfsf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LP64__
+
+#include <limits.h>
+#include <stdint.h>
+
+static inline float fromRep32(uint32_t x)
+{
+    const union {float f; uint32_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline long double fromRep128(uint64_t hi, uint64_t lo)
+{
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    const union {long double f; __uint128_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline uint32_t toRep(float x)
+{
+    const union {float f; uint32_t i;} rep = {.f = x};
+    return rep.i;
+}
+
+// Returns 0 if result has exactly the expected bit pattern, 1 otherwise.
+// (Quad inputs are built by fromRep128 from two 64-bit integers instead of
+// one 128-bit integer because a 128-bit constant can't be written directly.)
+static inline int compareResult(float result,
+                                uint32_t expected)
+{
+    uint32_t rep = toRep(result);
+
+    if (rep == expected){
+        return 0;
+    }
+    // expected is the default qNaN: accept any NaN encoding (e.g. signaling)
+    else if (expected == 0x7fc00000U){
+        if ((rep & 0x7f800000U) == 0x7f800000U &&
+            (rep & 0x7fffffU) > 0){
+            return 0;
+        }
+    }
+    return 1;
+}
+
+float __trunctfsf2(long double a);
+
+int test__trunctfsf2(long double a, uint32_t expected)
+{
+    float x = __trunctfsf2(a);
+    int ret = compareResult(x, expected);
+
+    if (ret){
+        printf("error in test__trunctfsf2(%.20Lf) = %f, "
+               "expected %f\n", a, x, fromRep32(expected));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LP64__
+    // qNaN
+    if (test__trunctfsf2(fromRep128(0x7fff800000000000UL, 0x0UL),
+                         0x7fc00000U))
+        return 1;
+    // NaN
+    if (test__trunctfsf2(fromRep128(0x7fff810000000000UL, 0x0UL),
+                         0x7fc08000U))
+        return 1;
+    if (test__trunctfsf2(fromRep128(0x7fff700000000000UL, 0x0UL),
+                         0x7ff80000U))
+        return 1;
+    // inf
+    if (test__trunctfsf2(fromRep128(0x7fff000000000000UL, 0x0UL),
+                         0x7f800000U))
+        return 1;
+    // zero
+    if (test__trunctfsf2(0.0L, 0x0U))
+        return 1;
+
+    if (test__trunctfsf2(0x1.23a2abb4a2ddee355f36789abcdep+5L,
+                         0x4211d156U))
+        return 1;
+    if (test__trunctfsf2(0x1.e3d3c45bd3abfd98b76a54cc321fp-9L,
+                         0x3b71e9e2U))
+        return 1;
+    if (test__trunctfsf2(0x1.234eebb5faa678f4488693abcdefp+4534L,
+                         0x7f800000U))
+        return 1;
+    if (test__trunctfsf2(0x1.edcba9bb8c76a5a43dd21f334634p-435L, 0x0U))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}