diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -98,6 +98,8 @@ floatuntidf.c floatuntisf.c int_util.c + log10scaledsi3.c + log10scaleddi3.c lshrdi3.c lshrti3.c moddi3.c diff --git a/compiler-rt/lib/builtins/log10scaleddi3.c b/compiler-rt/lib/builtins/log10scaleddi3.c new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/builtins/log10scaleddi3.c @@ -0,0 +1,198 @@ +//===------- lib/log10scaleddi3.c - Scaled Integer log10() --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements log10() for 64 bit scaled integers, with a scale of at +// most 60. This implementation is based on Clay. S. Turner's fast binary +// logarithm algorithm. If the true result of log10() cannot be precisely stored +// in the result type, the value is rounded down towards negative infinity. +// Only up to 60 bits are allowed for the scale since 4 bits are required to +// reppresent 10. +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "int_lib.h" + +#define BASE 10 + +#if !defined(__SIZEOF_INT128__) +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation. +static void wideMultiply(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} + +// 128 bit funnel shift right from a 64 bit hi and lo. +static void funnelShiftRight(uint64_t hi, uint64_t lo, uint32_t shift, + uint64_t *res_hi, uint64_t *res_lo) { + if (shift >= 128) { + *res_hi = 0; + *res_lo = 0; + } else if (shift > 64) { + *res_hi = 0; + *res_lo = hi >> (shift - 64); + } else if (shift == 64) { + *res_hi = 0; + *res_lo = hi; + } else if (shift > 0) { + *res_hi = hi >> shift; + *res_lo = (hi << (64 - shift)) | (lo >> shift); + } else { + *res_hi = hi; + *res_lo = lo; + } +} + +#define NUM_BITS 64 + +void div10(uint64_t hi, uint64_t lo, uint64_t *res_hi, uint64_t *res_lo) { + uint64_t quot = 0, rem = 0; + + for (unsigned i = 0; i < NUM_BITS; ++i) { + quot <<= 1; + rem <<= 1; + rem |= (hi >> ((NUM_BITS - 1) - i)) & 1; + + if (rem >= BASE) { + rem -= BASE; + quot |= 1; + } + } + + *res_hi = quot; + + for (unsigned i = 0; i < NUM_BITS; ++i) { + quot <<= 1; + rem <<= 1; + rem |= (lo >> ((NUM_BITS - 1) - i)) & 1; + + if (rem >= BASE) { + rem -= BASE; + quot |= 1; + } + } + + *res_lo = quot; +} + +#endif + +// Returns: log10(x), rounded down towards negative infinity in the scale +// provided in the 2nd argument. If x is zero, INT64_MIN is returned. +// If the true result cannot fit inside a signed 64 bit integer, +// INT64_MIN is returned. This only affects large, negative results. +// If the scale is greater than 60, INT64_MAX is returned to respresent +// an invalid scale. +// +// Assumptions: x is represents an unsigned scaled integer. When the scale is 0, +// this becomes a regular integer log10() function. The scale must +// be at most 60 since we will need to respresent a scaled 10 (10 +// << scale) in an unsigned 64 bit integer. + +int64_t __log10scaleddi3(uint64_t x, uint32_t scale) { + // An input of zero will return the smallest value possible to represent + // negative infinity. + if (x == 0) + return INT64_MIN; + + // We must be able to represent a scaled 10 in the bit width we have now. + // In order to fit a scaled value of 10 in 64 bits, we need 4 bits for the + // integer, leaving 60 bits for the scale. + if (scale > 60) + return INT64_MAX; + + uint64_t oneval = UINT64_C(1) << scale; + uint64_t baseval = (uint64_t)BASE << scale; + int64_t y = 0; + + // Normalize x up to at least 1. + while (x < oneval) { + x *= BASE; + --y; + } + + // Normalize x to at most 10. + while (x >= baseval) { + x /= BASE; + ++y; + } + + // Return early since there is not fractional part. + if (scale == 0) + return y; + + // We overflowed past lowest value we can represent. + if (y < (INT64_MIN >> scale)) + return INT64_MIN; + + y <<= scale; + + // At this point, the value of res is between a scaled 1 and 10. + // Here we calculate the fractional part of the result. + int64_t b = INT64_C(1) << (scale - 1); + +#if defined(__SIZEOF_INT128__) + __uint128_t res = x; + for (size_t i = 0; i < scale; i++) { + assert(res <= UINT64_MAX); + res = res * res >> (scale - 1); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + int roundbit = res & 1; + res >>= 1; + if (roundbit) + ++res; + + if (res >= baseval) { + res /= BASE; + y += b; + } + b >>= 1; + } +#else + uint64_t res_hi = 0, res_lo = x; + for (size_t i = 0; i < scale; i++) { + assert(res_hi == 0); + wideMultiply(res_lo, res_lo, &res_hi, &res_lo); + funnelShiftRight(res_hi, res_lo, scale - 1, &res_hi, &res_lo); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + int roundbit = res_lo & 1; + funnelShiftRight(res_hi, res_lo, 1, &res_hi, &res_lo); + if (roundbit) { + ++res_lo; + if (res_lo == 0) // overflow + ++res_hi; + } + + if (res_hi > 0 || res_lo >= baseval) { + div10(res_hi, res_lo, &res_hi, &res_lo); + y += b; + } + b >>= 1; + } +#endif + + return y; +} diff --git a/compiler-rt/lib/builtins/log10scaledsi3.c b/compiler-rt/lib/builtins/log10scaledsi3.c new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/builtins/log10scaledsi3.c @@ -0,0 +1,97 @@ +//===------- lib/log10scaledsi3.c - Scaled Integer log10() --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements log10() for 32 bit scaled integers, with a scale of at +// most 28. This implementation is based on Clay. S. Turner's fast binary +// logarithm algorithm. If the true result of log10() cannot be precisely stored +// in the result type, the value is rounded down towards negative infinity. +// Only up to 28 bits are allowed for the scale since 4 bits are required to +// represent 10. +// +//===----------------------------------------------------------------------===// + +#include + +#include "int_lib.h" + +#define BASE 10 + +// Returns: log10(x), rounded down towards negative infinity in the scale +// provided in the 2nd argument. If x is zero, INT32_MIN is returned. +// If the true result cannot fit inside a signed 32 bit integer, +// INT32_MIN is returned. This only affects large, negative results. +// If the scale is greater than 28, INT32_MAX is returned to respresent +// an invalid scale. +// +// Assumptions: x is represents an unsigned scaled integer. When the scale is 0, +// this becomes a regular integer log10() function. The scale must +// be at most 28 since we will need to respresent a scaled 10 (10 +// << scale) in an unsigned 28 bit integer. + +int32_t __log10scaledsi3(uint32_t x, uint32_t scale) { + // An input of zero will return the smallest value possible to represent + // negative infinity. + if (x == 0) + return INT32_MIN; + + // We must be able to represent a scaled 10 in the bit width we have now. + // In order to fit a scaled value of 10 in 64 bits, we need 4 bits for the + // integer, leaving 60 bits for the scale. + if (scale > 28) + return INT32_MAX; + + uint32_t oneval = UINT32_C(1) << scale; + uint32_t baseval = (uint32_t)BASE << scale; + int32_t y = 0; + + // Normalize x up to at least 1. + while (x < oneval) { + x *= BASE; + --y; + } + + // Normalize x to at most 10. + while (x >= baseval) { + x /= BASE; + ++y; + } + + // Return early since there is not fractional part. + if (scale == 0) + return y; + + // We overflowed past lowest value we can represent. + if (y < (INT32_MIN >> scale)) + return INT32_MIN; + + y <<= scale; + + // At this point, the value of res is between a scaled 1 and 10. + // Here we calculate the fractional part of the result. + int32_t b = INT32_C(1) << (scale - 1); + + uint64_t res = x; + for (size_t i = 0; i < scale; i++) { + res = res * res >> (scale - 1); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + int roundbit = res & 1; + res >>= 1; + if (roundbit) + ++res; + + if (res >= baseval) { + res /= BASE; + y += b; + } + b >>= 1; + } + + return y; +} diff --git a/compiler-rt/test/builtins/Unit/log10scaledint.c b/compiler-rt/test/builtins/Unit/log10scaledint.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/log10scaledint.c @@ -0,0 +1,241 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t + +#include +#include + +#define CHECK_SUCCESS(x) \ + if (x) \ + return 1; + +#define SCALE 31 +#define SCALED_ONE (UINT64_C(1) << SCALE) +#define __log10_64(x) __log10scaleddi3(x, SCALE) + +#define CHECK_LOG10_64_SCALE(VAL, SCALE, EXPECTED) \ + if (__log10scaleddi3(VAL, SCALE) != EXPECTED) { \ + printf( \ + "error in __log10scaleddi3(%llu, %u) on line %d. Expected %lld, found %lld.\n", \ + VAL, SCALE, __LINE__, EXPECTED, __log10scaleddi3(VAL, SCALE)); \ + return 1; \ + } + +#define CHECK_LOG10_64(VAL, EXPECTED) \ + if (__log10_64(VAL) != EXPECTED) { \ + printf( \ + "error in __log10scaleddi3(%llu, %u) on line %d. Expected %lld, found %lld.\n", \ + VAL, SCALE, __LINE__, EXPECTED, __log10_64(VAL)); \ + return 1; \ + } + +int64_t __log10scaleddi3(uint64_t x, uint32_t scale); +int32_t __log10scaledsi3(uint32_t x, uint32_t scale); + +int check_log10_64() { + // Powers of 10. + // These values will not have the precision necessary to represent the true + // return values of 1og10() since reciprocal powers of 10 cannot be evenly + // represented with sums of recirpocal powers of 2 (1/10^n cannot be + // represented exactly with sums of 1/2^m). This is a binary log function so + // we can only attempt to represent powers of 10 as close as possible to sums + // of powers of 2. + // For example, we cannot get -3 exactly from log10(0.001), but we can + // precisely get log10((1 << SCALE) / 1000) and get a value close to -3 << + // SCALE. The error between this close result and the true value of -3 << + // SCALE will always be at most 1 if it is rounded. + CHECK_LOG10_64(SCALED_ONE / 10, + -2147483652); // -2147483652 / 2^SCALE => -1.000... + CHECK_LOG10_64(SCALED_ONE / 100, -4294967317); + CHECK_LOG10_64(SCALED_ONE / 1000, -6442451226); + CHECK_LOG10_64(SCALED_ONE / 10000, -8589936177); + + // These should produce precise whole integer results since the scaled integer + // input is not a fraction. + CHECK_LOG10_64(SCALED_ONE, 0); + CHECK_LOG10_64(10 * SCALED_ONE, SCALED_ONE); + CHECK_LOG10_64(100 * SCALED_ONE, 2 * SCALED_ONE); + CHECK_LOG10_64(1000 * SCALED_ONE, 3 * SCALED_ONE); + CHECK_LOG10_64(10000 * SCALED_ONE, 4 * SCALED_ONE); + + // Sums of powers of 2 + CHECK_LOG10_64(SCALED_ONE / 2 + SCALED_ONE / 4, -268303894); + CHECK_LOG10_64(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, -124536758); + CHECK_LOG10_64(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8 + + SCALED_ONE / 16, + -60191226); + CHECK_LOG10_64(SCALED_ONE / 8 + SCALED_ONE / 16, -1561217881); + + // Integers + CHECK_LOG10_64(3 * SCALED_ONE, 1024610092); + CHECK_LOG10_64(5 * SCALED_ONE, 1501026654); + CHECK_LOG10_64(7 * SCALED_ONE, 1814834221); + CHECK_LOG10_64(11 * SCALED_ONE, 2236373762); + + CHECK_LOG10_64(3 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4, 1232722760); + CHECK_LOG10_64(5 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, + 1651431827); + CHECK_LOG10_64(7 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + + SCALED_ONE / 8 + SCALED_ONE / 16, + 1932056116); + CHECK_LOG10_64(11 * SCALED_ONE + SCALED_ONE / 8 + SCALED_ONE / 16, 2252137072); + + // Other numbers that can't evenly be broken into limited sums of powers of 2 + CHECK_LOG10_64(6746518852, 1067621222); // pi + CHECK_LOG10_64(5837465777, 932640298); // e + CHECK_LOG10_64(3037000499, 323228496); // sqrt(2) + + // Zero returns min val to represent -infinity + CHECK_LOG10_64(0, INT64_MIN); + + // Max + CHECK_LOG10_64(UINT64_MAX, 21333080777); + + // Min + CHECK_LOG10_64(1, -20040166791); + CHECK_LOG10_64(2, -19393709798); + + // Scale of zero results in integr log10(). + CHECK_LOG10_64_SCALE(0, 0, INT64_MIN); + CHECK_LOG10_64_SCALE(1, 0, 0); + CHECK_LOG10_64_SCALE(9, 0, 0); + CHECK_LOG10_64_SCALE(10, 0, 1); + CHECK_LOG10_64_SCALE(UINT64_MAX, 0, 19); + + // Small scales. Large values. + CHECK_LOG10_64_SCALE(UINT64_MAX, 1, 37); + CHECK_LOG10_64_SCALE(UINT64_MAX, 2, 74); + + // Large scales. Large values. + CHECK_LOG10_64_SCALE(UINT64_C(1) << 49, 50, -338929644074912); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 54, 55, -10845748610397182); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 57, 58, -86765988883177456); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 58, 59, -173531977766354911); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 59, 60, -347063955532709821); + + // At this point, we are no longer able to fit a scaled 10 in 64 bits. + CHECK_LOG10_64_SCALE(UINT64_C(1) << 60, 61, INT64_MAX); + CHECK_LOG10_64_SCALE(1, 63, INT64_MAX); + CHECK_LOG10_64_SCALE(1, 64, INT64_MAX); + CHECK_LOG10_64_SCALE(0, 64, INT64_MIN); // Return min bc input is still zero. + + // Large scales. Small values. + CHECK_LOG10_64_SCALE(1, 60, INT64_MIN); // Cannot represent 1 / 2^60 in 64 bits. + CHECK_LOG10_64_SCALE(UINT64_C(1) << 40, 60, -6941279110654196416); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 34, 60, -9023662843850455340); + CHECK_LOG10_64_SCALE(UINT64_C(1) << 33, 60, INT64_MIN); + + return 0; +} + +#undef SCALE +#undef SCALED_ONE + +#define SCALE 15 +#define SCALED_ONE (UINT32_C(1) << SCALE) +#define __log10_32(x) __log10scaledsi3(x, SCALE) + +#define CHECK_LOG10_32_SCALE(VAL, SCALE, EXPECTED) \ + if (__log10scaledsi3(VAL, SCALE) != EXPECTED) { \ + printf( \ + "error in __log10scaledsi3(%d, %u) on line %d. Expected %d, found %d.\n", \ + VAL, SCALE, __LINE__, EXPECTED, __log10scaledsi3(VAL, SCALE)); \ + return 1; \ + } + +#define CHECK_LOG10_32(VAL, EXPECTED) \ + if (__log10_32(VAL) != EXPECTED) { \ + printf( \ + "error in __log10scaledsi3(%d, %u) on line %d. Expected %d, found %d.\n", \ + VAL, SCALE, __LINE__, EXPECTED, __log10_32(VAL)); \ + return 1; \ + } + +// Same as the previous test but for 32 bit ints. +int check_log10_32() { + // Powers of 10. + CHECK_LOG10_32(SCALED_ONE / 10, -32772); // -32772 / 2^SCALE => -1.000... + CHECK_LOG10_32(SCALED_ONE / 100, -65566); + CHECK_LOG10_32(SCALED_ONE / 1000, -98642); + CHECK_LOG10_32(SCALED_ONE / 10000, -132328); + + // These should produce precise whole integer results since the scaled integer + // input is not a fraction. + CHECK_LOG10_32(SCALED_ONE, 0); + CHECK_LOG10_32(10 * SCALED_ONE, SCALED_ONE); + CHECK_LOG10_32(100 * SCALED_ONE, 2 * SCALED_ONE); + CHECK_LOG10_32(1000 * SCALED_ONE, 3 * SCALED_ONE); + CHECK_LOG10_32(10000 * SCALED_ONE, 4 * SCALED_ONE); + + // Sums of powers of 2 + CHECK_LOG10_32(SCALED_ONE / 2 + SCALED_ONE / 4, -4094); + CHECK_LOG10_32(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, -1901); + CHECK_LOG10_32(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8 + + SCALED_ONE / 16, + -919); + CHECK_LOG10_32(SCALED_ONE / 8 + SCALED_ONE / 16, -23823); + + // Integers + CHECK_LOG10_32(3 * SCALED_ONE, 15634); + CHECK_LOG10_32(5 * SCALED_ONE, 22903); + CHECK_LOG10_32(7 * SCALED_ONE, 27692); + CHECK_LOG10_32(11 * SCALED_ONE, 34124); + + CHECK_LOG10_32(3 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4, 18809); + CHECK_LOG10_32(5 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, + 25198); + CHECK_LOG10_32(7 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + + SCALED_ONE / 8 + SCALED_ONE / 16, + 29480); + CHECK_LOG10_32(11 * SCALED_ONE + SCALED_ONE / 8 + SCALED_ONE / 16, 34364); + + // Other numbers that can't evenly be broken into limited sums of powers of 2 + CHECK_LOG10_32(102943, 16290); // pi + CHECK_LOG10_32(89072, 14230); // e + CHECK_LOG10_32(46340, 4931); // sqrt(2) + + // Zero returns min val to represent -infinity + CHECK_LOG10_32(0, INT32_MIN); + + // Max + CHECK_LOG10_32(UINT32_MAX, 167690); + + // Min + CHECK_LOG10_32(1, -147963); + CHECK_LOG10_32(2, -138099); + + // Scale of zero results in integr log10(). + CHECK_LOG10_32_SCALE(0, 0, INT32_MIN); + CHECK_LOG10_32_SCALE(1, 0, 0); + CHECK_LOG10_32_SCALE(9, 0, 0); + CHECK_LOG10_32_SCALE(10, 0, 1); + CHECK_LOG10_32_SCALE(UINT32_MAX, 0, 9); + + // Small scales. Large values. + CHECK_LOG10_32_SCALE(UINT32_MAX, 1, 18); + CHECK_LOG10_32_SCALE(UINT32_MAX, 2, 36); + + // Large scales. Large values. + CHECK_LOG10_32_SCALE(UINT32_C(1) << 23, 24, -5050446); + CHECK_LOG10_32_SCALE(UINT32_C(1) << 24, 25, -10100891); + CHECK_LOG10_32_SCALE(UINT32_C(1) << 25, 26, -20201782); + CHECK_LOG10_32_SCALE(UINT32_C(1) << 26, 27, -40403563); + CHECK_LOG10_32_SCALE(UINT32_C(1) << 27, 28, -80807125); + + // At this point, we are no longer able to fit a scaled 10 in 64 bits. + CHECK_LOG10_32_SCALE(UINT32_C(1) << 28, 29, INT32_MAX); + CHECK_LOG10_32_SCALE(1, 32, INT32_MAX); + CHECK_LOG10_32_SCALE(1, 64, INT32_MAX); + CHECK_LOG10_32_SCALE(0, 64, INT32_MIN); // Return min bc input is still zero. + + // Large scales. Small values. + CHECK_LOG10_32_SCALE(1, 28, INT32_MIN); // Cannot represent 1 / 2^28 in 32 bits. + CHECK_LOG10_32_SCALE(UINT32_C(1) << 2, 28, -2100985229); + CHECK_LOG10_32_SCALE(UINT32_C(1) << 1, 28, INT32_MIN); + + return 0; +} + +int main() { + CHECK_SUCCESS(check_log10_64()); + CHECK_SUCCESS(check_log10_32()); + return 0; +}