diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -98,6 +98,8 @@ floatuntidf.c floatuntisf.c int_util.c + log10scaledsi3.c + log10scaleddi3.c lshrdi3.c lshrti3.c moddi3.c diff --git a/compiler-rt/lib/builtins/log10scaleddi3.c b/compiler-rt/lib/builtins/log10scaleddi3.c new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/builtins/log10scaleddi3.c @@ -0,0 +1,111 @@ +//===------- lib/log10scaleddi3.c - Scaled Integer log10() --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements log10() for scaled integers up to a scale of 64. +// This implementation is based on Clay. S. Turner's fast binary logarithm +// algorithm. If the true result of log10() cannot be precisely stored in the +// result type, the value is rounded down towards negative infinity. +// +//===----------------------------------------------------------------------===// + +#include + +#include "int_lib.h" + +#define BASE 10 + +#ifndef CRT_HAS_128BIT +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation. +static void wideMultiply(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} + +// 128 bit funnel shift right from a 64 bit hi and lo. This returns the lower +// 64 bits of the result. +static uint64_t funnelShiftRight(uint64_t hi, uint64_t lo, uint32_t shift) { + if (shift >= 128) + return 0; + if (shift > 64) + return hi >> (shift - 64); + if (shift == 64) + return hi; + if (shift > 0) + return (hi << (64 - shift)) | (lo >> shift); + return lo; +} +#endif + +// Returns: log10(x), rounded down towards negative infinity in the scale +// provided in the 2nd argument. If x is zero, INT64_MIN is returned. +// +// Assumptions: x is represents an unsigned scaled integer +// scale is at most 64. When the scale is 0, this becomes a regular +// integer log10() function. + +int64_t __log10scaleddi3(uint64_t x, uint32_t scale) { + int64_t b = UINT64_C(1) << (scale - 1); + int64_t y = 0; + uint64_t oneval = UINT64_C(1) << scale; + uint64_t baseval = ((uint64_t)BASE) << scale; + + if (x == 0) + return INT64_MIN; // represents negative infinity + + while (x < oneval) { + x *= BASE; + y -= oneval; + } + + while (x >= baseval) { + x /= BASE; + y += oneval; + } + + for (size_t i = 0; i < scale; i++) { +#ifdef CRT_HAS_128BIT + uint64_t res = (__uint128_t)x * x >> (scale - 1); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + x = res >> 1; + if (res & 1) + ++x; +#else + uint64_t hi, lo; + wideMultiply(x, x, &hi, &lo); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + uint64_t res = funnelShiftRight(hi, lo, scale - 1); + x = funnelShiftRight(hi, lo, scale); + if (res & 1) + ++x; +#endif + + if (x >= baseval) { + x /= BASE; + y += b; + } + b >>= 1; + } + + return y; +} diff --git a/compiler-rt/lib/builtins/log10scaledsi3.c b/compiler-rt/lib/builtins/log10scaledsi3.c new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/builtins/log10scaledsi3.c @@ -0,0 +1,66 @@ +//===------- lib/log10scaledsi3.c - Scaled Integer log10() --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements log10() for 32 bit scaled integers up to a scale of 32. +// This implementation is based on Clay. S. Turner's fast binary logarithm +// algorithm. If the true result of log10() cannot be precisely stored in the +// result type, the value is rounded down towards negative infinity. +// +//===----------------------------------------------------------------------===// + +#include + +#include "int_lib.h" + +#define BASE 10 + +// Returns: log10(x), rounded down towards negative infinity in the scale +// provided in the 2nd argument. If x is zero, INT32_MIN is returned. +// +// Assumptions: x is represents an unsigned scaled integer +// scale is at most 32. When the scale is 0, this becomes a regular +// integer log10() function. + +int32_t __log10scaledsi3(uint32_t x, uint32_t scale) { + int32_t b = UINT32_C(1) << (scale - 1); + int32_t y = 0; + uint32_t oneval = UINT32_C(1) << scale; + uint32_t baseval = ((uint32_t)BASE) << scale; + + if (x == 0) + return INT32_MIN; // represents negative infinity + + while (x < oneval) { + x *= BASE; + y -= oneval; + } + + while (x >= baseval) { + x /= BASE; + y += oneval; + } + + for (size_t i = 0; i < scale; i++) { + // Most 32 bit targets support uint64_t. + uint64_t res = (uint64_t)x * x >> (scale - 1); + + // In order to remain as close as possible to the correct solution, we + // attempt to round using the bit before the LSB. + x = res >> 1; + if (res & 1) + ++x; + + if (x >= baseval) { + x /= BASE; + y += b; + } + b >>= 1; + } + + return y; +} diff --git a/compiler-rt/test/builtins/Unit/log10scaledint.c b/compiler-rt/test/builtins/Unit/log10scaledint.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/log10scaledint.c @@ -0,0 +1,166 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t + +#include +#include + +#define CHECK_SUCCESS(x) \ + if (x) \ + return 1; + +#define SCALE 31 +#define SCALED_ONE (UINT64_C(1) << SCALE) +#define __log10_64(x) __log10scaleddi3(x, SCALE) + +#define CHECK_LOG10(VAL, EXPECTED) \ + if (__log10_64(VAL) != EXPECTED) { \ + printf( \ + "error in __log10scaleddi3(%llu, %u). Expected %lld, found %lld.\n", \ + VAL, SCALE, EXPECTED, __log10_64(VAL)); \ + return 1; \ + } + +int64_t __log10scaleddi3(uint64_t x, uint32_t scale); +int32_t __log10scaledsi3(uint32_t x, uint32_t scale); + +int check_log10_64() { + // Powers of 10. + // These values will not have the precision necessary to represent the true + // return values of 1og10() since reciprocal powers of 10 cannot be evenly + // represented with sums of recirpocal powers of 2 (1/10^n cannot be + // represented exactly with sums of 1/2^m). This is a binary log function so + // we can only attempt to represent powers of 10 as close as possible to sums + // of powers of 2. + // For example, we cannot get -3 exactly from log10(0.001), but we can + // precisely get log10((1 << SCALE) / 1000) and get a value close to -3 << + // SCALE. The error between this close result and the true value of -3 << + // SCALE will always be at most 1 if it is rounded. + CHECK_LOG10(SCALED_ONE / 10, + -2147483652); // -2147483652 / 2^SCALE => -1.000... + CHECK_LOG10(SCALED_ONE / 100, -4294967317); + CHECK_LOG10(SCALED_ONE / 1000, -6442451226); + CHECK_LOG10(SCALED_ONE / 10000, -8589936177); + + // These should produce precise whole integer results since the scaled integer + // input is not a fraction. + CHECK_LOG10(SCALED_ONE, 0); + CHECK_LOG10(10 * SCALED_ONE, SCALED_ONE); + CHECK_LOG10(100 * SCALED_ONE, 2 * SCALED_ONE); + CHECK_LOG10(1000 * SCALED_ONE, 3 * SCALED_ONE); + CHECK_LOG10(10000 * SCALED_ONE, 4 * SCALED_ONE); + + // Sums of powers of 2 + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4, -268303894); + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, -124536758); + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8 + + SCALED_ONE / 16, + -60191226); + CHECK_LOG10(SCALED_ONE / 8 + SCALED_ONE / 16, -1561217881); + + // Integers + CHECK_LOG10(3 * SCALED_ONE, 1024610092); + CHECK_LOG10(5 * SCALED_ONE, 1501026654); + CHECK_LOG10(7 * SCALED_ONE, 1814834221); + CHECK_LOG10(11 * SCALED_ONE, 2236373762); + + CHECK_LOG10(3 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4, 1232722760); + CHECK_LOG10(5 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, + 1651431827); + CHECK_LOG10(7 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + + SCALED_ONE / 8 + SCALED_ONE / 16, + 1932056116); + CHECK_LOG10(11 * SCALED_ONE + SCALED_ONE / 8 + SCALED_ONE / 16, 2252137072); + + // Other numbers that can't evenly be broken into limited sums of powers of 2 + CHECK_LOG10(6746518852, 1067621222); // pi + CHECK_LOG10(5837465777, 932640298); // e + CHECK_LOG10(3037000499, 323228496); // sqrt(2) + + // Zero returns min val to represent -infinity + CHECK_LOG10(0, INT64_MIN); + + // Max + CHECK_LOG10(UINT64_MAX, 21333080777); + + // Min + CHECK_LOG10(1, -20040166791); + CHECK_LOG10(2, -19393709798); + + return 0; +} + +#undef SCALE +#undef SCALED_ONE +#undef CHECK_LOG10 + +#define SCALE 15 +#define SCALED_ONE (UINT32_C(1) << SCALE) +#define __log10_32(x) __log10scaledsi3(x, SCALE) + +#define CHECK_LOG10(VAL, EXPECTED) \ + if (__log10_32(VAL) != EXPECTED) { \ + printf( \ + "error in __log10scaledsi3(%d, %u). Expected %d, found %d.\n", \ + VAL, SCALE, EXPECTED, __log10_32(VAL)); \ + return 1; \ + } + +// Same as the previous test but for 32 bit ints. +int check_log10_32() { + // Powers of 10. + CHECK_LOG10(SCALED_ONE / 10, -32772); // -32772 / 2^SCALE => -1.000... + CHECK_LOG10(SCALED_ONE / 100, -65566); + CHECK_LOG10(SCALED_ONE / 1000, -98642); + CHECK_LOG10(SCALED_ONE / 10000, -132328); + + // These should produce precise whole integer results since the scaled integer + // input is not a fraction. + CHECK_LOG10(SCALED_ONE, 0); + CHECK_LOG10(10 * SCALED_ONE, SCALED_ONE); + CHECK_LOG10(100 * SCALED_ONE, 2 * SCALED_ONE); + CHECK_LOG10(1000 * SCALED_ONE, 3 * SCALED_ONE); + CHECK_LOG10(10000 * SCALED_ONE, 4 * SCALED_ONE); + + // Sums of powers of 2 + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4, -4094); + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, -1901); + CHECK_LOG10(SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8 + + SCALED_ONE / 16, + -919); + CHECK_LOG10(SCALED_ONE / 8 + SCALED_ONE / 16, -23823); + + // Integers + CHECK_LOG10(3 * SCALED_ONE, 15634); + CHECK_LOG10(5 * SCALED_ONE, 22903); + CHECK_LOG10(7 * SCALED_ONE, 27692); + CHECK_LOG10(11 * SCALED_ONE, 34124); + + CHECK_LOG10(3 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4, 18809); + CHECK_LOG10(5 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + SCALED_ONE / 8, + 25198); + CHECK_LOG10(7 * SCALED_ONE + SCALED_ONE / 2 + SCALED_ONE / 4 + + SCALED_ONE / 8 + SCALED_ONE / 16, + 29480); + CHECK_LOG10(11 * SCALED_ONE + SCALED_ONE / 8 + SCALED_ONE / 16, 34364); + + // Other numbers that can't evenly be broken into limited sums of powers of 2 + CHECK_LOG10(102943, 16290); // pi + CHECK_LOG10(89072, 14230); // e + CHECK_LOG10(46340, 4931); // sqrt(2) + + // Zero returns min val to represent -infinity + CHECK_LOG10(0, INT32_MIN); + + // Max + CHECK_LOG10(UINT32_MAX, 167690); + + // Min + CHECK_LOG10(1, -147963); + CHECK_LOG10(2, -138099); + + return 0; +} +int main() { + CHECK_SUCCESS(check_log10_64()); + CHECK_SUCCESS(check_log10_32()); + return 0; +}