Index: cmake/builtin-config-ix.cmake
===================================================================
--- cmake/builtin-config-ix.cmake
+++ cmake/builtin-config-ix.cmake
@@ -13,6 +13,7 @@
 builtin_check_c_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG)
 builtin_check_c_compiler_flag(-ffreestanding COMPILER_RT_HAS_FREESTANDING_FLAG)
 builtin_check_c_compiler_flag(-fxray-instrument COMPILER_RT_HAS_XRAY_COMPILER_FLAG)
+builtin_check_c_compiler_flag(-ffixed-point COMPILER_RT_HAS_FIXED_POINT_FLAG)
 
 builtin_check_c_compiler_source(COMPILER_RT_HAS_ATOMIC_KEYWORD
 "
Index: lib/builtins/CMakeLists.txt
===================================================================
--- lib/builtins/CMakeLists.txt
+++ lib/builtins/CMakeLists.txt
@@ -95,6 +95,7 @@
   floatuntidf.c
   floatuntisf.c
   int_util.c
+  lognfx.c
   lshrdi3.c
   lshrti3.c
   moddi3.c
@@ -130,6 +131,7 @@
   powidf2.c
   powisf2.c
   powitf2.c
+  pow10fx.c
   subdf3.c
   subsf3.c
   subvdi3.c
@@ -539,6 +541,7 @@
 else ()
   set(BUILTIN_CFLAGS "")
 
+  append_list_if(COMPILER_RT_HAS_FIXED_POINT_FLAG -ffixed-point BUILTIN_CFLAGS)
   append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
 
   # These flags would normally be added to CMAKE_C_FLAGS by the llvm
Index: lib/builtins/lognfx.c
===================================================================
--- /dev/null
+++ lib/builtins/lognfx.c
@@ -0,0 +1,50 @@
+#include <stddef.h>
+#include <stdint.h>
+
+#include "int_lib.h"
+
+// FIXME: We cannot yet use actual fixed-point types here since, in the AST,
+// returning a fixed-point type instead gets interpreted as returning the
+// corresponding int type. This is probably because the patch that contains
+// implicit casting/conversions between fixed-point types and other types
+// has not landed yet.
+int64_t __logNScaledInt(uint64_t x, uint32_t precision, uint32_t dstprec,
+                        uint32_t base) {
+  // This implementation is based on Clay S. Turner's fast binary logarithm
+  // algorithm, slightly altered so that it can take the log in any base.
+
+  // Scaling down to the destination precision first gives a more accurate
+  // result, since the result may have a different precision than the input
+  // if the input type differs from the output type.
+  x >>= (precision - dstprec);
+  precision = dstprec;
+
+  int64_t b = 1ULL << (precision - 1);
+  int64_t y = 0;
+  uint64_t oneval = 1ULL << precision;
+  uint64_t baseval = ((uint64_t)base) << precision;
+
+  if (x == 0)
+    return INT64_MIN; // represents negative infinity
+
+  while (x < oneval) {
+    x *= base;
+    y -= oneval;
+  }
+
+  while (x >= baseval) {
+    x /= base;
+    y += oneval;
+  }
+
+  for (size_t i = 0; i < precision; i++) {
+    x = (__int128_t)x * x >> precision;
+    if (x >= baseval) {
+      x /= base;
+      y += b;
+    }
+    b >>= 1;
+  }
+
+  return y;
+}
Index: lib/builtins/pow10fx.c
===================================================================
--- /dev/null
+++ lib/builtins/pow10fx.c
@@ -0,0 +1,192 @@
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "int_lib.h"
+
+#define LARGE_ONE ((__uint128_t)1)
+#define UPPER64(x) ((x) >> 64)
+#define LOWER64(x) ((x) % (LARGE_ONE << 64))
+
+// Many bits are required to get the full precision of LACCUM_SCALE.
+struct __UInt256 {
+  __uint128_t hi;
+  __uint128_t lo;
+};
+
+// 256-bit multiplication. Subject to overflow.
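+// The operands are split into 64-bit halves; the four partial products are
+// then recombined, propagating the carries from the low and middle words
+// into the 128-bit hi half of the result.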
+struct __UInt256 __mul256(__uint128_t x, __uint128_t y) {
+  __uint128_t t1 = LOWER64(x) * LOWER64(y);
+  __uint128_t t2 = LOWER64(x) * UPPER64(y);
+  __uint128_t t3 = UPPER64(x) * LOWER64(y);
+  __uint128_t t4 = UPPER64(x) * UPPER64(y);
+
+  uint64_t lo = t1;
+  __uint128_t m1 = (t1 >> 64) + (uint64_t)t2;
+  uint64_t m2 = m1;
+  __uint128_t mid = (__uint128_t)m2 + (uint64_t)t3;
+  __uint128_t hi = (t2 >> 64) + (t3 >> 64) + t4 + (m1 >> 64) + (mid >> 64);
+
+  struct __UInt256 result = {hi, (mid << 64) + lo};
+  return result;
+}
+
+struct __UInt256 __shr256(struct __UInt256 *x, unsigned amt) {
+  assert(amt < 128);
+  struct __UInt256 result = {x->hi >> amt,
+                             (x->lo >> amt) +
+                                 ((x->hi % (LARGE_ONE << amt)) << (128 - amt))};
+  return result;
+}
+
+struct __UInt256 __shl256(struct __UInt256 *x, unsigned amt) {
+  assert(amt < 128);
+  struct __UInt256 result = {
+      // Check amt here since shifting a 128-bit value over by 128 bits may not
+      // perform any shift at all, while we want 0 in that case.
+      (x->hi << amt) + (amt ? (x->lo >> (128 - amt)) : 0),
+      x->lo << amt,
+  };
+  return result;
+}
+
+int __gt256(struct __UInt256 *x, struct __UInt256 *y) {
+  return x->hi > y->hi || (x->hi == y->hi && x->lo > y->lo);
+}
+
+int __le256(struct __UInt256 *x, struct __UInt256 *y) { return !__gt256(x, y); }
+
+// Take the sqrt() of a 256-bit integer and capture the output as a scaled
+// integer for a given scale.
+//
+// Note: This function will not return the correct result if the number of
+// leading zeros in the 256-bit value passed is less than the scale (that is,
+// if the value left-shifted by the scale overflows).
+//
+// Assuming the inputs satisfy this requirement, the result is guaranteed
+// to fit in 128 bits.
+__uint128_t __sqrt256(struct __UInt256 *val, unsigned scale) {
+  struct __UInt256 x = __shl256(val, scale);
+  __uint128_t b = LARGE_ONE << 127; // Highest positive bit
+
+  // The result will always fit into 128 bits.
+  __uint128_t result = 0;
+  for (unsigned i = 0; i < 128; ++i) {
+    __uint128_t test_val = result + b;
+    struct __UInt256 test_sqr = __mul256(test_val, test_val);
+
+    if (__le256(&test_sqr, &x))
+      result += b; // Set this bit
+
+    b >>= 1;
+  }
+  return result;
+}
+
+// y = 10^x; 0 <= x < 1; 1 <= y < 10
+// Raise 10 to the power of a scaled integer, with arbitrary input and
+// destination scales. The value passed must be between zero (inclusive) and
+// one (exclusive), so this is effectively finding the nth root of 10.
+//
+// The value can fit up to 128 bits and has the scale specified by the
+// input scale.
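+//
+// The bits of x are consumed from least significant to most significant.
+// Each iteration multiplies the running result by 10 when the current bit is
+// set and then takes a square root, so after all input_scale iterations the
+// result is 10 raised to the fraction that x represents.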
+__uint128_t __root_base10(__uint128_t x, unsigned input_scale,
+                          unsigned dst_scale) {
+  assert(dst_scale < 128);
+  __uint128_t one = LARGE_ONE << dst_scale;
+  __uint128_t ten = one * 10;
+
+  assert(x < (LARGE_ONE << input_scale) &&
+         "This function only accepts values of 0 <= x < 1");
+
+  __uint128_t result = one;
+  for (unsigned i = 0; i < input_scale; ++i) {
+    struct __UInt256 result_buff = {0, result};
+    if (x & 1) {
+      struct __UInt256 mul_result = __mul256(result, ten);
+      result_buff = __shr256(&mul_result, dst_scale);
+    }
+    result = __sqrt256(&result_buff, dst_scale);
+
+    x >>= 1;
+  }
+
+  assert(result / one < 10);
+  return result;
+}
+
+// Exponentiation by squaring.
+// Subject to overflow.
+uint64_t __ipow(uint64_t base, uint64_t exp) {
+  uint64_t result = 1;
+  while (1) {
+    if (exp & 1)
+      result *= base;
+    exp >>= 1;
+    if (!exp)
+      break;
+    base *= base;
+  }
+  return result;
+}
+
+#define LACCUM_SCALE 31
+#define LACCUM_ONE (1LL << LACCUM_SCALE)
+#define LACCUM_LARGEST_ROOT10_EXP 20686623783 // 9.632...
+
+#define SQRT10_SCALE 64
+#define SQRT10_ONE (LARGE_ONE << SQRT10_SCALE)
+#define SQRT10_TEN ((LARGE_ONE * 10) << SQRT10_SCALE)
+
+uint64_t __pow10fx_positive(uint64_t x) {
+  int64_t fract_part = x % LACCUM_ONE;
+  int64_t int_part = x / LACCUM_ONE;
+
+  // The largest value we could possibly represent without overflowing is
+  // 9.63295...
+  if (x > LACCUM_LARGEST_ROOT10_EXP)
+    return UINT64_MAX;
+
+  // Cannot possibly overflow since the max value here is 10^9.
+  int64_t non_scaled_int_pow = __ipow(10, int_part);
+
+  // The int part will effectively be a left shift on the fractional part, so
+  // we need more precision in the fractional part to get a full answer.
+  //
+  // Currently the root_base10_fract provides a scale of 31 and the max value
+  // the int part provides is 10^9, which requires 30 integral bits. This means
+  // that to retain the desired precision, we need an extra 30 bits of
+  // precision in the result of our function for finding 10^(some fractional
+  // exponent).
+  //
+  // Largest number of bits needed for full precision:
+  //   30 // shift caused by multiplying against the largest value represented
+  //      // by 10^(int part of exponent); ceil(log2(10^9))
+  // + 31 // Desired scale for the final result (long _Accum)
+  // +  7 // part to hold the integral bits while calculating the intermediate
+  //      // nested square roots of 10.
+  // = 68 // The final number of integral bits we need to perform any pow10
+  //      // calculation. In order to actually retain the scale needed after
+  //      // doing a sqrt(), 129 bits are needed (68 + 30 + 31), which requires
+  //      // the 256-bit integer.
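+  //
+  // 10^x = 10^int_part * 10^fract_part: the fractional power is computed at
+  // the 64-bit SQRT10_SCALE, multiplied by the unscaled integer power, and
+  // the product is shifted back down to the 31-bit long _Accum scale.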
+  __uint128_t scaled_fract_pow =
+      __root_base10(fract_part, LACCUM_SCALE, SQRT10_SCALE);
+  __uint128_t result =
+      (__uint128_t)non_scaled_int_pow * (__uint128_t)scaled_fract_pow;
+  return result >> (SQRT10_SCALE - LACCUM_SCALE);
+}
+
+uint64_t __divfx(uint64_t x, uint64_t y) {
+  if (y == 0)
+    return UINT64_MAX;
+  return (((__uint128_t)x) << LACCUM_SCALE) / y;
+}
+
+uint64_t __pow10fx(int64_t x) {
+  // -inf; this value is less than the min precision of a long _Accum
+  if (x == INT64_MIN)
+    return 0;
+  if (x < 0)
+    return __divfx(LACCUM_ONE, __pow10fx_positive(-x));
+  return __pow10fx_positive(x);
+}
Index: test/builtins/Unit/lognfx.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/lognfx.c
@@ -0,0 +1,162 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
+#include <stdint.h>
+
+#include "int_lib.h"
+
+// We can at least narrow down the line number the assertion failed on.
+#define assert(b)                                                              \
+  if (!(b)) {                                                                  \
+    return __LINE__ ? __LINE__ : 1;                                            \
+  }
+#define check_success(x)                                                       \
+  if (x)                                                                       \
+    return x;
+
+#define LACCUM_SCALE 31
+#define ULACCUM_SCALE 32
+#define ULACCUM_ONE (1ULL << ULACCUM_SCALE)
+#define LACCUM_ONE (1LL << LACCUM_SCALE)
+
+int64_t __logNScaledInt(uint64_t, uint32_t, uint32_t, uint32_t);
+
+int64_t log2fx(uint64_t x) {
+  return __logNScaledInt(x, ULACCUM_SCALE, LACCUM_SCALE, 2);
+}
+
+int check_log2() {
+  assert(log2fx(ULACCUM_ONE / 2) == -LACCUM_ONE);
+  assert(log2fx(ULACCUM_ONE / 4) == -2 * LACCUM_ONE);
+  assert(log2fx(ULACCUM_ONE / 8) == -3 * LACCUM_ONE);
+  assert(log2fx(ULACCUM_ONE / 16) == -4 * LACCUM_ONE);
+
+  assert(log2fx(1 * ULACCUM_ONE) == 0);
+  assert(log2fx(2 * ULACCUM_ONE) == 1 * LACCUM_ONE);
+  assert(log2fx(4 * ULACCUM_ONE) == 2 * LACCUM_ONE);
+  assert(log2fx(8 * ULACCUM_ONE) == 3 * LACCUM_ONE);
+
+  // Sums of powers of 2
+  // These can be represented to within 1 ulp of the exact value.
+  assert(log2fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4) == -891286244);
+  assert(log2fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4 + ULACCUM_ONE / 8) ==
+         -413702155);
+  assert(log2fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4 + ULACCUM_ONE / 8 +
+                ULACCUM_ONE / 16) == -199950924);
+  assert(log2fx(ULACCUM_ONE / 8 + ULACCUM_ONE / 16) == -5186253540);
+
+  // Non-power-of-2 integers
+  assert(log2fx(3 * ULACCUM_ONE) == 3403681052);
+  assert(log2fx(5 * ULACCUM_ONE) == 4986302615);
+  assert(log2fx(7 * ULACCUM_ONE) == 6028748789);
+  assert(log2fx(11 * ULACCUM_ONE) == 7429072832);
+
+  assert(log2fx(3 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4) ==
+         4095016372);
+  assert(log2fx(5 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4 +
+                ULACCUM_ONE / 8) == 5485937786);
+  assert(log2fx(7 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4 +
+                ULACCUM_ONE / 8 + ULACCUM_ONE / 16) == 6418151493);
+  assert(log2fx(11 * ULACCUM_ONE + ULACCUM_ONE / 8 + ULACCUM_ONE / 16) ==
+         7481437414);
+
+  // Zero returns the min val to represent -infinity.
+  assert(log2fx(0) == INT64_MIN);
+
+  // Any other fractional values that cannot be evenly represented as a sum of
+  // powers of 2 may be off by more than 1 ulp since we would be taking the
+  // log2 of an incorrectly stored value. Take log2(0.2). With a scale of 31
+  // bits, 0.2 is stored as 429496729, which scaled back as a decimal actually
+  // represents 0.1999...9997206... The difference between the value of
+  // log2(0.2) and the representative value we get as a scaled integer is
+  // 1.397 ulps. However, the difference between the value we get and the log2
+  // of the actual stored value (0.19999...) is less than 1 ulp.
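+  // (Here 0.2 is passed as ULACCUM_ONE / 5, i.e. the same value truncated to
+  // the 32-bit unsigned long _Accum scale.)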
+  int64_t result = log2fx(ULACCUM_ONE / 5);
+  int64_t expected_scaled = -4986302622; // log2(0.19999...) * 2^31
+  assert(abs(result - expected_scaled) <= 2);
+
+  // Max
+  assert(log2fx(UINT64_MAX) == 68719476735);
+
+  // Min
+  // Since we ideally pass in an unsigned long _Accum, which has a scale 1
+  // higher than that of a signed long _Accum, passing 1 (which gets shifted
+  // right by one) will yield results equivalent to passing 0.
+  assert(log2fx(1) == INT64_MIN);
+  assert(log2fx(2) == -66571993088);
+
+  return 0;
+}
+
+int64_t shared_val;
+
+int64_t log10fx(uint64_t x) {
+  return __logNScaledInt(x, ULACCUM_SCALE, LACCUM_SCALE, 10);
+}
+
+int check_log10() {
+  // Reciprocal powers of 10
+  // These results will not be as close to the true values since reciprocal
+  // powers of 10 cannot be represented exactly as sums of powers of 2.
+  // For example, 0.001ulk is stored as the scaled integer 2147483, which
+  // evaluates as 0.0009999...
+  // Because of this, log10(0.001ulk) may be many ulps away from the actual
+  // value of -3, but log10(0.001ulk) is still less than 1 ulp away from the
+  // log10 of the stored value (that is, it really computes log10(0.000999...)).
+  assert(log10fx(ULACCUM_ONE / 10) == -2147483652);
+  assert(log10fx(ULACCUM_ONE / 100) == -4294967317);
+  assert(log10fx(ULACCUM_ONE / 1000) == -6442451226);
+  assert(log10fx(ULACCUM_ONE / 10000) == -8589936177);
+
+  assert(log10fx(ULACCUM_ONE) == 0);
+  assert(log10fx(10 * ULACCUM_ONE) == 1 * LACCUM_ONE);
+  assert(log10fx(100 * ULACCUM_ONE) == 2 * LACCUM_ONE);
+  assert(log10fx(1000 * ULACCUM_ONE) == 3 * LACCUM_ONE);
+  assert(log10fx(10000 * ULACCUM_ONE) == 4 * LACCUM_ONE);
+
+  // Sums of powers of 2
+  assert(log10fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4) == -268303894);
+  assert(log10fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4 + ULACCUM_ONE / 8) ==
+         -124536758);
+  assert(log10fx(ULACCUM_ONE / 2 + ULACCUM_ONE / 4 + ULACCUM_ONE / 8 +
+                 ULACCUM_ONE / 16) == -60191226);
+  assert(log10fx(ULACCUM_ONE / 8 + ULACCUM_ONE / 16) == -1561217881);
+
+  // Integers
+  assert(log10fx(3 * ULACCUM_ONE) == 1024610092);
+  assert(log10fx(5 * ULACCUM_ONE) == 1501026654);
+  assert(log10fx(7 * ULACCUM_ONE) == 1814834221);
+  assert(log10fx(11 * ULACCUM_ONE) == 2236373762);
+
+  assert(log10fx(3 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4) ==
+         1232722760);
+  assert(log10fx(5 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4 +
+                 ULACCUM_ONE / 8) == 1651431827);
+  assert(log10fx(7 * ULACCUM_ONE + ULACCUM_ONE / 2 + ULACCUM_ONE / 4 +
+                 ULACCUM_ONE / 8 + ULACCUM_ONE / 16) == 1932056116);
+  assert(log10fx(11 * ULACCUM_ONE + ULACCUM_ONE / 8 + ULACCUM_ONE / 16) ==
+         2252137072);
+
+  // Other numbers that can't evenly be broken into limited sums of powers of 2
+  assert(log10fx(13493037704) == 1067621222); // pi
+  assert(log10fx(11674931554) == 932640298);  // e
+  assert(log10fx(6074000999) == 323228496);   // sqrt(2)
+
+  // Zero returns the min val to represent -infinity.
+  assert(log10fx(0) == INT64_MIN);
+
+  // Max
+  assert(log10fx(UINT64_MAX) == 20686623783);
+
+  // Min
+  assert(log10fx(1) == INT64_MIN);
+  assert(log10fx(2) == -20040166791);
+
+  return 0;
+}
+
+int main() {
+  check_success(check_log2());
+  check_success(check_log10());
+  return 0;
+}
Index: test/builtins/Unit/pow10fx.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/pow10fx.c
@@ -0,0 +1,246 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define LARGE_ONE ((__uint128_t)1)
+#define UPPER64(x) ((x) >> 64)
+#define LOWER64(x) ((x) % (LARGE_ONE << 64))
+
+#define LACCUM_SCALE 31
+#define LACCUM_ONE (1LL << LACCUM_SCALE)
+#define LACCUM_LARGEST_ROOT10_EXP 20686623783 // 9.632...
+
+// We can at least narrow down the line number the assertion failed on.
+#define assert(b)                                                              \
+  if (!(b)) {                                                                  \
+    return __LINE__ ? __LINE__ : 1;                                            \
+  }
+#define check_success(x)                                                       \
+  if (x)                                                                       \
+    return x;
+
+#define TOLERANCE 1
+#define ASSERT_CLOSE(x, y)                                                     \
+  assert((x) - (y) <= TOLERANCE || (y) - (x) <= TOLERANCE);
+
+struct __UInt256 {
+  __uint128_t hi;
+  __uint128_t lo;
+};
+
+struct __UInt256 __mul256(__uint128_t x, __uint128_t y);
+__uint128_t __sqrt256(struct __UInt256 *val, unsigned scale);
+__uint128_t __root_base10(__uint128_t x, unsigned input_scale,
+                          unsigned dst_scale);
+uint64_t __pow10fx(int64_t x);
+
+int test_mul256() {
+  struct __UInt256 result256;
+  result256 = __mul256(0, 0);
+  assert(result256.hi == 0);
+  assert(result256.lo == 0);
+
+  result256 = __mul256(1, 1);
+  assert(result256.hi == 0);
+  assert(result256.lo == 1);
+
+  result256 = __mul256(1, INT64_MAX);
+  assert(result256.hi == 0);
+  assert(result256.lo == INT64_MAX);
+
+  result256 = __mul256(2, INT64_MAX);
+  assert(result256.hi == 0);
+  assert(result256.lo == LARGE_ONE * 2 * INT64_MAX);
+
+  result256 = __mul256(INT64_MAX, INT64_MAX);
+  assert(result256.hi == 0);
+  assert(result256.lo % (LARGE_ONE << 64) == 1);
+  assert(result256.lo / (LARGE_ONE << 64) == 4611686018427387903);
+
+  result256 = __mul256(1, LARGE_ONE * INT64_MAX * INT64_MAX);
+  assert(result256.hi == 0);
+  assert(result256.lo % (LARGE_ONE << 64) == 1);
+  assert(result256.lo / (LARGE_ONE << 64) == 4611686018427387903);
+
+  result256 = __mul256(INT64_MAX, LARGE_ONE * INT64_MAX * INT64_MAX);
+  assert(result256.hi == 2305843009213693951);
+  assert(result256.lo % (LARGE_ONE << 64) == 9223372036854775807);
+  assert(result256.lo / (LARGE_ONE << 64) == 4611686018427387905);
+
+  result256 = __mul256(LARGE_ONE * INT64_MAX * INT64_MAX,
+                       LARGE_ONE * INT64_MAX * INT64_MAX);
+  assert(result256.hi % (LARGE_ONE << 64) == 9223372036854775809ULL);
+  assert(result256.hi / (LARGE_ONE << 64) == 1152921504606846975);
+  assert(result256.lo % (LARGE_ONE << 64) == 1);
+  assert(result256.lo / (LARGE_ONE << 64) == 9223372036854775806);
+  return 0;
+}
+
+unsigned slow_log2(__uint128_t val) {
+  unsigned result = 0;
+  while (val >>= 1)
+    ++result;
+  return result;
+}
+
+__uint128_t safe_sqrt256fx(struct __UInt256 val, unsigned scale) {
+  unsigned numhibits = slow_log2(val.hi);
+  if (numhibits) {
+    assert(numhibits + 128 + scale < 256);
+  } else {
+    assert(scale < 128);
+  }
+  return __sqrt256(&val, scale);
+}
+
+int test_sqrt256fx() {
+  unsigned scale = 60;
+  struct __UInt256 val256 = {0, 0};
+  assert(safe_sqrt256fx(val256, 0) == 0);
+
+  val256.lo = 1;
+  assert(safe_sqrt256fx(val256, 0) == 1);
+
+  val256.lo = 225;
+  assert(safe_sqrt256fx(val256, 0) == 15);
+
+  val256.lo = INT64_MAX;
+  assert(safe_sqrt256fx(val256, 0) == 3037000499);
+
+  val256.lo = LARGE_ONE * INT64_MAX * INT64_MAX;
+  assert(safe_sqrt256fx(val256, 0) == INT64_MAX);
+
+  val256.hi = 1;
+  val256.lo = 0;
+  assert(safe_sqrt256fx(val256, 0) == LARGE_ONE << 64);
+
+  val256.hi = LARGE_ONE * INT64_MAX * INT64_MAX;
+  val256.lo = LARGE_ONE * INT64_MAX * INT64_MAX;
+  __uint128_t result128 = safe_sqrt256fx(val256, 0);
+  assert(LOWER64(result128) == 0);
+  assert(UPPER64(result128) == 9223372036854775807);
+
+  val256.hi = 0;
+  val256.lo = LARGE_ONE << scale;
+  assert(safe_sqrt256fx(val256, scale) == LARGE_ONE << scale);
+
+  val256.lo = 2 * LARGE_ONE << scale;
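+  // The expected value is approximately sqrt(2) scaled by 2^60.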
+  assert(safe_sqrt256fx(val256, scale) == 1630477228166597776);
+
+  val256.lo = -1;
+  result128 = safe_sqrt256fx(val256, scale);
+  assert(LOWER64(result128) == 18446744073709551615ULL);
+  assert(UPPER64(result128) == 1073741823);
+
+  val256.hi = (LARGE_ONE << (128 - scale)) - 1;
+  val256.lo = -1;
+  result128 = safe_sqrt256fx(val256, scale);
+  assert(LOWER64(result128) == 18446744073709551615ULL);
+  assert(UPPER64(result128) == 18446744073709551615ULL);
+
+  return 0;
+}
+
+int test_root_base10() {
+  unsigned input_scale;
+  unsigned dst_scale;
+  __uint128_t x, result;
+  assert(__root_base10(0, 0, 0) == 1);
+
+  input_scale = LACCUM_SCALE;
+  x = (LARGE_ONE << input_scale) / 2;
+  assert(__root_base10(x, input_scale, 0) == 3);
+
+  dst_scale = 31;
+  assert(__root_base10(x, input_scale, dst_scale) == 6790939565);
+
+  dst_scale = 60;
+  assert(__root_base10(x, input_scale, dst_scale) == 3645857917945947428);
+
+  dst_scale = 61;
+  assert(__root_base10(x, input_scale, dst_scale) == 7291715835891894856);
+
+  dst_scale = 62;
+  result = __root_base10(x, input_scale, dst_scale);
+  assert(result == 14583431671783789712ULL);
+
+  dst_scale = 63;
+  input_scale = LACCUM_SCALE;
+  x = (LARGE_ONE << input_scale) / 2;
+  result = __root_base10(x, input_scale, dst_scale);
+  assert(result % (LARGE_ONE << 64) == 10720119269858027808ULL);
+  assert(result / (LARGE_ONE << 64) == 1);
+
+  dst_scale = 64;
+  result = __root_base10(x, input_scale, dst_scale);
+  assert(result % (LARGE_ONE << 64) == 2993494466006504000);
+  assert(result / (LARGE_ONE << 64) == 3);
+
+  x = 1359270951;
+  input_scale = LACCUM_SCALE;
+  dst_scale = 61;
+  result = __root_base10(x, input_scale, dst_scale);
+  ASSERT_CLOSE(result, 9903520305053854740ULL);
+
+  return 0;
+}
+
+int test_pow10fx() {
+  int64_t x = LACCUM_ONE / 2;
+  uint64_t result = __pow10fx(x);
+  ASSERT_CLOSE(result, 6790939565);
+
+  x = LACCUM_ONE + LACCUM_ONE / 2;
+  result = __pow10fx(x);
+  ASSERT_CLOSE(result, 67909395656);
+
+  x = 9 * LACCUM_ONE + LACCUM_ONE / 2;
+  result = __pow10fx(x);
+  ASSERT_CLOSE(result, 6790939565647295542);
+
+  result = __pow10fx(-x);
+  ASSERT_CLOSE(result, 0);
+
+  x = INT64_MAX;
+  result = __pow10fx(x);
+  ASSERT_CLOSE(result, UINT64_MAX);
+
+  x = LACCUM_LARGEST_ROOT10_EXP;
+  result = __pow10fx(x);
+  ASSERT_CLOSE(result, 9223372028259425182);
+
+  x = LACCUM_LARGEST_ROOT10_EXP + 1;
+  result = __pow10fx(x);
+  ASSERT_CLOSE(result, UINT64_MAX);
+
+  result = __pow10fx(INT64_MIN);
+  ASSERT_CLOSE(result, 0);
+
+  result = __pow10fx(0);
+  ASSERT_CLOSE(result, LACCUM_ONE);
+
+  result = __pow10fx(LACCUM_ONE);
+  ASSERT_CLOSE(result, 10 * LACCUM_ONE);
+
+  result = __pow10fx(9 * LACCUM_ONE);
+  ASSERT_CLOSE(result, 1000000000 * LACCUM_ONE);
+
+  result = __pow10fx(10 * LACCUM_ONE);
+  ASSERT_CLOSE(result, UINT64_MAX);
+
+  result = __pow10fx(-LACCUM_ONE);
+  ASSERT_CLOSE(result, LACCUM_ONE / 10);
+
+  result = __pow10fx(-9 * LACCUM_ONE - LACCUM_ONE / 3);
+  ASSERT_CLOSE(result, 1);
+
+  return 0;
+}
+
+int main() {
+  check_success(test_mul256());
+  check_success(test_sqrt256fx());
+  check_success(test_root_base10());
+  check_success(test_pow10fx());
+  return 0;
+}