Index: libcxx/src/CMakeLists.txt =================================================================== --- libcxx/src/CMakeLists.txt +++ libcxx/src/CMakeLists.txt @@ -101,6 +101,14 @@ filesystem/int128_builtins.cpp ) endif() + if (MSVC) + # The __int128_t type generates calls to libgcc/clang_rt.builtins style + # helper functions that aren't provided by the MSVC runtime; bundle the + # necessary bits here. + list(APPEND LIBCXX_SOURCES + filesystem/int128_builtins_msvc.cpp + ) + endif() endif() # Add all the headers to the project for IDEs. Index: libcxx/src/filesystem/int128_builtins_msvc.cpp =================================================================== --- /dev/null +++ libcxx/src/filesystem/int128_builtins_msvc.cpp @@ -0,0 +1,199 @@ +/*===-- int128_builtins_msvc.cpp - Implement __divti3 and __udivti3 -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divti3 and __udivti3, and is stolen from the + * compiler_rt library. + * + * FIXME: we steal and re-compile it into filesystem, which uses __int128_t, + * when building for MSVC targets, that lack libgcc/compiler-rt style builtins + * for 128 bit integers. + * + * ===----------------------------------------------------------------------=== + */ + +#include "__config" +#include "climits" +#include + +#if !defined(_LIBCPP_HAS_NO_INT128) + +typedef union { + __uint128_t all; + struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint64_t low; + uint64_t high; +#else + uint64_t high; + uint64_t low; +#endif + } s; +} utwords; + +// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits. +// Remainder stored in r. +// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division +// fallback. For a correctness proof see the reference for this algorithm +// in Knuth, Volume 2, section 4.3.1, Algorithm D. +__attribute__((unused)) +static inline uint64_t udiv128by64to64default(uint64_t u1, uint64_t u0, uint64_t v, + uint64_t *r) { + const unsigned n_udword_bits = sizeof(uint64_t) * CHAR_BIT; + const uint64_t b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits) + uint64_t un1, un0; // Norm. dividend LSD's + uint64_t vn1, vn0; // Norm. divisor digits + uint64_t q1, q0; // Quotient digits + uint64_t un64, un21, un10; // Dividend digit pairs + uint64_t rhat; // A remainder + int32_t s; // Shift amount for normalization + + s = __builtin_clzll(v); + if (s > 0) { + // Normalize the divisor. + v = v << s; + un64 = (u1 << s) | (u0 >> (n_udword_bits - s)); + un10 = u0 << s; // Shift dividend left + } else { + // Avoid undefined behavior of (u0 >> 64). + un64 = u1; + un10 = u0; + } + + // Break divisor up into two 32-bit digits. + vn1 = v >> (n_udword_bits / 2); + vn0 = v & 0xFFFFFFFF; + + // Break right half of dividend into two digits. + un1 = un10 >> (n_udword_bits / 2); + un0 = un10 & 0xFFFFFFFF; + + // Compute the first quotient digit, q1. + q1 = un64 / vn1; + rhat = un64 - q1 * vn1; + + // q1 has at most error 2. No more than 2 iterations. + while (q1 >= b || q1 * vn0 > b * rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + un21 = un64 * b + un1 - q1 * v; + + // Compute the second quotient digit. + q0 = un21 / vn1; + rhat = un21 - q0 * vn1; + + // q0 has at most error 2. No more than 2 iterations. + while (q0 >= b || q0 * vn0 > b * rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + *r = (un21 * b + un0 - q0 * v) >> s; + return q1 * b + q0; +} + +static inline uint64_t udiv128by64to64(uint64_t u1, uint64_t u0, uint64_t v, + uint64_t *r) { +#if defined(__x86_64__) + uint64_t result; + __asm__("divq %[v]" + : "=a"(result), "=d"(*r) + : [ v ] "r"(v), "a"(u0), "d"(u1)); + return result; +#else + return udiv128by64to64default(u1, u0, v, r); +#endif +} + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +static __uint128_t __udivmodti4(__uint128_t a, __uint128_t b, __uint128_t *rem) { + const unsigned n_utword_bits = sizeof(__uint128_t) * CHAR_BIT; + utwords dividend; + dividend.all = a; + utwords divisor; + divisor.all = b; + utwords quotient; + utwords remainder; + if (divisor.all > dividend.all) { + if (rem) + *rem = dividend.all; + return 0; + } + // When the divisor fits in 64 bits, we can use an optimized path. + if (divisor.s.high == 0) { + remainder.s.high = 0; + if (dividend.s.high < divisor.s.low) { + // The result fits in 64 bits. + quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low, + divisor.s.low, &remainder.s.low); + quotient.s.high = 0; + } else { + // First, divide with the high part to get the remainder in dividend.s.high. + // After that dividend.s.high < divisor.s.low. + quotient.s.high = dividend.s.high / divisor.s.low; + dividend.s.high = dividend.s.high % divisor.s.low; + quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low, + divisor.s.low, &remainder.s.low); + } + if (rem) + *rem = remainder.all; + return quotient.all; + } + // 0 <= shift <= 63. + int32_t shift = + __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high); + divisor.all <<= shift; + quotient.s.high = 0; + quotient.s.low = 0; + for (; shift >= 0; --shift) { + quotient.s.low <<= 1; + // Branch free version of. + // if (dividend.all >= divisor.all) + // { + // dividend.all -= divisor.all; + // carry = 1; + // } + const __int128_t s = + (__int128_t)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1); + quotient.s.low |= s & 1; + dividend.all -= divisor.all & s; + divisor.all >>= 1; + } + if (rem) + *rem = dividend.all; + return quotient.all; +} + +static inline __int128_t __divXi3(__int128_t a, __int128_t b) { + const int N = (int)(sizeof(__int128_t) * CHAR_BIT) - 1; + __int128_t s_a = a >> N; // s_a = a < 0 ? -1 : 0 + __int128_t s_b = b >> N; // s_b = b < 0 ? -1 : 0 + a = (a ^ s_a) - s_a; // negate if s_a == -1 + b = (b ^ s_b) - s_b; // negate if s_b == -1 + s_a ^= s_b; // sign of quotient + return (__udivmodti4(a, b, (__uint128_t *)0) ^ s_a) - s_a; // negate if s_a == -1 +} + +extern "C" _LIBCPP_FUNC_VIS +__int128_t __divti3(__int128_t a, __int128_t b) { + return __divXi3(a, b); +} + +extern "C" _LIBCPP_FUNC_VIS +__uint128_t __udivti3(__uint128_t a, __uint128_t b) { + return __udivmodti4(a, b, 0); +} + +#endif // _LIBCPP_HAS_NO_INT128