diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -124,6 +124,7 @@
 # Includes.
 check_cxx_compiler_flag(-nostdinc++ COMPILER_RT_HAS_NOSTDINCXX_FLAG)
 check_include_files("sys/auxv.h" COMPILER_RT_HAS_AUXV)
+check_include_files("asm/hwcap.h" COMPILER_RT_HAS_HWCAP)
 
 # Libraries.
 check_library_exists(dl dlopen "" COMPILER_RT_HAS_LIBDL)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -206,10 +206,16 @@
 if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
+    atomic_alt.h
+    atomic_alt.c
     atomic.c
   )
 endif()
 
+SET(ATOMIC_X86_64_ALT x86_64/atomic_alt_cx16.h)
+SET(ATOMIC_X86_64_ALT_FLAGS "-mcx16")
+#TODO: add ATOMIC_X86_64_ALT as a deps
+
 if(APPLE)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -28,6 +28,7 @@
 #include <string.h>
 
 #include "assembly.h"
+#include "atomic_alt.h"
 
 // Clang objects if you redefine a builtin.  This little hack allows us to
 // define a function with the same name as an intrinsic.
@@ -36,6 +37,8 @@
 #pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
 #pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(              \
     __atomic_compare_exchange)
+#pragma redefine_extname __atomic_is_lock_free_c SYMBOL_NAME(                  \
+    __atomic_is_lock_free)
 
 /// Number of locks.  This allocates one page on 32-bit platforms, two on
 /// 64-bit.  This can be specified externally if a different trade between
@@ -120,18 +123,27 @@
   return locks + (hash & SPINLOCK_MASK);
 }
 
-/// Macros for determining whether a size is lock free.  Clang can not yet
-/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
-/// not lock free.
+/// Macros for determining whether a size is lock free.
 #define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
 #define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
 #define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
 #define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
+
+#ifdef __SIZEOF_INT128__
+#define IS_LOCK_FREE_16 __c11_atomic_is_lock_free(16)
+#define HANDLE_CASE_16(LOCK_FREE_ACTION_ALT)                                   \
+  if (IS_LOCK_FREE_16) {                                                       \
+    LOCK_FREE_ACTION_ALT(__uint128_t);                                         \
+  }
+#else
 #define IS_LOCK_FREE_16 0
+#define HANDLE_CASE_16(LOCK_FREE_ACTION_ALT)
+#endif // __SIZEOF_INT128__
 
 /// Macro that calls the compiler-generated lock-free versions of functions
 /// when they exist.
-#define LOCK_FREE_CASES()                                                      \
+/// The argument is the alternative action to use for case-8 and case-16
+#define LOCK_FREE_CASES(LOCK_FREE_ACTION_ALT)                                  \
   do {                                                                         \
     switch (size) {                                                            \
     case 1:                                                                    \
@@ -151,14 +163,12 @@
       break;                                                                   \
     case 8:                                                                    \
       if (IS_LOCK_FREE_8) {                                                    \
-        LOCK_FREE_ACTION(uint64_t);                                            \
+        LOCK_FREE_ACTION_ALT(uint64_t);                                        \
       }                                                                        \
       break;                                                                   \
     case 16:                                                                   \
-      if (IS_LOCK_FREE_16) {                                                   \
-        /* FIXME: __uint128_t isn't available on 32 bit platforms.             \
-        LOCK_FREE_ACTION(__uint128_t);*/                                       \
-      }                                                                        \
+      /* Special handling because not all platforms have uint_128*/            \
+      HANDLE_CASE_16(LOCK_FREE_ACTION_ALT)                                     \
       break;                                                                   \
     }                                                                          \
   } while (0)
@@ -169,8 +179,12 @@
 #define LOCK_FREE_ACTION(type)                                                 \
   *((type *)dest) = __c11_atomic_load((_Atomic(type) *)src, model);            \
   return;
-  LOCK_FREE_CASES();
+#define LOCK_FREE_ACTION_ALT(type)                                             \
+  *((type *)dest) = __c11_atomic_load_ifunc((_Atomic(type) *)src, model);      \
+  return;
+  LOCK_FREE_CASES(LOCK_FREE_ACTION_ALT);
 #undef LOCK_FREE_ACTION
+#undef LOCK_FREE_ACTION_ALT
   Lock *l = lock_for_pointer(src);
   lock(l);
   memcpy(dest, src, size);
@@ -183,8 +197,9 @@
 #define LOCK_FREE_ACTION(type)                                                 \
   __c11_atomic_store((_Atomic(type) *)dest, *(type *)src, model);              \
   return;
-  LOCK_FREE_CASES();
+  LOCK_FREE_CASES(LOCK_FREE_ACTION);
 #undef LOCK_FREE_ACTION
+#undef LOCK_FREE_ACTION_ALT
   Lock *l = lock_for_pointer(dest);
   lock(l);
   memcpy(dest, src, size);
@@ -202,8 +217,13 @@
   return __c11_atomic_compare_exchange_strong(                                 \
       (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success,       \
       failure)
-  LOCK_FREE_CASES();
+#define LOCK_FREE_ACTION_ALT(type)                                             \
+  return __c11_atomic_compare_exchange_strong_ifunc(                           \
+      (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success,       \
+      failure)
+  LOCK_FREE_CASES(LOCK_FREE_ACTION_ALT);
 #undef LOCK_FREE_ACTION
+#undef LOCK_FREE_ACTION_ALT
   Lock *l = lock_for_pointer(ptr);
   lock(l);
   if (memcmp(ptr, expected, size) == 0) {
@@ -223,7 +243,7 @@
   *(type *)old =                                                               \
       __c11_atomic_exchange((_Atomic(type) *)ptr, *(type *)val, model);        \
   return;
-  LOCK_FREE_CASES();
+  LOCK_FREE_CASES(LOCK_FREE_ACTION);
 #undef LOCK_FREE_ACTION
   Lock *l = lock_for_pointer(ptr);
   lock(l);
@@ -254,7 +274,7 @@
 #define OPTIMISED_CASE(n, lockfree, type)                                      \
   type __atomic_load_##n(type *src, int model) {                               \
     if (lockfree)                                                              \
-      return __c11_atomic_load((_Atomic(type) *)src, model);                   \
+      return __c11_atomic_load_ifunc((_Atomic(type) *)src, model);             \
     Lock *l = lock_for_pointer(src);                                           \
     lock(l);                                                                   \
     type val = *src;                                                           \
diff --git a/compiler-rt/lib/builtins/atomic_alt.h b/compiler-rt/lib/builtins/atomic_alt.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/atomic_alt.h
@@ -0,0 +1,34 @@
+#ifndef COMPILERRT_ATOMIC_ALT_H
+#define COMPILERRT_ATOMIC_ALT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+extern uint64_t __c11_atomic_load_ifunc(uint64_t *src, int model);
+extern int __c11_atomic_compare_exchange_strong_ifunc(uint64_t *ptr,
+                                                      uint64_t *expected,
+                                                      uint64_t desired,
+                                                      int success, int failure);
+
+#ifdef __SIZEOF_INT128__
+// NOTE(review): C has no function overloading, so these 128-bit declarations
+// reuse the names of the 64-bit ones above with different types. They will
+// need distinct names (or a _Generic dispatch) before this compiles as C.
+extern __uint128_t __c11_atomic_load_ifunc(__uint128_t *src, int model);
+extern int __c11_atomic_compare_exchange_strong_ifunc(__uint128_t *ptr,
+                                                      __uint128_t *expected,
+                                                      __uint128_t desired,
+                                                      int success, int failure);
+
+#endif
+
+// This will be renamed in atomic.c
+extern bool __atomic_is_lock_free_c(unsigned long size,
+                                    const volatile void *ptr);
+
+// TODO: hide it (somehow?)
+// Returns true if this platform supports atomic operation for the given size in
+// bytes.
+extern bool have_atomic_cap(int N);
+
+#endif // COMPILERRT_ATOMIC_ALT_H
diff --git a/compiler-rt/lib/builtins/atomic_alt.c b/compiler-rt/lib/builtins/atomic_alt.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/atomic_alt.c
@@ -0,0 +1,136 @@
+#include "atomic_alt.h"
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <cpuid.h>
+#endif
+
+#if __has_include(<sys/auxv.h>) && __has_include(<asm/hwcap.h>)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#define HAS_ARM_HWCAP 1
+#else
+#define HAS_ARM_HWCAP 0
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#ifdef __x86_64__
+#define FEAT_REG ecx
+#define MASK bit_CMPXCHG16B
+#else
+#define FEAT_REG edx
+#define MASK bit_CMPXCHG8B
+#endif
+
+static inline bool check_x86_atomic_cas(void) {
+  unsigned int eax, ebx, ecx = 0, edx = 0;
+  __get_cpuid(1, &eax, &ebx, &ecx, &edx);
+  return (FEAT_REG & MASK) != 0;
+}
+
+bool have_atomic_cap(int N) {
+  static int __have_atomic_cas = -1;
+  if (__have_atomic_cas == -1) {
+    __have_atomic_cas = check_x86_atomic_cas() != 0 ? 1 : 0;
+  }
+  switch (N) {
+  case 1:
+  case 2:
+  case 4:
+    return true;
+  case 8:
+#ifdef __x86_64__
+    return true;
+  case 16:
+#endif
+    return __have_atomic_cas;
+  }
+  return false;
+}
+#elif defined(__aarch64__) && HAS_ARM_HWCAP
+
+bool have_atomic_cap(int N) {
+  static int __have_atomic_cap = -1;
+  if (__have_atomic_cap == -1) {
+    __have_atomic_cap = (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0 ? 1 : 0;
+  }
+  switch (N) {
+  case 1:
+  case 2:
+  case 4:
+  case 8:
+    return __have_atomic_cap;
+  }
+  return false;
+}
+
+#else
+// No runtime capability probe on this target. Not static: the header declares
+// have_atomic_cap with external linkage.
+bool have_atomic_cap(int N) {
+  (void)N;
+  return false;
+}
+#endif
+
+// Return true if it could positively be determined to be lock free.
+// Otherwise, fall through to the next bucket (next power-of-2).
+#define CHECK_LOCK_FREE_POW2(N)                                                \
+  do {                                                                         \
+    uintptr_t r = (uintptr_t)ptr & (N - 1);                                    \
+    if (r != 0)                                                                \
+      break;                                                                   \
+    if (__atomic_always_lock_free(N, 0))                                       \
+      return true;                                                             \
+    if (have_atomic_cap(N))                                                    \
+      return true;                                                             \
+  } while (0)
+
+bool __atomic_is_lock_free_c(unsigned long size, const volatile void *ptr) {
+  // FIXME: We don't support non-power-of-2 sizes now. They could be handled
+  // by rounding up to the next power-of-2 bucket. But all the __atomic_*
+  // operations will need to do the same thing as well.
+  switch (size) {
+  case 0:
+    return true;
+  case 1:
+    CHECK_LOCK_FREE_POW2(1);
+    __attribute__((fallthrough));
+  case 2:
+    CHECK_LOCK_FREE_POW2(2);
+    __attribute__((fallthrough));
+  case 4:
+    CHECK_LOCK_FREE_POW2(4);
+    __attribute__((fallthrough));
+  case 8:
+    CHECK_LOCK_FREE_POW2(8);
+    __attribute__((fallthrough));
+  case 16:
+    CHECK_LOCK_FREE_POW2(16);
+    break;
+  }
+  return false;
+}
+
+#ifdef __x86_64__
+#include "x86_64/atomic_alt_cx16.h"
+#endif
+
+__attribute__((ifunc("atomic_load_resolver"))) uint64_t
+__c11_atomic_load_ifunc(uint64_t *src, int model);
+
+#ifdef __SIZEOF_INT128__
+__attribute__((ifunc("atomic_load_resolver"))) __uint128_t
+__c11_atomic_load_ifunc(__uint128_t *src, int model);
+#endif
+
+void *atomic_load_resolver(void) {
+#ifdef __x86_64__
+  return have_atomic_cap(16) ? __c11_atomic_load_cx16 : __c11_atomic_load;
+#elif defined(__i386__)
+  // TODO fill in for the rest;
+#else
+
+#endif
+  return __c11_atomic_load;
+}
diff --git a/compiler-rt/lib/builtins/x86_64/atomic_alt_cx16.h b/compiler-rt/lib/builtins/x86_64/atomic_alt_cx16.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/x86_64/atomic_alt_cx16.h
@@ -0,0 +1,8 @@
+// This file is compiled with -mcx16
+
+// This is just aliased to c11_atomic_load ...
+uint64_t __c11_atomic_load_cx16(uint64_t *src, int model) {}
+
+#ifdef __SIZEOF_INT128__
+__uint128_t __c11_atomic_load_cx16(__uint128_t *src, int model) {}
+#endif
diff --git a/compiler-rt/test/builtins/Unit/atomic_lock_free_test.cc b/compiler-rt/test/builtins/Unit/atomic_lock_free_test.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/atomic_lock_free_test.cc
@@ -0,0 +1,172 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_atomic
+//===-- atomic_lock_free_test.c - Test is_lock_free function ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+// The function under test is defined in C; keep C linkage if built as C++.
+#ifdef __cplusplus
+extern "C"
+#endif
+bool __atomic_is_lock_free_c(unsigned long size, const volatile void *ptr);
+
+void test_is_lock_free_0(void) {
+  struct A {
+    int a[100];
+  };
+
+  struct A a;
+  // size = 0. Doesn't matter what the alignment is, it should be lock-free.
+  assert(__atomic_is_lock_free_c(0, &a) && "size=0 is not lock free");
+  fprintf(stdout, "test_is_lock_free_0 PASSED\n");
+}
+
+void test_is_lock_free_aligned(void) {
+  // 1
+  char ch = 'a';
+  assert(__atomic_is_lock_free_c(1, &ch) && "size=1 is not lock free");
+
+  // 2
+  short sh __attribute__((aligned(2))) = 2;
+  assert(__atomic_is_lock_free_c(2, &sh) && "size=2 is not lock free");
+
+  struct S2 {
+    char a;
+    char b;
+  } __attribute__((aligned(2)));
+  assert(2 == sizeof(struct S2));
+  struct S2 s2;
+  assert(__atomic_is_lock_free_c(2, &s2) && "size=2 struct is not lock free");
+
+  // 4
+  int i __attribute__((aligned(4))) = 4;
+  assert(__atomic_is_lock_free_c(4, &i) && "size=4 is not lock free");
+
+  struct S4 {
+    short a;
+    short b;
+  } __attribute__((aligned(4)));
+  assert(4 == sizeof(struct S4));
+  struct S4 s4;
+  assert(__atomic_is_lock_free_c(4, &s4) && "size=4 struct is not lock free");
+
+  // 8
+  double d __attribute__((aligned(8))) = 8.0;
+  assert(__atomic_is_lock_free_c(8, &d) && "size=8 is not lock free");
+
+  struct S8 {
+    int a;
+    int b;
+  } __attribute__((aligned(8)));
+  assert(8 == sizeof(struct S8));
+  struct S8 s8;
+  assert(__atomic_is_lock_free_c(8, &s8) && "size=8 struct is not lock free");
+
+#ifdef __SIZEOF_INT128__
+  // 16
+  struct S16 {
+    double a;
+    double b;
+
+  } __attribute__((aligned(16)));
+
+  assert(16 == sizeof(struct S16));
+  struct S16 s16;
+  assert(__atomic_is_lock_free_c(16, &s16) && "size=16 is not lock free");
+#endif
+
+  fprintf(stdout, "test_is_lock_free_aligned PASSED\n");
+}
+
+void test_is_lock_free_unaligned(void) {
+
+  struct S3 {
+    char a;
+    char b;
+    char c;
+  } __attribute__((aligned(1)));
+  assert(3 == sizeof(struct S3));
+  struct S3 s3;
+  assert(!__atomic_is_lock_free_c(3, &s3) && "size=3 struct is lock free");
+
+  struct S34 {
+    char a;
+    char b;
+    char c;
+  } __attribute__((aligned(4)));
+  assert(4 == sizeof(struct S34));
+  struct S34 s34;
+  assert(__atomic_is_lock_free_c(4, &s34) && "size=3, aligned 4 struct is not lock free");
+
+  struct S5 {
+    char a;
+    char b;
+    char c;
+    char d;
+    char e;
+  } __attribute__((aligned(1)));
+  assert(5 == sizeof(struct S5));
+  struct S5 s5;
+  assert(!__atomic_is_lock_free_c(5, &s5) && "size=5 struct is lock free");
+
+  struct S58 {
+    char a;
+    char b;
+    char c;
+    char d;
+    char e;
+  } __attribute__((aligned(8)));
+  assert(8 == sizeof(struct S58));
+  struct S58 s58;
+  assert(__atomic_is_lock_free_c(sizeof(s58), &s58) && "size=5, aligned 8 struct is not lock free");
+
+  struct S9 {
+    char one;
+    char two;
+    char three;
+    char four;
+    char five;
+    char six;
+    char seven;
+    char eight;
+    char nine;
+  } __attribute__((aligned(1)));
+
+  assert(9 == sizeof(struct S9));
+  struct S9 s9;
+  assert(!__atomic_is_lock_free_c(9, &s9) && "size=9 is lock free");
+
+#ifdef __SIZEOF_INT128__
+  struct S9_16 {
+    char one;
+    char two;
+    char three;
+    char four;
+    char five;
+    char six;
+    char seven;
+    char eight;
+    char nine;
+  } __attribute__((aligned(16)));
+
+  assert(16 == sizeof(struct S9_16));
+  struct S9_16 s9_16;
+  assert(__atomic_is_lock_free_c(sizeof(s9_16), &s9_16) && "size=9, aligned 16 struct is not lock free");
+#endif
+
+  fprintf(stdout, "test_is_lock_free_unaligned PASSED\n");
+}
+
+int main() {
+  test_is_lock_free_0();
+  test_is_lock_free_aligned();
+  test_is_lock_free_unaligned();
+}
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -10,6 +10,9 @@
 /* Bug report URL. */
 #define BUG_REPORT_URL "${BUG_REPORT_URL}"
 
+#cmakedefine COMPILER_RT_HAS_AUXV ${COMPILER_RT_HAS_AUXV}
+#cmakedefine COMPILER_RT_HAS_HWCAP ${COMPILER_RT_HAS_HWCAP}
+
 /* Define to 1 to enable backtraces, and to 0 otherwise. */
 #cmakedefine01 ENABLE_BACKTRACES
 