diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -30,6 +30,12 @@
 "
 )
 
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE
+"
+asm(\".arch armv8-a+lse\");
+asm(\"cas w0, w1, [x2]\");
+")
+
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
 set(HEXAGON hexagon)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -504,9 +504,45 @@
 set(aarch64_SOURCES
   ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES}
+  cpu_model.c
   aarch64/fp_mode.c
 )
 
+# Generate the outline atomics helpers: aarch64/lse.S is preprocessed once per
+# (operation, size, memory model) combination and each result is added to the
+# aarch64 sources.
+# CMAKE_C_FLAGS is a space-separated string; split it into a list so that each
+# flag reaches the preprocessor as a separate argument.
+string(REPLACE " " ";" CUSTOM_FLAGS "${CMAKE_C_FLAGS}")
+if(NOT ANDROID)
+  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN CUSTOM_FLAGS)
+endif()
+append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS)
+
+foreach(pat cas swp ldadd ldclr ldeor ldset)
+  foreach(size 1 2 4 8 16)
+    foreach(model 1 2 3 4)
+      # Only cas has a 16-byte variant.
+      if("${pat}" STREQUAL "cas" OR NOT "${size}" STREQUAL "16")
+        set(helper_asm outline_atomic_${pat}${size}_${model}.S)
+        add_custom_command(
+          OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${helper_asm}"
+          COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS}
+                  -DL_${pat} -DSIZE=${size} -DMODEL=${model}
+                  "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S"
+                  -o "${CMAKE_CURRENT_BINARY_DIR}/${helper_asm}"
+          DEPENDS
+            "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S"
+            "${CMAKE_CURRENT_SOURCE_DIR}/assembly.h"
+          VERBATIM
+        )
+        set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE)
+        list(APPEND aarch64_SOURCES ${helper_asm})
+      endif()
+    endforeach()
+  endforeach()
+endforeach()
+
 if (MINGW)
   set(aarch64_SOURCES
     ${aarch64_SOURCES}
diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/lse.S
@@ -0,0 +1,227 @@
+// Part of the LLVM Project,
under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// Out-of-line LSE atomics helpers. Ported from libgcc library.
+// N = {1, 2, 4, 8}
+// M = {1, 2, 4, 8, 16}
+// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
+// Routines implemented:
+//
+//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+//
+// Routines may modify temporary registers tmp0, tmp1, tmp2,
+// return value x0 (and x1 for the 16-byte cas) and the flags only.
+
+#ifdef __aarch64__
+
+#ifdef HAS_ASM_LSE
+.arch armv8-a+lse
+#else // LSE instructions are then emitted as raw .inst words
+.arch armv8-a
+#endif
+
+HIDDEN(__aarch64_have_lse_atomics)
+
+// Generate mnemonics for
+// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
+// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4
+
+#if SIZE == 1 // S: size suffix, UXT: copy/extend op, B: .inst size bits
+#define S b
+#define UXT uxtb
+#define B 0x00000000
+#elif SIZE == 2
+#define S h
+#define UXT uxth
+#define B 0x40000000
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16 // no mnemonic suffix
+#define S
+#define UXT mov
+#if SIZE == 4
+#define B 0x80000000
+#elif SIZE == 8
+#define B 0xc0000000
+#endif // B stays undefined for SIZE == 16 (the casp .inst does not use it)
+#else
+#error
+#endif // SIZE
+
+#if MODEL == 1 // A/L: acquire/release letters, M/N: .inst ordering bits
+#define SUFF _relax
+#define A
+#define L
+#define M 0x000000
+#define N 0x000000
+#elif MODEL == 2
+#define SUFF _acq
+#define A a
+#define L
+#define M 0x400000
+#define N 0x800000
+#elif MODEL == 3
+#define SUFF _rel
+#define A
+#define L l
+#define M 0x008000
+#define N 0x400000
+#elif MODEL == 4
+#define SUFF _acq_rel
+#define A a
+#define L l
+#define M 0x408000
+#define N 0xc00000
+#else
+#error
+#endif // MODEL
+
+// Define register size.
+#define x(N) GLUE2(x, N)
+#define w(N) GLUE2(w, N)
+#if SIZE < 8
+#define s(N) w(N)
+#else
+#define s(N) x(N)
+#endif
+
+#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
+#define LDXR GLUE4(ld, A, xr, S)
+#define STXR GLUE4(st, L, xr, S)
+
+// Define temporary registers (numbers are pasted onto x/w by x()/w()).
+#define tmp0 16
+#define tmp1 17
+#define tmp2 15
+
+// Macro for branch to label if no LSE available
+.macro JUMP_IF_NOT_LSE label
+        adrp    x(tmp0), __aarch64_have_lse_atomics
+        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
+        cbz     w(tmp0), \label
+.endm
+
+#ifdef L_cas
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
+        JUMP_IF_NOT_LSE 8f
+#if SIZE < 16
+#ifdef HAS_ASM_LSE
+#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
+#else
+#define CAS .inst 0x08a07c41 + B + M
+#endif
+        CAS // s(0), s(1), [x2]
+        ret
+8: // no LSE: emulate with a load/store-exclusive loop
+        UXT     s(tmp0), s(0)
+0:
+        LDXR    s(0), [x2]
+        cmp     s(0), s(tmp0)
+        bne     1f // loaded value differs from expected: return without storing
+        STXR    w(tmp1), s(1), [x2]
+        cbnz    w(tmp1), 0b // non-zero STXR status: retry
+1:
+        ret
+#else
+#define LDXP GLUE3(ld, A, xp)
+#define STXP GLUE3(st, L, xp)
+#ifdef HAS_ASM_LSE
+#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
+#else
+#define CASP .inst 0x48207c82 + M
+#endif
+
+        CASP // x0, x1, x2, x3, [x4]
+        ret
+8: // no LSE: emulate with a load/store-exclusive pair loop
+        mov     x(tmp0), x0
+        mov     x(tmp1), x1
+0:
+        LDXP    x0, x1, [x4]
+        cmp     x0, x(tmp0)
+        ccmp    x1, x(tmp1), #0, eq
+        bne     1f // either half differs from expected: return without storing
+        STXP    w(tmp2), x2, x3, [x4]
+        cbnz    w(tmp2), 0b // non-zero STXP status: retry
+1:
+        ret
+#endif
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
+#endif // L_cas
+
+#ifdef L_swp
+#ifdef HAS_ASM_LSE
+#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
+#else
+#define SWP .inst 0x38208020 + B + N
+#endif
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
+        JUMP_IF_NOT_LSE 8f
+        SWP // s(0), s(0), [x1]
+        ret
+8: // no LSE: emulate with a load/store-exclusive loop
+        mov     s(tmp0), s(0)
+0:
+        LDXR    s(0), [x1]
+        STXR    w(tmp1), s(tmp0), [x1]
+        cbnz    w(tmp1), 0b // non-zero STXR status: retry
+        ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
+#endif // L_swp
+
+#if defined(L_ldadd) || defined(L_ldclr) || \
+    defined(L_ldeor) || defined(L_ldset)
+
+#ifdef L_ldadd
+#define LDNM ldadd
+#define OP add
+#define OPN 0x0000
+#elif
defined(L_ldclr)
+#define LDNM ldclr
+#define OP bic
+#define OPN 0x1000
+#elif defined(L_ldeor)
+#define LDNM ldeor
+#define OP eor
+#define OPN 0x2000
+#elif defined(L_ldset)
+#define LDNM ldset
+#define OP orr
+#define OPN 0x3000
+#else
+#error
+#endif
+
+#ifdef HAS_ASM_LSE
+#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
+#else
+#define LDOP .inst 0x38200020 + OPN + B + N
+#endif
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
+        JUMP_IF_NOT_LSE 8f
+        LDOP // s(0), s(0), [x1]
+        ret
+8: // no LSE: emulate with a load/store-exclusive loop
+        mov     s(tmp0), s(0)
+0:
+        LDXR    s(0), [x1]
+        OP      s(tmp1), s(0), s(tmp0) // add/bic/eor/orr, selected by L_ld*
+        STXR    w(tmp2), s(tmp1), [x1]
+        cbnz    w(tmp2), 0b // non-zero STXR status: retry
+        ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
+#endif // L_ldadd L_ldclr L_ldeor L_ldset
+
+NO_EXEC_STACK_DIRECTIVE
+
+// GNU property note for BTI and PAC
+GNU_PROPERTY_BTI_PAC
+
+#endif // __aarch64__
diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -35,14 +35,18 @@
 #define HIDDEN(name) .hidden name
 #define LOCAL_LABEL(name) .L_##name
 #define FILE_LEVEL_DIRECTIVE
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
 #define SYMBOL_IS_FUNC(name) .type name,%function
+#define FUNC_ALIGN                                                             \
+  .text SEPARATOR                                                              \
+  .balign 16 SEPARATOR
 #else
 #define SYMBOL_IS_FUNC(name) .type name,@function
+#define FUNC_ALIGN
 #endif
 #define CONST_SECTION .section .rodata
 
-#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) ||       \
     defined(__linux__)
 #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
 #else
@@ -65,6 +69,58 @@
 
 #endif
 
+// BTI and PAC GNU property note (.note.gnu.property) constants
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT) // BTI_FLAG: BTI note bit, or 0
+#define BTI_FLAG
GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+#define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3 // PAC_FLAG: PAC note bit, or 0
+#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+#else
+#define PAC_FLAG 0
+#endif
+
+#define GNU_PROPERTY(type, value)                                              \
+  .pushsection .note.gnu.property, "a" SEPARATOR                               \
+  .p2align 3 SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word 16 SEPARATOR                                                           \
+  .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR                                       \
+  .asciz "GNU" SEPARATOR                                                       \
+  .word type SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word value SEPARATOR                                                        \
+  .word 0 SEPARATOR                                                            \
+  .popsection
+
+#if BTI_FLAG != 0 // "hint #34" encodes "bti c" without needing a BTI-aware .arch
+#define BTI_C hint #34
+#else
+#define BTI_C
+#endif
+
+#if (BTI_FLAG | PAC_FLAG) != 0 // emit the note only when a feature is enabled
+#define GNU_PROPERTY_BTI_PAC                                                   \
+  GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#else
+#define GNU_PROPERTY_BTI_PAC
+#endif
+
+#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#define CFI_START .cfi_startproc
+#define CFI_END .cfi_endproc
+#else // no CFI directives are emitted
+#define CFI_START
+#define CFI_END
+#endif
+
 #if defined(__arm__)
 
 // Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
@@ -131,8 +187,14 @@
 #define DEFINE_CODE_STATE
 #endif
 
-#define GLUE2(a, b) a##b
-#define GLUE(a, b) GLUE2(a, b)
+#define GLUE2_(a, b) a##b
+#define GLUE(a, b) GLUE2_(a, b)
+#define GLUE2(a, b) GLUE2_(a, b)
+#define GLUE3_(a, b, c) a##b##c
+#define GLUE3(a, b, c) GLUE3_(a, b, c)
+#define GLUE4_(a, b, c, d) a##b##c##d
+#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
+
 #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
 
 #ifdef VISIBILITY_HIDDEN
@@ -177,6 +239,16 @@
   DECLARE_FUNC_ENCODING                                                        \
   name:
 
+#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name)                     \
+  DEFINE_CODE_STATE                                                            \
+  FUNC_ALIGN                                                                   \
+  .globl name SEPARATOR                                                        \
+  SYMBOL_IS_FUNC(name) SEPARATOR                                               \
+  DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \
+  CFI_START SEPARATOR                                                          \
+  DECLARE_FUNC_ENCODING                                                        \
+  name: SEPARATOR BTI_C
+
 #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
@@
-193,8 +265,12 @@
 #ifdef __ELF__
 #define END_COMPILERRT_FUNCTION(name)                                          \
   .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)                                  \
+  CFI_END SEPARATOR                                                            \
+  .size name, . - name
 #else
 #define END_COMPILERRT_FUNCTION(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)
 #endif
 
 #endif // COMPILERRT_ASSEMBLY_H
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -8,10 +8,21 @@
 //
 // This file is based on LLVM's lib/Support/Host.cpp.
 // It implements the operating system Host concept and builtin
-// __cpu_model for the compiler_rt library, for x86 only.
+// __cpu_model for the compiler_rt library for x86 and
+// __aarch64_have_lse_atomics for AArch64.
 //
 //===----------------------------------------------------------------------===//
 
+#if defined(HAVE_INIT_PRIORITY) // constructor priority needs parentheses
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
+#else
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
+#endif
+
 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
      defined(_M_X64)) &&                                                       \
     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
@@ -665,16 +676,6 @@
 #undef setFeature
 }
 
-#if defined(HAVE_INIT_PRIORITY)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
-#elif __has_attribute(__constructor__)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
-#else
-// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
-// this runs during initialization.
-#define CONSTRUCTOR_ATTRIBUTE
-#endif
-
 #ifndef _WIN32
 __attribute__((visibility("hidden")))
 #endif
@@ -750,4 +751,49 @@
   return 0;
 }
 
+#elif defined(__aarch64__)
+// LSE support detection for out-of-line atomics
+// using HWCAP and Auxiliary vector
+_Bool __aarch64_have_lse_atomics
+    __attribute__((visibility("hidden"), nocommon));
+
+#if defined(__linux__) || defined(__Fuchsia__)
+
+// NOTE(review): both header names were missing from the #include lines;
+// <features.h> (for __GLIBC_PREREQ) and <sys/auxv.h> (for getauxval) are
+// presumed - confirm.
+#include <features.h>
+#ifndef __GLIBC_PREREQ
+#define __GLIBC_PREREQ(a, b) 0
+#endif
+
+#if (__GLIBC_PREREQ(2, 16) ||                                                  \
+     (defined(__ANDROID__) && __ANDROID_API__ >= 21) || defined(__Fuchsia__))
+#include <sys/auxv.h>
+#else
+// Weak declaration: the address is null when libc has no getauxval.
+__attribute__((weak)) unsigned long getauxval(unsigned long);
+#define GETAUXVAL_IS_WEAK 1
 #endif
+
+// Fallbacks for C libraries whose headers do not define these.
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif
+
+// Records whether AT_HWCAP reports the HWCAP_ATOMICS bit.
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+#ifdef GETAUXVAL_IS_WEAK
+  // Without getauxval, keep the zero-initialized "no LSE" value.
+  if (!getauxval)
+    return;
+#endif
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+#endif // defined(__linux__) || defined(__Fuchsia__)
+
+#endif // defined(__aarch64__)