diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -28,6 +28,12 @@
 asm(\"cas w0, w1, [x2]\");
 ")
 
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME
+"
+asm(\".arch armv9-a+sme\");
+asm(\"smstart\");
+")
+
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
 set(AVR avr)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -556,6 +556,7 @@
   ${GENERIC_SOURCES}
   cpu_model.c
   aarch64/fp_mode.c
+  aarch64/sme-abi.S
 )
 
 # Generate outline atomics helpers from lse.S base
@@ -781,6 +782,7 @@
   endif()
 
   append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
+  append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -0,0 +1,175 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Support routines for the SME ABI (__arm_sme_state, __arm_tpidr2_restore,
+// __arm_tpidr2_save and __arm_za_disable), as described in the AArch64
+// procedure call standard (AAPCS64).
+
+#include "assembly.h"
+
+#ifdef __aarch64__
+
+// If the assembler understands SME, use the mnemonics directly; otherwise
+// target armv8-a and fall back to hard-coded instruction encodings and
+// generic system-register names.
+#ifdef HAS_ASM_SME
+#define ARCH armv9-a+sme
+#define SMSTOP_SM smstop sm
+#define SMSTOP_ZA smstop za
+#define REG_TPIDR2_EL0 TPIDR2_EL0
+#define REG_SVCR SVCR
+#define RDSVL_X15_1 rdsvl x15, #1
+#define RDSVL_X14_1 rdsvl x14, #1
+#define LDR_ZA_W12_0_X16 ldr za[w12, 0], [x16]
+#define STR_ZA_W12_0_X16 str za[w12, 0], [x16]
+#else
+#define ARCH armv8-a
+#define SMSTOP_SM .inst 0xd503427f
+#define SMSTOP_ZA .inst 0xd503447f
+#define REG_TPIDR2_EL0 S3_3_C13_C0_5
+#define REG_SVCR S3_3_C4_C2_2
+#define RDSVL_X15_1 .inst 0x04bf582f
+#define RDSVL_X14_1 .inst 0x04bf582e
+#define LDR_ZA_W12_0_X16 .inst 0xe1000200
+#define STR_ZA_W12_0_X16 .inst 0xe1200200
+#endif
+
+.arch ARCH
+
+// Utility function which calls the system's abort() routine. Because the
+// function is streaming-compatible it must leave streaming-SVE mode before
+// calling abort(). There is no need to preserve any state before the call,
+// because the function does not return.
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
+  .variant_pcs do_abort
+  bl __arm_sme_state
+  // Bit 0 of the result mirrors PSTATE.SM; only leave streaming mode if it
+  // is currently set.
+  tbz x0, #0, 1f
+  SMSTOP_SM
+1:
+  bl SYMBOL_NAME(abort)
+END_COMPILERRT_FUNCTION(do_abort)
+
+// __arm_sme_state: report the current PSTATE.SM/ZA bits (copied from SVCR)
+// in bits 1:0 of x0, set flag bits 63:62 of x0, and return the value of
+// TPIDR2_EL0 in x1. All results are zero when the SME check fails.
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
+  .variant_pcs __arm_sme_state
+  mov x0, xzr
+  mov x1, xzr
+  mrs x16, CPTR_EL3
+  tbz w16, #12, 1f
+  orr x0, x0, #0xC000000000000000
+  mrs x16, REG_SVCR
+  bfxil x0, x16, #0, #2
+  mrs x1, REG_TPIDR2_EL0
+1:
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
+
+// __arm_tpidr2_restore: restore the ZA array from the lazy-save buffer
+// described by the TPIDR2 block pointed to by x0.
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
+  .variant_pcs __arm_tpidr2_restore
+  stp x29, x30, [sp, #-32]!
+  str x12, [sp, #16]
+  // Abort if a lazy-save is still pending (TPIDR2_EL0 is non-null) or if
+  // any of the reserved bytes at offsets 10-15 of the block are non-zero.
+  mrs x14, REG_TPIDR2_EL0
+  cbnz x14, 3f
+  ldrh w14, [x0, #10]
+  cbnz w14, 3f
+  ldr w14, [x0, #12]
+  cbnz w14, 3f
+  // x14 = streaming vector length in bytes (the size of one ZA slice).
+  RDSVL_X14_1
+  // x15 = num_za_save_slices (offset 8); nothing to do if it is zero.
+  ldrh w15, [x0, #8]
+  cbz x15, 2f
+  mov x12, xzr
+  // x16 = za_save_buffer (offset 0).
+  ldr x16, [x0]
+1:
+  // Reload one horizontal ZA slice per iteration, advancing by SVL bytes.
+  LDR_ZA_W12_0_X16
+  add x16, x16, x14
+  add x12, x12, #1
+  cmp x15, x12
+  b.ne 1b
+2:
+  ldr x12, [sp, #16]
+  ldp x29, x30, [sp], #32
+  ret
+3:
+  bl SYMBOL_NAME(do_abort)
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
+
+// __arm_tpidr2_save: if a lazy-save is pending (TPIDR2_EL0 points to a valid
+// TPIDR2 block with a non-null buffer), spill the live ZA slices to that
+// buffer.
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
+  .variant_pcs __arm_tpidr2_save
+  stp x29, x30, [sp, #-32]!
+  str x12, [sp, #16]
+  mov x29, sp
+  mrs x14, CPTR_EL3
+  tbz w14, #12, 2f
+  // Nothing to do if no lazy-save was set up.
+  mrs x16, REG_TPIDR2_EL0
+  cbz x16, 2f
+  // Abort if any of the reserved bytes at offsets 10-15 of the block are
+  // non-zero.
+  ldrh w14, [x16, #10]
+  cbnz w14, 3f
+  ldr w14, [x16, #12]
+  cbnz w14, 3f
+  // Nothing to do if za_save_buffer is null or num_za_save_slices is zero.
+  ldr x14, [x16]
+  cbz x14, 2f
+  // x14 = streaming vector length in bytes (the size of one ZA slice).
+  RDSVL_X14_1
+  ldrh w15, [x16, #8]
+  cbz x15, 2f
+  mov x12, xzr
+  ldr x16, [x16]
+1:
+  // Store one horizontal ZA slice per iteration, advancing by SVL bytes.
+  STR_ZA_W12_0_X16
+  add x16, x16, x14
+  add x12, x12, #1
+  cmp x15, x12
+  b.ne 1b
+2:
+  ldr x12, [sp, #16]
+  ldp x29, x30, [sp], #32
+  ret
+3:
+  bl SYMBOL_NAME(do_abort)
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
+
+// __arm_za_disable: commit any pending lazy-save, then clear TPIDR2_EL0 and
+// turn ZA off.
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
+  .variant_pcs __arm_za_disable
+  stp x29, x30, [sp, #-32]!
+  str x12, [sp, #16]
+  mov x29, sp
+  mrs x14, CPTR_EL3
+  tbz w14, #12, 3f
+  // If a lazy-save is pending, validate the TPIDR2 block and spill ZA first
+  // (same checks and loop as __arm_tpidr2_save).
+  mrs x16, REG_TPIDR2_EL0
+  cbz x16, 2f
+  ldrh w14, [x16, #10]
+  cbnz w14, 4f
+  ldr w14, [x16, #12]
+  cbnz w14, 4f
+  ldr x14, [x16]
+  cbz x14, 2f
+  RDSVL_X14_1
+  ldrh w15, [x16, #8]
+  cbz x15, 2f
+  mov x12, xzr
+  ldr x16, [x16]
+1:
+  STR_ZA_W12_0_X16
+  add x16, x16, x14
+  add x12, x12, #1
+  cmp x15, x12
+  b.ne 1b
+2:
+  // Clear TPIDR2_EL0 and disable ZA.
+  mov x14, xzr
+  msr REG_TPIDR2_EL0, x14
+  SMSTOP_ZA
+3:
+  ldr x12, [sp, #16]
+  ldp x29, x30, [sp], #32
+  ret
+4:
+  bl SYMBOL_NAME(do_abort)
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
+
+#endif // __aarch64__
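
Note on the TPIDR2 block layout: the field offsets that __arm_tpidr2_save and
__arm_tpidr2_restore hard-code above (buffer pointer at offset 0, 16-bit slice
count at offset 8, reserved bytes at offsets 10-15 that must be zero) can be
pictured with the C sketch below. The struct and field names are illustrative
assumptions, not something this patch defines; only the offsets are taken from
the assembly.

/* Illustrative C view of the 16-byte TPIDR2 block walked by the routines
 * above. Field names are assumptions; offsets mirror the assembly
 * (ldr x16, [x0] / ldrh w15, [x0, #8] / the checks at #10 and #12). */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct tpidr2_block {
  void *za_save_buffer;        /* offset 0: base of the ZA lazy-save area */
  uint16_t num_za_save_slices; /* offset 8: number of ZA slices to save   */
  uint8_t reserved[6];         /* offsets 10-15: must be zero, else abort */
};

static_assert(offsetof(struct tpidr2_block, za_save_buffer) == 0, "layout");
static_assert(offsetof(struct tpidr2_block, num_za_save_slices) == 8, "layout");
static_assert(offsetof(struct tpidr2_block, reserved) == 10, "layout");
static_assert(sizeof(struct tpidr2_block) == 16, "layout");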
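
A second hedged sketch: the save/restore loops above copy one horizontal ZA
slice per iteration and advance the buffer pointer by the streaming vector
length in bytes (rdsvl #1), so the lazy-save area referenced by a TPIDR2 block
needs num_za_save_slices * SVL bytes. The svl_bytes() helper below is an
assumed stand-in for however the caller obtains the streaming vector length;
it is not part of this patch.

#include <stddef.h>
#include <stdint.h>

/* Assumed helper: returns the streaming vector length in bytes, i.e. the
 * value the routines above read with "rdsvl x14, #1". */
extern uint64_t svl_bytes(void);

/* Minimum size in bytes of the ZA lazy-save buffer for a given slice count:
 * one SVL-byte slice per entry counted by num_za_save_slices. */
static inline size_t za_save_buffer_size(uint16_t num_za_save_slices) {
  return (size_t)num_za_save_slices * (size_t)svl_bytes();
}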