Index: compiler-rt/lib/builtins/CMakeLists.txt
===================================================================
--- compiler-rt/lib/builtins/CMakeLists.txt
+++ compiler-rt/lib/builtins/CMakeLists.txt
@@ -638,6 +638,7 @@
 set(riscv_SOURCES
   riscv/save.S
   riscv/restore.S
+  riscv/restore_tailcall.S
   ${GENERIC_SOURCES}
   ${GENERIC_TF_SOURCES}
 )
Index: compiler-rt/lib/builtins/riscv/restore_tailcall.S
===================================================================
--- /dev/null
+++ compiler-rt/lib/builtins/riscv/restore_tailcall.S
@@ -0,0 +1,293 @@
+//===-- restore_tailcall.S - restore up to 12 callee-save registers -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiple entry points depending on the number of registers to restore.
+// Restores registers and then performs a tail call.
+//
+//===----------------------------------------------------------------------===//
+
+// These are alternative entry points to the normal __riscv_restore_
+// functions which allow the save-restore mechanism to be used even when the
+// caller is making a tail call. These entry points take a single argument
+// in `t1` which is the address of the function to jump to after restoring
+// the registers and stack pointer.
+
+// All of the entry points are in the same section since we rely on many of
+// them falling through into each other and don't want the linker to
+// accidentally split them up, garbage-collect them, or reorder them.
+//
+// The entry points are grouped into 2s for rv64 and 4s for rv32 since this
+// is the minimum grouping which maintains the required 16-byte stack
+// alignment.
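+//
+// For illustration only (this calling sequence is a sketch and is not part
+// of this file): a function built with -msave-restore whose body ends in a
+// tail call to a hypothetical `bar` might use these entry points roughly as
+// follows. The prologue side (`call t0, __riscv_save_2`) is provided by the
+// existing save.S routines and is shown only for context.
+//
+//   foo:
+//     call t0, __riscv_save_2          // spill ra, s0, s1 and adjust sp
+//     ...                              // function body using s0 and s1
+//     la   t1, bar                     // t1 = address of tail-call target
+//     j    __riscv_restore_tailcall_2  // reload ra, s0, s1, pop the frame,
+//                                      // then `jr t1` into bar
+//
+// A plain `j` is used for the final jump because the `tail` pseudoinstruction
+// expands using t1 as its scratch register and would clobber the argument.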
+
+  .text
+
+#if __riscv_xlen == 32
+
+  .globl __riscv_restore_tailcall_12
+  .type __riscv_restore_tailcall_12,@function
+__riscv_restore_tailcall_12:
+  .cfi_startproc
+  .cfi_def_cfa_offset 64
+  .cfi_offset s11, -64+12
+  .cfi_offset s10, -64+16
+  .cfi_offset s9, -64+20
+  .cfi_offset s8, -64+24
+  .cfi_offset s7, -64+28
+  .cfi_offset s6, -64+32
+  .cfi_offset s5, -64+36
+  .cfi_offset s4, -64+40
+  .cfi_offset s3, -64+44
+  .cfi_offset s2, -64+48
+  .cfi_offset s1, -64+52
+  .cfi_offset s0, -64+56
+  .cfi_offset ra, -64+60
+  lw s11, 12(sp)
+  .cfi_restore s11
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_11/10/9/8
+
+  .globl __riscv_restore_tailcall_11
+  .type __riscv_restore_tailcall_11,@function
+  .globl __riscv_restore_tailcall_10
+  .type __riscv_restore_tailcall_10,@function
+  .globl __riscv_restore_tailcall_9
+  .type __riscv_restore_tailcall_9,@function
+  .globl __riscv_restore_tailcall_8
+  .type __riscv_restore_tailcall_8,@function
+__riscv_restore_tailcall_11:
+__riscv_restore_tailcall_10:
+__riscv_restore_tailcall_9:
+__riscv_restore_tailcall_8:
+  .cfi_restore s11
+  .cfi_def_cfa_offset 48
+  lw s10, 0(sp)
+  .cfi_restore s10
+  lw s9, 4(sp)
+  .cfi_restore s9
+  lw s8, 8(sp)
+  .cfi_restore s8
+  lw s7, 12(sp)
+  .cfi_restore s7
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_7/6/5/4
+
+  .globl __riscv_restore_tailcall_7
+  .type __riscv_restore_tailcall_7,@function
+  .globl __riscv_restore_tailcall_6
+  .type __riscv_restore_tailcall_6,@function
+  .globl __riscv_restore_tailcall_5
+  .type __riscv_restore_tailcall_5,@function
+  .globl __riscv_restore_tailcall_4
+  .type __riscv_restore_tailcall_4,@function
+__riscv_restore_tailcall_7:
+__riscv_restore_tailcall_6:
+__riscv_restore_tailcall_5:
+__riscv_restore_tailcall_4:
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 32
+  lw s6, 0(sp)
+  .cfi_restore s6
+  lw s5, 4(sp)
+  .cfi_restore s5
+  lw s4, 8(sp)
+  .cfi_restore s4
+  lw s3, 12(sp)
+  .cfi_restore s3
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_3/2/1/0
+
+  .globl __riscv_restore_tailcall_3
+  .type __riscv_restore_tailcall_3,@function
+  .globl __riscv_restore_tailcall_2
+  .type __riscv_restore_tailcall_2,@function
+  .globl __riscv_restore_tailcall_1
+  .type __riscv_restore_tailcall_1,@function
+  .globl __riscv_restore_tailcall_0
+  .type __riscv_restore_tailcall_0,@function
+__riscv_restore_tailcall_3:
+__riscv_restore_tailcall_2:
+__riscv_restore_tailcall_1:
+__riscv_restore_tailcall_0:
+  .cfi_restore s3
+  .cfi_restore s4
+  .cfi_restore s5
+  .cfi_restore s6
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 16
+  lw s2, 0(sp)
+  .cfi_restore s2
+  lw s1, 4(sp)
+  .cfi_restore s1
+  lw s0, 8(sp)
+  .cfi_restore s0
+  lw ra, 12(sp)
+  .cfi_restore ra
+  addi sp, sp, 16
+  .cfi_def_cfa_offset 0
+  jr t1
+  .cfi_endproc
+
+#elif __riscv_xlen == 64
+
+  .globl __riscv_restore_tailcall_12
+  .type __riscv_restore_tailcall_12,@function
+__riscv_restore_tailcall_12:
+  .cfi_startproc
+  .cfi_def_cfa_offset 112
+  .cfi_offset s11, -112+8
+  .cfi_offset s10, -112+16
+  .cfi_offset s9, -112+24
+  .cfi_offset s8, -112+32
+  .cfi_offset s7, -112+40
+  .cfi_offset s6, -112+48
+  .cfi_offset s5, -112+56
+  .cfi_offset s4, -112+64
+  .cfi_offset s3, -112+72
+  .cfi_offset s2, -112+80
+  .cfi_offset s1, -112+88
+  .cfi_offset s0, -112+96
+  .cfi_offset ra, -112+104
+  ld s11, 8(sp)
+  .cfi_restore s11
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_11/10
+
+  .globl __riscv_restore_tailcall_11
+  .type __riscv_restore_tailcall_11,@function
+  .globl __riscv_restore_tailcall_10
+  .type __riscv_restore_tailcall_10,@function
+__riscv_restore_tailcall_11:
+__riscv_restore_tailcall_10:
+  .cfi_restore s11
+  .cfi_def_cfa_offset 96
+  ld s10, 0(sp)
+  .cfi_restore s10
+  ld s9, 8(sp)
+  .cfi_restore s9
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_9/8
+
+  .globl __riscv_restore_tailcall_9
+  .type __riscv_restore_tailcall_9,@function
+  .globl __riscv_restore_tailcall_8
+  .type __riscv_restore_tailcall_8,@function
+__riscv_restore_tailcall_9:
+__riscv_restore_tailcall_8:
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 80
+  ld s8, 0(sp)
+  .cfi_restore s8
+  ld s7, 8(sp)
+  .cfi_restore s7
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_7/6
+
+  .globl __riscv_restore_tailcall_7
+  .type __riscv_restore_tailcall_7,@function
+  .globl __riscv_restore_tailcall_6
+  .type __riscv_restore_tailcall_6,@function
+__riscv_restore_tailcall_7:
+__riscv_restore_tailcall_6:
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 64
+  ld s6, 0(sp)
+  .cfi_restore s6
+  ld s5, 8(sp)
+  .cfi_restore s5
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_5/4
+
+  .globl __riscv_restore_tailcall_5
+  .type __riscv_restore_tailcall_5,@function
+  .globl __riscv_restore_tailcall_4
+  .type __riscv_restore_tailcall_4,@function
+__riscv_restore_tailcall_5:
+__riscv_restore_tailcall_4:
+  .cfi_restore s5
+  .cfi_restore s6
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 48
+  ld s4, 0(sp)
+  .cfi_restore s4
+  ld s3, 8(sp)
+  .cfi_restore s3
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_3/2
+
+  .globl __riscv_restore_tailcall_3
+  .type __riscv_restore_tailcall_3,@function
+  .globl __riscv_restore_tailcall_2
+  .type __riscv_restore_tailcall_2,@function
+  .globl __riscv_restore_tailcall_1
+  .type __riscv_restore_tailcall_1,@function
+  .globl __riscv_restore_tailcall_0
+  .type __riscv_restore_tailcall_0,@function
+__riscv_restore_tailcall_3:
+__riscv_restore_tailcall_2:
+  .cfi_restore s3
+  .cfi_restore s4
+  .cfi_restore s5
+  .cfi_restore s6
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 32
+  ld s2, 0(sp)
+  .cfi_restore s2
+  ld s1, 8(sp)
+  .cfi_restore s1
+  addi sp, sp, 16
+  // fallthrough into __riscv_restore_tailcall_1/0
+
+__riscv_restore_tailcall_1:
+__riscv_restore_tailcall_0:
+  .cfi_restore s1
+  .cfi_restore s2
+  .cfi_restore s3
+  .cfi_restore s4
+  .cfi_restore s5
+  .cfi_restore s6
+  .cfi_restore s7
+  .cfi_restore s8
+  .cfi_restore s9
+  .cfi_restore s10
+  .cfi_restore s11
+  .cfi_def_cfa_offset 16
+  ld s0, 0(sp)
+  .cfi_restore s0
+  ld ra, 8(sp)
+  .cfi_restore ra
+  addi sp, sp, 16
+  .cfi_def_cfa_offset 0
+  jr t1
+  .cfi_endproc
+
+#else
+# error "xlen must be 32 or 64 for save-restore implementation"
+#endif