diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -42,6 +42,7 @@ case llvm::Triple::aarch64: case llvm::Triple::hexagon: case llvm::Triple::ppc64le: + case llvm::Triple::loongarch64: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -77,7 +77,7 @@ set(ALL_XRAY_SUPPORTED_ARCH ${X86_64}) else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} - powerpc64le ${HEXAGON}) + powerpc64le ${HEXAGON} ${LOONGARCH64}) endif() set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64}) diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -47,6 +47,11 @@ xray_trampoline_AArch64.S ) +set(loongarch64_SOURCES + xray_loongarch64.cpp + xray_trampoline_loongarch64.S + ) + set(mips_SOURCES xray_mips.cpp xray_trampoline_mips.S @@ -117,6 +122,7 @@ ${arm_SOURCES} ${armhf_SOURCES} ${hexagon_SOURCES} + ${loongarch64_SOURCES} ${mips_SOURCES} ${mipsel_SOURCES} ${mips64_SOURCES} diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -46,6 +46,8 @@ static const int16_t cSledLength = 32; #elif defined(__arm__) static const int16_t cSledLength = 28; +#elif SANITIZER_LOONGARCH64 +static const int16_t cSledLength = 48; #elif SANITIZER_MIPS32 static const int16_t cSledLength = 48; #elif SANITIZER_MIPS64 diff --git a/compiler-rt/lib/xray/xray_loongarch64.cpp b/compiler-rt/lib/xray/xray_loongarch64.cpp new file mode 100644 --- /dev/null +++ 
b/compiler-rt/lib/xray/xray_loongarch64.cpp @@ -0,0 +1,173 @@ +//===-------- xray_loongarch64.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of LoongArch-specific routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum PatchOpcodes : uint32_t { + PO_ADDID = 0x02c00000, // addi.d rd, rj, imm + PO_SD = 0x29c00000, // st.d rd, base, offset + PO_LU12IW = 0x14000000, // lu12i.w rd, imm + PO_ORI = 0x03800000, // ori rd, rj, imm + PO_LU32ID = 0x16000000, // lu32i.d rd, imm + PO_LU52ID = 0x03000000, // lu52i.d rd, rj, imm + PO_JIRL = 0x4c000000, // jirl rd, rj, 0 + PO_LD = 0x28c00000, // ld.d rd, base, offset + PO_B48 = 0x50003000, // b #48 +}; + +enum RegNum : uint32_t { + RN_T0 = 0xC, // $t0 (r12) + RN_T1 = 0xD, // $t1 (r13) + RN_RA = 0x1, // $ra (r1) + RN_SP = 0x3, // $sp (r3) +}; + +// Encode instructions in 2RI12 format, e.g. addi.d/lu52i.d/ori/ld.d/st.d. +inline static uint32_t +encodeInstruction2RI12(uint32_t Opcode, uint32_t Rd, uint32_t Rj, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rj << 5 | Rd | Imm << 10); +} + +// Encode instructions in 1RI20 format, e.g. lu12i.w/lu32i.d. +inline static uint32_t +encodeInstruction1RI20(uint32_t Opcode, uint32_t Rd, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rd | Imm << 5); +} + +// Encode instructions in 2RI16 format, e.g. jirl. 
+inline static uint32_t +encodeInstruction2RI16(uint32_t Opcode, uint32_t Rd, uint32_t Rj, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rj << 5 | Rd | Imm << 10); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // we replace the following compile-time stub (sled): + // + // xray_sled_n: + // B .tmpN + // 11 NOPs (44 bytes) + // .tmpN: + // + // With the following 11-instruction runtime patch (the last NOP is kept): + // + // xray_sled_n: + // addi.d sp, sp, -16 ;create stack frame + // st.d ra, sp, 8 ;save return address + // lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit) + // ori t0, t0, %abs_lo12(__xray_FunctionEntry/Exit) + // lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit) + // lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit) + // lu12i.w t1, %abs_hi20(function_id) + // ori t1, t1, %abs_lo12(function_id) ;pass function id + // jirl ra, t0, 0 ;call Tracing hook + // ld.d ra, sp, 8 ;restore return address + // addi.d sp, sp, 16 ;delete stack frame + // + // The first 4-byte instruction must be replaced last and atomically, so + // that user code which reaches the sled concurrently either jumps over + // the whole sled, or executes the whole sled once the latter is + // ready. 
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #48 + + uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); + if (Enable) { + uint32_t LoTracingHookAddr = reinterpret_cast<int64_t>(TracingHook) & 0xfff; + uint32_t HiTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 12) & 0xfffff; + uint32_t HigherTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xfffff; + uint32_t HighestTracingHookAddr = + (reinterpret_cast<int64_t>(TracingHook) >> 52) & 0xfff; + uint32_t LoFunctionID = FuncId & 0xfff; + uint32_t HiFunctionID = (FuncId >> 12) & 0xfffff; + Address[1] = encodeInstruction2RI12(PatchOpcodes::PO_SD, RegNum::RN_RA, + RegNum::RN_SP, 0x8); + Address[2] = encodeInstruction1RI20(PatchOpcodes::PO_LU12IW, RegNum::RN_T0, + HiTracingHookAddr); + Address[3] = encodeInstruction2RI12(PatchOpcodes::PO_ORI, RegNum::RN_T0, + RegNum::RN_T0, LoTracingHookAddr); + Address[4] = encodeInstruction1RI20(PatchOpcodes::PO_LU32ID, RegNum::RN_T0, + HigherTracingHookAddr); + Address[5] = encodeInstruction2RI12(PatchOpcodes::PO_LU52ID, RegNum::RN_T0, + RegNum::RN_T0, HighestTracingHookAddr); + Address[6] = encodeInstruction1RI20(PatchOpcodes::PO_LU12IW, RegNum::RN_T1, + HiFunctionID); + Address[7] = encodeInstruction2RI12(PatchOpcodes::PO_ORI, RegNum::RN_T1, + RegNum::RN_T1, LoFunctionID); + Address[8] = encodeInstruction2RI16(PatchOpcodes::PO_JIRL, RegNum::RN_RA, + RegNum::RN_T0, 0); + Address[9] = encodeInstruction2RI12(PatchOpcodes::PO_LD, RegNum::RN_RA, + RegNum::RN_SP, 0x8); + Address[10] = encodeInstruction2RI12(PatchOpcodes::PO_ADDID, RegNum::RN_SP, + RegNum::RN_SP, 0x10); + uint32_t CreateStackSpace = encodeInstruction2RI12( + PatchOpcodes::PO_ADDID, RegNum::RN_SP, RegNum::RN_SP, 0xff0); // -16 + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), + uint32_t(PatchOpcodes::PO_B48), std::memory_order_release); + } + return true; 
+} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Tail exits are currently patched exactly like normal exits; in the + // future we'd need to distinguish them for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Custom event sleds are not patched on LoongArch yet. + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Typed event sleds are not patched on LoongArch yet. + return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: Stub; the real entry point has to live in the trampoline assembly. +} diff --git a/compiler-rt/lib/xray/xray_trampoline_loongarch64.S b/compiler-rt/lib/xray/xray_trampoline_loongarch64.S new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_loongarch64.S @@ -0,0 +1,121 @@ +//===-- xray_trampoline_loongarch64.S ---------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the LoongArch-specific assembler for the trampolines. 
+// +//===----------------------------------------------------------------------===// + + .text + .file "xray_trampoline_loongarch64.S" + .globl __xray_FunctionEntry + .p2align 2 + .type __xray_FunctionEntry,@function +__xray_FunctionEntry: + .cfi_startproc + // Save argument registers; the 144-byte frame keeps $sp 16-byte aligned. + .cfi_def_cfa_offset 144 + addi.d $sp, $sp, -144 + st.d $ra, $sp, 136 + .cfi_offset 1, -8 + st.d $a7, $sp, 120 + st.d $a6, $sp, 112 + st.d $a5, $sp, 104 + st.d $a4, $sp, 96 + st.d $a3, $sp, 88 + st.d $a2, $sp, 80 + st.d $a1, $sp, 72 + st.d $a0, $sp, 64 + fst.d $f7, $sp, 56 + fst.d $f6, $sp, 48 + fst.d $f5, $sp, 40 + fst.d $f4, $sp, 32 + fst.d $f3, $sp, 24 + fst.d $f2, $sp, 16 + fst.d $f1, $sp, 8 + fst.d $f0, $sp, 0 + + la.got $t2, _ZN6__xray19XRayPatchedFunctionE + ld.d $t2, $t2, 0 + + beqz $t2, FunctionEntry_restore + + // a1=0 means that we are tracing an entry event. + move $a1, $zero + // The sled put the function ID in t1; pass it as the first argument. + move $a0, $t1 + jirl $ra, $t2, 0 + +FunctionEntry_restore: + // Restore argument registers + fld.d $f0, $sp, 0 + fld.d $f1, $sp, 8 + fld.d $f2, $sp, 16 + fld.d $f3, $sp, 24 + fld.d $f4, $sp, 32 + fld.d $f5, $sp, 40 + fld.d $f6, $sp, 48 + fld.d $f7, $sp, 56 + ld.d $a0, $sp, 64 + ld.d $a1, $sp, 72 + ld.d $a2, $sp, 80 + ld.d $a3, $sp, 88 + ld.d $a4, $sp, 96 + ld.d $a5, $sp, 104 + ld.d $a6, $sp, 112 + ld.d $a7, $sp, 120 + ld.d $ra, $sp, 136 + addi.d $sp, $sp, 144 + ret +FunctionEntry_end: + .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry + .cfi_endproc + + .text + .globl __xray_FunctionExit + .p2align 2 + .type __xray_FunctionExit,@function +__xray_FunctionExit: + .cfi_startproc + // Save return registers before doing any actual work. 
+ .cfi_def_cfa_offset 48 + addi.d $sp, $sp, -48 + st.d $ra, $sp, 40 + .cfi_offset 1, -8 + st.d $fp, $sp, 32 + st.d $a1, $sp, 24 + st.d $a0, $sp, 16 + fst.d $f1, $sp, 8 + fst.d $f0, $sp, 0 + + la.got $t2, _ZN6__xray19XRayPatchedFunctionE + ld.d $t2, $t2, 0 + + beqz $t2, FunctionExit_restore + + // a1=1 means that we are tracing an exit event. + ori $a1, $zero, 1 + // The sled put the function ID in t1; pass it as the first argument. + move $a0, $t1 + jirl $ra, $t2, 0 + +FunctionExit_restore: + // Restore the saved a0/a1 and f0/f1, then fp and ra. + fld.d $f0, $sp, 0 + fld.d $f1, $sp, 8 + ld.d $a1, $sp, 24 + ld.d $a0, $sp, 16 + ld.d $fp, $sp, 32 + ld.d $ra, $sp, 40 + addi.d $sp, $sp, 48 + ret + +FunctionExit_end: + .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit + .cfi_endproc diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h --- a/compiler-rt/lib/xray/xray_tsc.h +++ b/compiler-rt/lib/xray/xray_tsc.h @@ -43,7 +43,7 @@ #elif defined(__powerpc64__) #include "xray_powerpc64.inc" #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ - defined(__hexagon__) + defined(__hexagon__) || defined(__loongarch_lp64) // Emulated TSC. // There is no instruction like RDTSCP in user mode on ARM. 
ARM's CP15 does // not have a constant frequency like TSC on x86(_64), it may go faster diff --git a/compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp b/compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp --- a/compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp @@ -9,7 +9,7 @@ // RUN: FileCheck %s --check-prefix NOINSTR // RUN: %llvm_xray extract -symbolize %t | \ // RUN: FileCheck %s --check-prefix ALWAYSINSTR -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree // NOINSTR-NOT: {{.*__xray_NeverInstrumented.*}} diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp --- a/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp @@ -6,7 +6,7 @@ // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=arg0-arg1-logging-" %run %t // // TODO: Support these in ARM and PPC -// XFAIL: target={{(arm|aarch64|mips).*}} +// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}} // UNSUPPORTED: target=powerpc64le{{.*}} #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp --- a/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp @@ -11,7 +11,7 @@ // RUN: rm -f arg1-logger-* // // At the time of writing, the ARM trampolines weren't written yet. -// XFAIL: target={{(arm|aarch64|mips).*}} +// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}} // See the mailing list discussion of r296998. 
// UNSUPPORTED: target=powerpc64le{{.*}} diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp --- a/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp @@ -4,7 +4,7 @@ // RUN: rm -f log-args-this-* // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=log-args-this-" %run %t // -// XFAIL: target={{(arm|aarch64|mips).*}} +// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}} // UNSUPPORTED: target=powerpc64le{{.*}} #include "xray/xray_interface.h" #include diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp --- a/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp @@ -23,7 +23,7 @@ // RUN: FileCheck %s --check-prefix TRACE // RUN: rm -f basic-filtering-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include diff --git a/compiler-rt/test/xray/TestCases/Posix/c-test.cpp b/compiler-rt/test/xray/TestCases/Posix/c-test.cpp --- a/compiler-rt/test/xray/TestCases/Posix/c-test.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/c-test.cpp @@ -4,7 +4,7 @@ // RUN: 2>&1 | FileCheck %s // RUN: rm -f xray-log.c-test.* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree __attribute__((xray_always_instrument)) void always() {} diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp --- a/compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp @@ -3,7 +3,7 @@ // // RUN: %clangxx -fno-xray-instrument -c %s 
-o %t.o // RUN: not %llvm_xray extract -symbolize %t.o 2>&1 | FileCheck %s -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree // CHECK: llvm-xray: Cannot extract instrumentation map diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp --- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp @@ -8,7 +8,7 @@ // RUN: [ $FILES -eq 0 ] // RUN: rm -f fdr-inmemory-test-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include "xray/xray_log_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp --- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp @@ -8,7 +8,7 @@ // RUN: [ $FILES -eq 0 ] // RUN: rm -f fdr-inmemory-test-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include "xray/xray_log_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp --- a/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp @@ -8,7 +8,7 @@ // RUN: "`ls fdr-logging-1thr-* | head -n1`" | FileCheck %s // RUN: rm fdr-logging-1thr-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch #include "xray/xray_log_interface.h" #include diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp --- a/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp +++ 
b/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp @@ -8,7 +8,7 @@ // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t.exe %t/* | \ // RUN: FileCheck %s --check-prefix TRACE // FIXME: Make llvm-xray work on non-x86_64 as well. -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include "xray/xray_log_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp --- a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp @@ -11,7 +11,7 @@ // RUN: [ $PROFILES -eq 1 ] // RUN: rm -f xray-log.profiling-multi-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp --- a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp @@ -11,7 +11,7 @@ // RUN: [ $PROFILES -eq 2 ] // RUN: rm -f xray-log.profiling-single-* // -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch // REQUIRES: built-in-llvm-tree #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp b/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp --- a/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp @@ -10,7 +10,7 @@ // // FIXME: Understand how to make this work on other platforms // REQUIRES: built-in-llvm-tree -// REQUIRES: x86_64-target-arch +// REQUIRES: x86_64-target-arch || loongarch64-target-arch #include using namespace std; diff 
--git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -226,6 +226,7 @@ case Triple::ArchType::thumb: case Triple::ArchType::aarch64: case Triple::ArchType::hexagon: + case Triple::ArchType::loongarch64: case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -41,6 +41,12 @@ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) override; + void emitSled(const MachineInstr &MI, SledKind Kind); + + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + // tblgen'erated function. 
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -17,6 +17,8 @@ #include "MCTargetDesc/LoongArchInstPrinter.h" #include "TargetInfo/LoongArchTargetInfo.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/TargetRegistry.h" using namespace llvm; @@ -35,11 +37,78 @@ if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; + switch (MI->getOpcode()) { + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(*MI); + return; + + case TargetOpcode::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(*MI); + return; + } + MCInst TmpInst; if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) EmitToStreamer(*OutStreamer, TmpInst); } +void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( + const MachineInstr &MI) { + // TODO: handle "patchable-function-entry" function attribute + emitSled(MI, SledKind::FUNCTION_ENTER); +} + +void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { + emitSled(MI, SledKind::FUNCTION_EXIT); +} + +void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { + emitSled(MI, SledKind::TAIL_CALL); +} + +void LoongArchAsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) { + const int8_t NoopsInSledCount = 11; + // For loongarch64 we want to emit the following pattern: + // + // .Lxray_sled_beginN: + // ALIGN + // B .Lxray_sled_endN + // 11 NOP instructions (44 bytes) + // .Lxray_sled_endN + // + // We need the 44 bytes (11 instructions) because at runtime, we'd be + // patching over the 48-byte sled with the following 11-instruction pattern: + // + 
// addi.d sp, sp, -16 ;create stack frame + // st.d ra, sp, 8 ;save return address + // lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit) + // ori t0, t0, %abs_lo12(__xray_FunctionEntry/Exit) + // lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit) + // lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit) + // lu12i.w t1, %abs_hi20(function_id) + // ori t1, t1, %abs_lo12(function_id) ;pass function id + // jirl ra, t0, 0 ;call Tracing hook + // ld.d ra, sp, 8 ;restore return address + // addi.d sp, sp, 16 ;delete stack frame + // + // Update compiler-rt/lib/xray/xray_loongarch64.cpp accordingly when the + // number of instructions changes (also cSledLength in xray_interface.cpp). + OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + MCSymbol *BeginOfSled = OutContext.createTempSymbol("xray_sled_begin"); + MCSymbol *EndOfSled = OutContext.createTempSymbol("xray_sled_end"); + OutStreamer->emitLabel(BeginOfSled); + EmitToStreamer(*OutStreamer, + MCInstBuilder(LoongArch::B) + .addExpr(MCSymbolRefExpr::create(EndOfSled, OutContext))); + emitNops(NoopsInSledCount); + OutStreamer->emitLabel(EndOfSled); + recordSled(BeginOfSled, MI, Kind); // FIXME: use version 2 +} + bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) { @@ -112,6 +181,8 @@ bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AsmPrinter::runOnMachineFunction(MF); + // Emit the XRay table for this function. 
+ emitXRayTable(); return true; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -82,6 +82,8 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableDirectMachineOperandTargetFlags() const override; + MCInst getNop() const override; + protected: const LoongArchSubtarget &STI; }; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "MCTargetDesc/LoongArchMatInt.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCInstBuilder.h" using namespace llvm; @@ -486,3 +487,10 @@ {MO_GD_PC_HI, "loongarch-gd-pc-hi"}}; return makeArrayRef(TargetFlags); } + +MCInst LoongArchInstrInfo::getNop() const { + return MCInstBuilder(LoongArch::ANDI) + .addReg(LoongArch::R0) + .addReg(LoongArch::R0) + .addImm(0); // andi $r0, $r0, 0 is the canonical LoongArch nop +} diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -93,6 +93,7 @@ MVT getGRLenVT() const { return GRLenVT; } unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } + bool isXRaySupported() const override { return is64Bit(); } }; } // end namespace llvm diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp --- a/llvm/lib/XRay/InstrumentationMap.cpp +++ b/llvm/lib/XRay/InstrumentationMap.cpp @@ -60,6 +60,7 @@ // Find the section named "xray_instr_map". 
if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) || !(ObjFile.getBinary()->getArch() == Triple::x86_64 || + ObjFile.getBinary()->getArch() == Triple::loongarch64 || ObjFile.getBinary()->getArch() == Triple::ppc64le || ObjFile.getBinary()->getArch() == Triple::arm || ObjFile.getBinary()->getArch() == Triple::aarch64)) diff --git a/llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll @@ -0,0 +1,71 @@ +; RUN: llc --mtriple=loongarch64 %s -o - | FileCheck %s +; RUN: llc --mtriple=loongarch64 -filetype=obj %s -o %t +; RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC + +define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK-LABEL: foo: +; CHECK-LABEL: .Lfunc_begin0: +; CHECK: .p2align 2 +; CHECK-LABEL: .Lxray_sled_begin0: +; CHECK-NEXT: b .Lxray_sled_end0 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-LABEL: .Lxray_sled_end0: + ret i32 0 +; CHECK-LABEL: .Lxray_sled_begin1: +; CHECK-NEXT: b .Lxray_sled_end1 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: .Lxray_sled_end1: +; CHECK-NEXT: ret +; CHECK-NEXT: .Lfunc_end0: +} + +; CHECK-LABEL: .section xray_instr_map +; CHECK-NEXT: .Lxray_sleds_start0: +; CHECK-NEXT: .dword .Lxray_sled_begin0 +; CHECK-NEXT: .dword foo +; CHECK-NEXT: .byte 0x00 +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x00 +; CHECK-NEXT: .space 13 +; CHECK-NEXT: .dword .Lxray_sled_begin1 +; CHECK-NEXT: .dword foo +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x01 +; 
CHECK-NEXT: .byte 0x00 +; CHECK-NEXT: .space 13 +; CHECK-NEXT: .Lxray_sleds_end0: + +; CHECK-LABEL: .section xray_fn_idx +; CHECK: .dword .Lxray_sleds_start0 +; CHECK-NEXT: .dword .Lxray_sleds_end0 + +; RELOC: Section ([[#]]) .relaxray_instr_map { +; RELOC-NEXT: 0x0 R_LARCH_64 .text 0x0 +; RELOC-NEXT: 0x8 R_LARCH_64 foo 0x0 +; RELOC-NEXT: 0x20 R_LARCH_64 .text 0x34 +; RELOC-NEXT: 0x28 R_LARCH_64 foo 0x0 +; RELOC-NEXT: } +; RELOC-NEXT: Section ([[#]]) .relaxray_fn_idx { +; RELOC-NEXT: 0x0 R_LARCH_64 xray_instr_map 0x0 +; RELOC-NEXT: 0x8 R_LARCH_64 xray_instr_map 0x40 +; RELOC-NEXT: }