diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h @@ -54,15 +54,18 @@ size_t GetFPUSize() { return sizeof(RegisterInfoPOSIX_arm64::FPU); } bool IsSVE(unsigned reg) const; - bool IsSME(unsigned reg) const; bool IsPAuth(unsigned reg) const; bool IsTLS(unsigned reg) const; + bool IsSME(unsigned reg) const; bool IsSVEZ(unsigned reg) const { return m_register_info_up->IsSVEZReg(reg); } bool IsSVEP(unsigned reg) const { return m_register_info_up->IsSVEPReg(reg); } bool IsSVEVG(unsigned reg) const { return m_register_info_up->IsSVERegVG(reg); } + bool IsSMEZA(unsigned reg) const { + return m_register_info_up->IsSMERegZA(reg); + } uint32_t GetRegNumSVEZ0() const { return m_register_info_up->GetRegNumSVEZ0(); diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h @@ -58,10 +58,20 @@ lldb_private::DataExtractor m_sve_data; lldb_private::DataExtractor m_pac_data; lldb_private::DataExtractor m_tls_data; + lldb_private::DataExtractor m_za_data; SVEState m_sve_state; uint16_t m_sve_vector_length = 0; + // These are pseudo registers derived from the values in SSVE and ZA data. 
+ struct __attribute__((packed)) sme_pseudo_regs { + uint64_t ctrl_reg; + uint64_t svg_reg; + }; + static_assert(sizeof(sme_pseudo_regs) == 16); + + struct sme_pseudo_regs m_sme_pseudo_regs; + const uint8_t *GetSVEBuffer(uint64_t offset = 0); void ConfigureRegisterContext(); diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp @@ -23,8 +23,13 @@ llvm::ArrayRef notes) { Flags opt_regsets = RegisterInfoPOSIX_arm64::eRegsetMaskDefault; + DataExtractor ssve_data = + getRegset(notes, arch.GetTriple(), AARCH64_SSVE_Desc); + if (ssve_data.GetByteSize() >= sizeof(sve::user_sve_header)) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE); + DataExtractor sve_data = getRegset(notes, arch.GetTriple(), AARCH64_SVE_Desc); - if (sve_data.GetByteSize() > sizeof(sve::user_sve_header)) + if (sve_data.GetByteSize() >= sizeof(sve::user_sve_header)) opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSVE); // Pointer Authentication register set data is based on struct @@ -40,6 +45,11 @@ if (tls_data.GetByteSize() >= sizeof(uint64_t)) opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskTLS); + DataExtractor za_data = getRegset(notes, arch.GetTriple(), AARCH64_ZA_Desc); + // Nothing if ZA is not present, just the header if it is disabled. 
+ if (za_data.GetByteSize() >= sizeof(sve::user_za_header)) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskZA); + auto register_info_up = std::make_unique(arch, opt_regsets); return std::unique_ptr( @@ -51,6 +61,8 @@ Thread &thread, std::unique_ptr register_info, const DataExtractor &gpregset, llvm::ArrayRef notes) : RegisterContextPOSIX_arm64(thread, std::move(register_info)) { + ::memset(&m_sme_pseudo_regs, 0, sizeof(m_sme_pseudo_regs)); + m_gpr_data.SetData(std::make_shared(gpregset.GetDataStart(), gpregset.GetByteSize())); m_gpr_data.SetByteOrder(gpregset.GetByteOrder()); @@ -59,7 +71,15 @@ m_register_info_up->GetTargetArchitecture().GetTriple(); m_fpr_data = getRegset(notes, target_triple, FPR_Desc); - if (m_register_info_up->IsSVEEnabled()) + if (m_register_info_up->IsSSVEEnabled()) { + m_sve_data = getRegset(notes, target_triple, AARCH64_SSVE_Desc); + lldb::offset_t flags_offset = 12; + uint16_t flags = m_sve_data.GetU16(&flags_offset); + if ((flags & sve::ptrace_regs_mask) == sve::ptrace_regs_sve) + m_sve_state = SVEState::Streaming; + } + + if (m_sve_state != SVEState::Streaming && m_register_info_up->IsSVEEnabled()) m_sve_data = getRegset(notes, target_triple, AARCH64_SVE_Desc); if (m_register_info_up->IsPAuthEnabled()) @@ -68,6 +88,9 @@ if (m_register_info_up->IsTLSEnabled()) m_tls_data = getRegset(notes, target_triple, AARCH64_TLS_Desc); + if (m_register_info_up->IsZAEnabled()) + m_za_data = getRegset(notes, target_triple, AARCH64_ZA_Desc); + ConfigureRegisterContext(); } @@ -95,15 +118,18 @@ if (m_sve_data.GetByteSize() > sizeof(sve::user_sve_header)) { uint64_t sve_header_field_offset = 8; m_sve_vector_length = m_sve_data.GetU16(&sve_header_field_offset); - sve_header_field_offset = 12; - uint16_t sve_header_flags_field = - m_sve_data.GetU16(&sve_header_field_offset); - if ((sve_header_flags_field & sve::ptrace_regs_mask) == - sve::ptrace_regs_fpsimd) - m_sve_state = SVEState::FPSIMD; - else if ((sve_header_flags_field & sve::ptrace_regs_mask) 
== - sve::ptrace_regs_sve) - m_sve_state = SVEState::Full; + + if (m_sve_state != SVEState::Streaming) { + sve_header_field_offset = 12; + uint16_t sve_header_flags_field = + m_sve_data.GetU16(&sve_header_field_offset); + if ((sve_header_flags_field & sve::ptrace_regs_mask) == + sve::ptrace_regs_fpsimd) + m_sve_state = SVEState::FPSIMD; + else if ((sve_header_flags_field & sve::ptrace_regs_mask) == + sve::ptrace_regs_sve) + m_sve_state = SVEState::Full; + } if (!sve::vl_valid(m_sve_vector_length)) { m_sve_state = SVEState::Disabled; @@ -115,6 +141,23 @@ if (m_sve_state != SVEState::Disabled) m_register_info_up->ConfigureVectorLengthSVE( sve::vq_from_vl(m_sve_vector_length)); + + if (m_sve_state == SVEState::Streaming) + m_sme_pseudo_regs.ctrl_reg |= 1; + + if (m_za_data.GetByteSize() >= sizeof(sve::user_za_header)) { + lldb::offset_t vlen_offset = 8; + uint16_t svl = m_za_data.GetU16(&vlen_offset); + m_sme_pseudo_regs.svg_reg = svl / 8; + m_register_info_up->ConfigureVectorLengthZA(svl / 16); + + // If there is register data then ZA is active. The size of the note may be + // misleading here so we use the size field of the embedded header. 
+ lldb::offset_t size_offset = 0; + uint32_t size = m_za_data.GetU32(&size_offset); + if (size > sizeof(sve::user_za_header)) + m_sme_pseudo_regs.ctrl_reg |= 1 << 1; + } } uint32_t RegisterContextCorePOSIX_arm64::CalculateSVEOffset( @@ -124,7 +167,8 @@ if (m_sve_state == SVEState::FPSIMD) { const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; sve_reg_offset = sve::ptrace_fpsimd_offset + (reg - GetRegNumSVEZ0()) * 16; - } else if (m_sve_state == SVEState::Full) { + } else if (m_sve_state == SVEState::Full || + m_sve_state == SVEState::Streaming) { uint32_t sve_z0_offset = GetGPRSize() + 16; sve_reg_offset = sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset; @@ -163,19 +207,19 @@ } } else { // FPSR and FPCR will be located right after Z registers in - // SVEState::FPSIMD while in SVEState::Full they will be located at the - // end of register data after an alignment correction based on currently - // selected vector length. + // SVEState::FPSIMD while in SVEState::Full/SVEState::Streaming they will + // be located at the end of register data after an alignment correction + // based on currently selected vector length. 
uint32_t sve_reg_num = LLDB_INVALID_REGNUM; if (reg == GetRegNumFPSR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_vector_length)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegNumFPCR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_vector_length)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; @@ -217,6 +261,7 @@ error); } break; case SVEState::Full: + case SVEState::Streaming: offset = CalculateSVEOffset(reg_info); assert(offset < m_sve_data.GetByteSize()); value.SetFromMemoryData(*reg_info, GetSVEBuffer(offset), @@ -237,6 +282,54 @@ assert(offset < m_tls_data.GetByteSize()); value.SetFromMemoryData(*reg_info, m_tls_data.GetDataStart() + offset, reg_info->byte_size, lldb::eByteOrderLittle, error); + } else if (IsSME(reg)) { + // If you had SME in the process, active or otherwise, there will at least + // be a ZA header. No header, no SME at all. + if (m_za_data.GetByteSize() < sizeof(sve::user_za_header)) + return false; + + if (!IsSMEZA(reg)) { + offset = reg_info->byte_offset - m_register_info_up->GetSMEOffset(); + assert(offset < sizeof(m_sme_pseudo_regs)); + // Host endian since these values are derived instead of being read from a + // core file note. + value.SetFromMemoryData( + *reg_info, reinterpret_cast(&m_sme_pseudo_regs) + offset, + reg_info->byte_size, lldb_private::endian::InlHostByteOrder(), error); + } else { + // If the process did not have the SME extension. + if (m_za_data.GetByteSize() < sizeof(sve::user_za_header)) + return false; + + // Don't use the size of the note to tell whether ZA is enabled. 
There may + // be non-register padding data after the header. Use the embedded + // header's size field instead. + lldb::offset_t size_offset = 0; + uint32_t size = m_za_data.GetU32(&size_offset); + bool za_enabled = size > sizeof(sve::user_za_header); + + size_t za_note_size = m_za_data.GetByteSize(); + // For a disabled ZA we fake a value of all 0s. + if (!za_enabled) { + uint64_t svl = m_sme_pseudo_regs.svg_reg * 8; + za_note_size = sizeof(sve::user_za_header) + (svl * svl); + } + + const uint8_t *src = nullptr; + std::vector disabled_za_data; + + if (za_enabled) + src = m_za_data.GetDataStart(); + else { + disabled_za_data.resize(za_note_size); + std::fill(disabled_za_data.begin(), disabled_za_data.end(), 0); + src = disabled_za_data.data(); + } + + value.SetFromMemoryData(*reg_info, src + sizeof(sve::user_za_header), + reg_info->byte_size, lldb::eByteOrderLittle, + error); + } } else return false; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h --- a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h @@ -119,6 +119,10 @@ {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_SVE}, }; +constexpr RegsetDesc AARCH64_SSVE_Desc[] = { + {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_SSVE}, +}; + constexpr RegsetDesc AARCH64_ZA_Desc[] = { {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_ZA}, }; diff --git a/lldb/test/API/linux/aarch64/sme_core_file/TestAArch64LinuxSMECoreFile.py b/lldb/test/API/linux/aarch64/sme_core_file/TestAArch64LinuxSMECoreFile.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/linux/aarch64/sme_core_file/TestAArch64LinuxSMECoreFile.py @@ -0,0 +1,115 @@ +""" +Check that LLDB can read Scalable Matrix Extension (SME) data from core files. 
+""" + + +import lldb +import itertools +from enum import Enum +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class Mode(Enum): + SVE = 0 + SSVE = 1 + + +class ZA(Enum): + Disabled = 0 + Enabled = 1 + + +class AArch64LinuxSMECoreFileTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + # SME introduces an extra SVE mode "streaming mode" and an array storage + # register "ZA". ZA can be enabled or disabled independent of streaming mode. + # Vector length can also be different between the streaming and non-streaming + # mode. Therefore this test checks a few combinations, but not all. + # + # The numbers in the core file names are options to the crashing program, + # see main.c for their meaning. The test case names will also explain them. + + def check_corefile(self, corefile): + self.runCmd("target create --core " + corefile) + + _, sve_mode, vl, svl, za = corefile.split("_") + + sve_mode = Mode(int(sve_mode)) + vl = int(vl) + svl = int(svl) + za = ZA(int(za)) + + self.expect("register read tpidr2", substrs=["0x1122334455667788"]) + + # In streaming mode, vg is the same as svg. 'g' is for granule which is + # 8 bytes. + if sve_mode == Mode.SSVE: + self.expect("register read vg", substrs=["0x{:016x}".format(svl // 8)]) + else: + self.expect("register read vg", substrs=["0x{:016x}".format(vl // 8)]) + + # svg is always the streaming mode vector length. + self.expect("register read svg", substrs=["0x{:016x}".format(svl // 8)]) + + svcr = 1 if sve_mode == Mode.SSVE else 0 + if za == ZA.Enabled: + svcr |= 2 + self.expect("register read svcr", substrs=["0x{:016x}".format(svcr)]) + + repeat_bytes = lambda v, n: " ".join(["0x{:02x}".format(v)] * n) + + sve_vl = svl if sve_mode == Mode.SSVE else vl + for i in range(0, 32): + # Each element is set to the register number + 1, for example: + # z0 = {0x01 0x01 0x01 ... 
} + expected = "{{{}}}".format(repeat_bytes(i + 1, sve_vl)) + self.expect("register read z{}".format(i), substrs=[expected]) + + # The P registers cycle between a few values. + # p0 = {0xff 0xff ... } + # p1 = {0x55 0x55 ... } + # ... + # P registers and FFR have 1 bit per byte element in a vector. + p_value = lambda v: "{{{}}}".format(repeat_bytes(v, sve_vl // 8)) + expected_p_values = [p_value(v) for v in [0xFF, 0x55, 0x11, 0x01, 0x00]] + expected_p_values = itertools.cycle(expected_p_values) + + for i in range(0, 16): + expected = next(expected_p_values) + self.expect("register read p{}".format(i), substrs=[expected]) + + self.expect( + "register read ffr", + substrs=["{{{}}}".format(repeat_bytes(0xFF, sve_vl // 8))], + ) + + if za == ZA.Enabled: + # Each row of ZA is set to the row number plus 1. For example: + # za = {0x01 0x01 0x01 0x01 0x02 0x02 ... + make_row = repeat_bytes + else: + # When ZA is disabled lldb shows it as 0s. + make_row = lambda _, n: repeat_bytes(0, n) + + expected_za = "{{{}}}".format( + " ".join([make_row(i + 1, svl) for i in range(svl)]) + ) + self.expect("register read za", substrs=[expected_za]) + + @skipIfLLVMTargetMissing("AArch64") + def test_sme_core_file_ssve_vl32_svl16_za_enabled(self): + self.check_corefile("core_1_32_16_1") + + @skipIfLLVMTargetMissing("AArch64") + def test_sme_core_file_ssve_vl16_svl32_za_disabled(self): + self.check_corefile("core_1_16_32_0") + + @skipIfLLVMTargetMissing("AArch64") + def test_sme_core_file_sve_vl16_svl32_za_enabled(self): + self.check_corefile("core_0_16_32_1") + + @skipIfLLVMTargetMissing("AArch64") + def test_sme_core_file_sve_vl32_svl16_za_disabled(self): + self.check_corefile("core_0_32_16_0") diff --git a/lldb/test/API/linux/aarch64/sme_core_file/core_0_16_32_1 b/lldb/test/API/linux/aarch64/sme_core_file/core_0_16_32_1 new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ /proc/self/coredump_filter 
+// +// Must be run on a system that has SVE and SME, including the smefa64 +// extension. Example command: +// main 0 32 64 1 +// +// This would not enter streaming mode, set non-streaming VL to 32 +// bytes, streaming VL to 64 bytes and enable ZA. +// clang-format on + +#include +#include +#include +#include +#include + +#ifndef PR_SME_SET_VL +#define PR_SME_SET_VL 63 +#endif + +#define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr") +#define SMSTART_SM SM_INST(3) +#define SMSTART_ZA SM_INST(5) + +void set_sve_registers() { + // We assume the smefa64 feature is present, which allows ffr access + // in streaming mode. + asm volatile("setffr\n\t"); + asm volatile("ptrue p0.b\n\t"); + asm volatile("ptrue p1.h\n\t"); + asm volatile("ptrue p2.s\n\t"); + asm volatile("ptrue p3.d\n\t"); + asm volatile("pfalse p4.b\n\t"); + asm volatile("ptrue p5.b\n\t"); + asm volatile("ptrue p6.h\n\t"); + asm volatile("ptrue p7.s\n\t"); + asm volatile("ptrue p8.d\n\t"); + asm volatile("pfalse p9.b\n\t"); + asm volatile("ptrue p10.b\n\t"); + asm volatile("ptrue p11.h\n\t"); + asm volatile("ptrue p12.s\n\t"); + asm volatile("ptrue p13.d\n\t"); + asm volatile("pfalse p14.b\n\t"); + asm volatile("ptrue p15.b\n\t"); + + asm volatile("cpy z0.b, p0/z, #1\n\t"); + asm volatile("cpy z1.b, p5/z, #2\n\t"); + asm volatile("cpy z2.b, p10/z, #3\n\t"); + asm volatile("cpy z3.b, p15/z, #4\n\t"); + asm volatile("cpy z4.b, p0/z, #5\n\t"); + asm volatile("cpy z5.b, p5/z, #6\n\t"); + asm volatile("cpy z6.b, p10/z, #7\n\t"); + asm volatile("cpy z7.b, p15/z, #8\n\t"); + asm volatile("cpy z8.b, p0/z, #9\n\t"); + asm volatile("cpy z9.b, p5/z, #10\n\t"); + asm volatile("cpy z10.b, p10/z, #11\n\t"); + asm volatile("cpy z11.b, p15/z, #12\n\t"); + asm volatile("cpy z12.b, p0/z, #13\n\t"); + asm volatile("cpy z13.b, p5/z, #14\n\t"); + asm volatile("cpy z14.b, p10/z, #15\n\t"); + asm volatile("cpy z15.b, p15/z, #16\n\t"); + asm volatile("cpy z16.b, p0/z, #17\n\t"); + asm volatile("cpy z17.b, p5/z, #18\n\t"); 
+ asm volatile("cpy z18.b, p10/z, #19\n\t"); + asm volatile("cpy z19.b, p15/z, #20\n\t"); + asm volatile("cpy z20.b, p0/z, #21\n\t"); + asm volatile("cpy z21.b, p5/z, #22\n\t"); + asm volatile("cpy z22.b, p10/z, #23\n\t"); + asm volatile("cpy z23.b, p15/z, #24\n\t"); + asm volatile("cpy z24.b, p0/z, #25\n\t"); + asm volatile("cpy z25.b, p5/z, #26\n\t"); + asm volatile("cpy z26.b, p10/z, #27\n\t"); + asm volatile("cpy z27.b, p15/z, #28\n\t"); + asm volatile("cpy z28.b, p0/z, #29\n\t"); + asm volatile("cpy z29.b, p5/z, #30\n\t"); + asm volatile("cpy z30.b, p10/z, #31\n\t"); + asm volatile("cpy z31.b, p15/z, #32\n\t"); +} + +void set_za_register(int streaming_vl) { +#define MAX_VL_BYTES 256 + uint8_t data[MAX_VL_BYTES]; + + for (unsigned i = 0; i < streaming_vl; ++i) { + for (unsigned j = 0; j < MAX_VL_BYTES; ++j) + data[j] = i + 1; + asm volatile("mov w12, %w0\n\t" + "ldr za[w12, 0], [%1]\n\t" ::"r"(i), + "r"(&data) + : "w12"); + } +} + +void set_tpidr2(uint64_t value) { + __asm__ volatile("msr S3_3_C13_C0_5, %0" ::"r"(value)); +} + +int main(int argc, char **argv) { + // Arguments: + // SVE mode: 1 for streaming SVE (SSVE), any other value + // for non-streaming SVE mode. + // Non-Streaming Vector length: In bytes, an integer e.g. "32". + // Streaming Vector length: As above, but for streaming mode. + // ZA mode: 1 for enabled, any other value for disabled. + if (argc != 5) + return 1; + + // We assume this is run on a system with SME, so tpidr2 can always be + // accessed. + set_tpidr2(0x1122334455667788); + + // Streaming mode or not? + bool streaming_mode = strcmp(argv[1], "1") == 0; + + // Set vector length (is a syscall, resets modes). + int non_streaming_vl = atoi(argv[2]); + prctl(PR_SVE_SET_VL, non_streaming_vl); + int streaming_vl = atoi(argv[3]); + prctl(PR_SME_SET_VL, streaming_vl); + + if (streaming_mode) + SMSTART_SM; + + set_sve_registers(); + + // ZA enabled or disabled? 
+ if (strcmp(argv[4], "1") == 0) { + SMSTART_ZA; + set_za_register(streaming_vl); + } + + *(volatile char *)(0) = 0; // Crashes here. + + return 0; +}