diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -81,11 +81,9 @@ private: bool m_gpr_is_valid; bool m_fpu_is_valid; - bool m_sve_buffer_is_valid; bool m_mte_ctrl_is_valid; bool m_tls_tpidr_is_valid; - bool m_sve_header_is_valid; bool m_pac_mask_is_valid; struct user_pt_regs m_gpr_arm64; // 64-bit general purpose registers. @@ -94,8 +92,29 @@ m_fpr; // floating-point registers including extended register sets. SVEState m_sve_state; - struct sve::user_sve_header m_sve_header; - std::vector m_sve_ptrace_payload; + + struct SVEStateData { + SVEStateData(unsigned regset) + : m_header_is_valid(false), m_buffer_is_valid(false), m_regset(regset) { + ::memset(&m_header, 0, sizeof(m_header)); + } + + void Invalidate() { + m_header_is_valid = false; + m_buffer_is_valid = false; + } + + bool m_header_is_valid; + struct sve::user_sve_header m_header; + bool m_buffer_is_valid; + // For storing the full ptrace data. + std::vector m_buffer; + // For the ptrace request. 
+ unsigned m_regset; + }; + + SVEStateData m_sve_state_data; + SVEStateData m_ssve_state_data; bool m_refresh_hwdebug_info; @@ -114,6 +133,12 @@ bool IsFPR(unsigned reg) const; + SVEStateData &CurrentSVEStateData() { + if (m_sve_state == SVEState::Streaming) + return m_ssve_state_data; + return m_sve_state_data; + } + Status ReadAllSVE(); Status WriteAllSVE(); @@ -137,11 +162,9 @@ bool IsMTE(unsigned reg) const; bool IsTLS(unsigned reg) const; - uint64_t GetSVERegVG() { return m_sve_header.vl / 8; } + uint64_t GetSVERegVG() { return CurrentSVEStateData().m_header.vl / 8; } - void SetSVERegVG(uint64_t vg) { m_sve_header.vl = vg * 8; } - - void *GetSVEHeader() { return &m_sve_header; } + void SetSVERegVG(uint64_t vg) { CurrentSVEStateData().m_header.vl = vg * 8; } void *GetPACMask() { return &m_pac_mask; } @@ -149,13 +172,15 @@ void *GetTLSTPIDR() { return &m_tls_tpidr_reg; } - void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); }; + void *GetSVEBuffer() { return CurrentSVEStateData().m_buffer.data(); } - size_t GetSVEHeaderSize() { return sizeof(m_sve_header); } + size_t GetSVEBufferSize() { return CurrentSVEStateData().m_buffer.size(); } - size_t GetPACMaskSize() { return sizeof(m_pac_mask); } + void *GetSVEHeader() { return &(CurrentSVEStateData().m_header); } - size_t GetSVEBufferSize() { return m_sve_ptrace_payload.size(); } + size_t GetSVEHeaderSize() { return sizeof(CurrentSVEStateData().m_header); } + + size_t GetPACMaskSize() { return sizeof(m_pac_mask); } size_t GetMTEControlSize() { return sizeof(m_mte_ctrl_reg); } diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -36,6 +36,11 @@ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension */ #endif +#ifndef NT_ARM_SSVE +#define NT_ARM_SSVE \ 
+ 0x40b /* ARM Scalable Matrix Extension, Streaming SVE mode */ +#endif + #ifndef NT_ARM_PAC_MASK #define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */ #endif @@ -71,9 +76,20 @@ if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET, native_thread.GetID(), &regset, &ioVec, sizeof(sve_header)) - .Success()) + .Success()) { opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSVE); + // We may also have the Scalable Matrix Extension (SME) which adds a + // streaming SVE mode. + ioVec.iov_len = sizeof(sve_header); + regset = NT_ARM_SSVE; + if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET, + native_thread.GetID(), &regset, + &ioVec, sizeof(sve_header)) + .Success()) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE); + } + NativeProcessLinux &process = native_thread.GetProcess(); std::optional auxv_at_hwcap = @@ -109,12 +125,12 @@ std::unique_ptr register_info_up) : NativeRegisterContextRegisterInfo(native_thread, register_info_up.release()), - NativeRegisterContextLinux(native_thread) { + NativeRegisterContextLinux(native_thread), m_sve_state_data(NT_ARM_SVE), + m_ssve_state_data(NT_ARM_SSVE) { ::memset(&m_fpr, 0, sizeof(m_fpr)); ::memset(&m_gpr_arm64, 0, sizeof(m_gpr_arm64)); ::memset(&m_hwp_regs, 0, sizeof(m_hwp_regs)); ::memset(&m_hbp_regs, 0, sizeof(m_hbp_regs)); - ::memset(&m_sve_header, 0, sizeof(m_sve_header)); ::memset(&m_pac_mask, 0, sizeof(m_pac_mask)); m_mte_ctrl_reg = 0; @@ -128,13 +144,11 @@ m_gpr_is_valid = false; m_fpu_is_valid = false; - m_sve_buffer_is_valid = false; - m_sve_header_is_valid = false; m_pac_mask_is_valid = false; m_mte_ctrl_is_valid = false; m_tls_tpidr_is_valid = false; - if (GetRegisterInfo().IsSVEEnabled()) + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) m_sve_state = SVEState::Unknown; else m_sve_state = SVEState::Disabled; @@ -203,26 +217,36 @@ assert(offset < GetFPRSize()); src = (uint8_t *)GetFPRBuffer() + offset; } else { - // SVE enabled, we will read and cache SVE ptrace 
data + // SVE enabled, we will read and cache SVE ptrace data. + // In SIMD or Full mode, the data comes from the SVE regset. In streaming + // mode, it also comes from that set, so we have to switch temporarily. + SVEState previous_sve_state = m_sve_state; + if (m_sve_state == SVEState::Streaming) + m_sve_state = SVEState::FPSIMD; + error = ReadAllSVE(); - if (error.Fail()) + if (error.Fail()) { + m_sve_state = previous_sve_state; return error; + } // FPSR and FPCR will be located right after Z registers in - // SVEState::FPSIMD while in SVEState::Full they will be located at the - // end of register data after an alignment correction based on currently - // selected vector length. + // SVEState::FPSIMD while in SVEState::Full they + // will be located at the end of register data after an alignment + // correction based on currently selected vector length. uint32_t sve_reg_num = LLDB_INVALID_REGNUM; if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPSROffset( + sve::vq_from_vl(CurrentSVEStateData().m_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPCROffset( + sve::vq_from_vl(CurrentSVEStateData().m_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; } else { @@ -235,6 +259,10 @@ assert(offset < GetSVEBufferSize()); src = (uint8_t *)GetSVEBuffer() + offset; + + // We have separate copies of streaming and non-streaming state, so src + // may point to an inactive mode but the pointer is still valid. 
+ m_sve_state = previous_sve_state; } } else if (IsTLS(reg)) { error = ReadTLSTPIDR(); @@ -344,7 +372,14 @@ return WriteFPR(); } else { - // SVE enabled, we will read and cache SVE ptrace data + // SVE enabled, we will read and cache SVE ptrace data. Even when in + // streaming mode, we need to write to the non-streaming regset. Doing so + // also exits streaming mode and invalidates its state. Therefore we don't + // have to flush it before doing this. + SVEState previous_sve_state = m_sve_state; + if (m_sve_state == SVEState::Streaming) + m_sve_state = SVEState::FPSIMD; + error = ReadAllSVE(); if (error.Fail()) return error; @@ -357,13 +392,15 @@ if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPSROffset( + sve::vq_from_vl(CurrentSVEStateData().m_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPCROffset( + sve::vq_from_vl(CurrentSVEStateData().m_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; } else { @@ -377,7 +414,17 @@ assert(offset < GetSVEBufferSize()); dst = (uint8_t *)GetSVEBuffer() + offset; ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); - return WriteAllSVE(); + Status write_result = WriteAllSVE(); + + if (previous_sve_state == SVEState::Streaming) { + // We have exited streaming mode, our vector length may have changed. 
+ m_sve_state = SVEState::Unknown; + m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); + ConfigureRegisterContext(); + } + + return write_result; } } else if (IsSVE(reg)) { if (m_sve_state == SVEState::Disabled || m_sve_state == SVEState::Unknown) @@ -392,7 +439,8 @@ uint64_t vg_value = reg_value.GetAsUInt64(); if (sve::vl_valid(vg_value * 8)) { - if (m_sve_header_is_valid && vg_value == GetSVERegVG()) + if (CurrentSVEStateData().m_header_is_valid && + vg_value == GetSVERegVG()) return error; SetSVERegVG(vg_value); @@ -401,7 +449,8 @@ if (error.Success()) ConfigureRegisterContext(); - if (m_sve_header_is_valid && vg_value == GetSVERegVG()) + if (CurrentSVEStateData().m_header_is_valid && + vg_value == GetSVERegVG()) return error; } @@ -480,8 +529,9 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( lldb::WritableDataBufferSP &data_sp) { // AArch64 register data must contain GPRs, either FPR or SVE registers - // and optional MTE register. Pointer Authentication (PAC) registers are - // read-only and will be skiped. + // (which can be non-streaming, SVE or streaming, SSVE) and optional MTE + // register. Pointer Authentication (PAC) registers are read-only and will be + // skipped. // In order to create register data checkpoint we first read all register // values if not done already and calculate total size of register set data. @@ -495,8 +545,10 @@ return error; // If SVE is enabled we need not copy FPR separately. - if (GetRegisterInfo().IsSVEEnabled()) { + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { reg_data_byte_size += GetSVEBufferSize(); + // Also store the current SVE mode. 
+ reg_data_byte_size += sizeof(uint32_t); error = ReadAllSVE(); } else { reg_data_byte_size += GetFPRSize(); @@ -524,7 +576,9 @@ ::memcpy(dst, GetGPRBuffer(), GetGPRBufferSize()); dst += GetGPRBufferSize(); - if (GetRegisterInfo().IsSVEEnabled()) { + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { + *dst = static_cast(m_sve_state); + dst += sizeof(m_sve_state); ::memcpy(dst, GetSVEBuffer(), GetSVEBufferSize()); dst += GetSVEBufferSize(); } else { @@ -594,16 +648,21 @@ (data_sp->GetByteSize() > (reg_data_min_size + GetSVEHeaderSize())); if (contains_sve_reg_data) { + // Restore to the correct mode, streaming or not. + m_sve_state = static_cast(*src); + src += sizeof(m_sve_state); + // We have SVE register data first write SVE header. ::memcpy(GetSVEHeader(), src, GetSVEHeaderSize()); - if (!sve::vl_valid(m_sve_header.vl)) { - m_sve_header_is_valid = false; + SVEStateData &sve_state = CurrentSVEStateData(); + if (!sve::vl_valid(sve_state.m_header.vl)) { + sve_state.m_header_is_valid = false; error.SetErrorStringWithFormat("NativeRegisterContextLinux_arm64::%s " "Invalid SVE header in data_sp", __FUNCTION__); return error; } - m_sve_header_is_valid = true; + sve_state.m_header_is_valid = true; error = WriteSVEHeader(); if (error.Fail()) return error; @@ -622,7 +681,7 @@ } ::memcpy(GetSVEBuffer(), src, GetSVEBufferSize()); - m_sve_buffer_is_valid = true; + CurrentSVEStateData().m_buffer_is_valid = true; error = WriteAllSVE(); src += GetSVEBufferSize(); } else { @@ -814,8 +873,8 @@ void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() { m_gpr_is_valid = false; m_fpu_is_valid = false; - m_sve_buffer_is_valid = false; - m_sve_header_is_valid = false; + m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); m_pac_mask_is_valid = false; m_mte_ctrl_is_valid = false; m_tls_tpidr_is_valid = false; @@ -826,18 +885,19 @@ Status NativeRegisterContextLinux_arm64::ReadSVEHeader() { Status error; + SVEStateData &state = 
CurrentSVEStateData(); - if (m_sve_header_is_valid) + if (state.m_header_is_valid) return error; struct iovec ioVec; - ioVec.iov_base = GetSVEHeader(); - ioVec.iov_len = GetSVEHeaderSize(); + ioVec.iov_base = &state.m_header; + ioVec.iov_len = sizeof(state.m_header); - error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE); + error = ReadRegisterSet(&ioVec, sizeof(state.m_header), state.m_regset); if (error.Success()) - m_sve_header_is_valid = true; + state.m_header_is_valid = true; return error; } @@ -862,36 +922,39 @@ Status NativeRegisterContextLinux_arm64::WriteSVEHeader() { Status error; + SVEStateData &state = CurrentSVEStateData(); error = ReadSVEHeader(); if (error.Fail()) return error; struct iovec ioVec; - ioVec.iov_base = GetSVEHeader(); - ioVec.iov_len = GetSVEHeaderSize(); + ioVec.iov_base = &state.m_header; + ioVec.iov_len = sizeof(state.m_header); - m_sve_buffer_is_valid = false; - m_sve_header_is_valid = false; + // All SIMD/SVE/SSVE state must be re-read after doing this write. 
+ m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); m_fpu_is_valid = false; - return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE); + return WriteRegisterSet(&ioVec, sizeof(state.m_header), state.m_regset); } Status NativeRegisterContextLinux_arm64::ReadAllSVE() { Status error; + SVEStateData &state = CurrentSVEStateData(); - if (m_sve_buffer_is_valid) + if (state.m_buffer_is_valid) return error; struct iovec ioVec; - ioVec.iov_base = GetSVEBuffer(); - ioVec.iov_len = GetSVEBufferSize(); + ioVec.iov_base = state.m_buffer.data(); + ioVec.iov_len = state.m_buffer.size(); - error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE); + error = ReadRegisterSet(&ioVec, state.m_buffer.size(), state.m_regset); if (error.Success()) - m_sve_buffer_is_valid = true; + state.m_buffer_is_valid = true; return error; } @@ -903,16 +966,18 @@ if (error.Fail()) return error; - struct iovec ioVec; + SVEStateData &state = CurrentSVEStateData(); - ioVec.iov_base = GetSVEBuffer(); - ioVec.iov_len = GetSVEBufferSize(); + struct iovec ioVec; + ioVec.iov_base = state.m_buffer.data(); + ioVec.iov_len = state.m_buffer.size(); - m_sve_buffer_is_valid = false; - m_sve_header_is_valid = false; + // All SIMD/SVE/SSVE state must be re-read after doing this write. + m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); m_fpu_is_valid = false; - return WriteRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE); + return WriteRegisterSet(&ioVec, state.m_buffer.size(), state.m_regset); } Status NativeRegisterContextLinux_arm64::ReadMTEControl() { @@ -985,29 +1050,57 @@ void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { // ConfigureRegisterContext gets called from InvalidateAllRegisters - // on every stop and configures SVE vector length. + // on every stop and configures SVE vector length and whether we are in + // streaming SVE mode. 
// If m_sve_state is set to SVEState::Disabled on first stop, code below will // be deemed non operational for the lifetime of current process. - if (!m_sve_header_is_valid && m_sve_state != SVEState::Disabled) { + if (!m_sve_state_data.m_header_is_valid && + !m_ssve_state_data.m_header_is_valid && + m_sve_state != SVEState::Disabled) { + // If we have SVE we may also have the SVE streaming mode that SME added. + // We can read the header of either mode, but only the active mode will + // have valid register data. + + // Check whether SME is present and the streaming SVE mode is active. + m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); + m_sve_state = SVEState::Streaming; Status error = ReadSVEHeader(); - if (error.Success()) { - // If SVE is enabled thread can switch between SVEState::FPSIMD and - // SVEState::Full on every stop. - if ((m_sve_header.flags & sve::ptrace_regs_mask) == - sve::ptrace_regs_fpsimd) - m_sve_state = SVEState::FPSIMD; - else if ((m_sve_header.flags & sve::ptrace_regs_mask) == - sve::ptrace_regs_sve) - m_sve_state = SVEState::Full; + // Streaming mode is active if the header has the SVE active flag set. + if (error.Success() && ((m_ssve_state_data.m_header.flags & + sve::ptrace_regs_mask) == sve::ptrace_regs_sve)) { + m_sve_state = SVEState::Streaming; + } else { + // If we're not streaming, non-streaming might be active. + m_sve_state_data.Invalidate(); + m_ssve_state_data.Invalidate(); + m_sve_state = SVEState::Full; + error = ReadSVEHeader(); + if (error.Success()) { + m_sve_state_data.m_header_is_valid = true; + + // If SVE is enabled thread can switch between SVEState::FPSIMD and + // SVEState::Full on every stop. 
+ if ((m_sve_state_data.m_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_fpsimd) + m_sve_state = SVEState::FPSIMD; + else if ((m_sve_state_data.m_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_sve) + m_sve_state = SVEState::Full; + } + } + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::FPSIMD || + m_sve_state == SVEState::Streaming) { + SVEStateData &sve_state = CurrentSVEStateData(); // On every stop we configure SVE vector length by calling // ConfigureVectorLength regardless of current SVEState of this thread. uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE; - if (sve::vl_valid(m_sve_header.vl)) - vq = sve::vq_from_vl(m_sve_header.vl); + if (sve::vl_valid(sve_state.m_header.vl)) + vq = sve::vq_from_vl(sve_state.m_header.vl); GetRegisterInfo().ConfigureVectorLength(vq); - m_sve_ptrace_payload.resize(sve::PTraceSize(vq, sve::ptrace_regs_sve)); + sve_state.m_buffer.resize(sve::PTraceSize(vq, sve::ptrace_regs_sve)); } } } @@ -1025,7 +1118,9 @@ const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; sve_reg_offset = sve::ptrace_fpsimd_offset + (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16; - } else if (m_sve_state == SVEState::Full) { + // Between non-streaming and streaming mode, the layout is identical. 
+ } else if (m_sve_state == SVEState::Full || + m_sve_state == SVEState::Streaming) { uint32_t sve_z0_offset = GetGPRSize() + 16; sve_reg_offset = sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset; diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -15,7 +15,7 @@ #include "lldb/lldb-private.h" #include -enum class SVEState { Unknown, Disabled, FPSIMD, Full }; +enum class SVEState : uint8_t { Unknown, Disabled, FPSIMD, Full, Streaming }; class RegisterInfoPOSIX_arm64 : public lldb_private::RegisterInfoAndSetInterface { @@ -26,9 +26,10 @@ enum { eRegsetMaskDefault = 0, eRegsetMaskSVE = 1, - eRegsetMaskPAuth = 2, - eRegsetMaskMTE = 4, - eRegsetMaskTLS = 8, + eRegsetMaskSSVE = 2, + eRegsetMaskPAuth = 4, + eRegsetMaskMTE = 8, + eRegsetMaskTLS = 16, eRegsetMaskDynamic = ~1, }; @@ -115,6 +116,7 @@ } bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } + bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); } bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -212,7 +212,7 @@ // dynamic register set like MTE, Pointer Authentication regset then we need // to create dynamic register infos and regset array. Push back all optional // register infos and regset and calculate register offsets accordingly. 
- if (m_opt_regsets.AllSet(eRegsetMaskSVE)) { + if (m_opt_regsets.AnySet(eRegsetMaskSVE | eRegsetMaskSSVE)) { m_register_info_p = g_register_infos_arm64_sve_le; m_register_info_count = sve_ffr + 1; m_per_regset_regnum_range[m_register_set_count++] = diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -1,5 +1,6 @@ """ -Test the AArch64 SVE registers dynamic resize with multiple threads. +Test the AArch64 SVE and Streaming SVE (SSVE) registers dynamic resize with +multiple threads. This test assumes a minimum supported vector length (VL) of 256 bits and will test 512 bits if possible. We refer to "vg" which is the @@ -7,11 +8,15 @@ the same as a vg of 4. 
""" +from enum import Enum import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil +class Mode(Enum): + SVE = 0 + SSVE = 1 class RegisterCommandsTestCase(TestBase): def get_supported_vg(self): @@ -45,6 +50,9 @@ if not self.res.GetError(): supported_vg.append(vg) + self.runCmd("breakpoint delete 1") + self.runCmd("continue") + return supported_vg def check_sve_registers(self, vg_test_value): @@ -88,24 +96,24 @@ self.expect("register read ffr", substrs=[p_regs_value]) - @no_debug_info_test - @skipIf(archs=no_match(["aarch64"])) - @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_dynamic_config(self): - """Test AArch64 SVE registers multi-threaded dynamic resize.""" - - if not self.isAArch64SVE(): + def run_sve_test(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("Streaming SVE registers must be supported.") + + cflags = "-march=armv8-a+sve -lpthread" + if mode == Mode.SSVE: + cflags += " -DUSE_SSVE" + self.build(dictionary={"CFLAGS_EXTRAS": cflags}) + self.build() supported_vg = self.get_supported_vg() if not (2 in supported_vg and 4 in supported_vg): self.skipTest("Not all required SVE vector lengths are supported.") - exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - main_thread_stop_line = line_number("main.c", "// Break in main thread") lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line) @@ -176,3 +184,17 @@ elif stopped_at_line_number == thY_break_line2: self.runCmd("thread select %d" % (idx + 1)) self.check_sve_registers(4) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_sve_registers_dynamic_config(self): + """Test AArch64 SVE registers multi-threaded dynamic resize.""" + self.run_sve_test(Mode.SVE) + + 
@no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_ssve_registers_dynamic_config(self): + """Test AArch64 SSVE registers multi-threaded dynamic resize.""" + self.run_sve_test(Mode.SSVE) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c @@ -1,6 +1,12 @@ #include #include +#ifndef PR_SME_SET_VL +#define PR_SME_SET_VL 63 +#endif + +#define SMSTART() asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/) + static inline void write_sve_registers() { asm volatile("setffr\n\t"); asm volatile("ptrue p0.b\n\t"); @@ -54,26 +60,41 @@ asm volatile("cpy z31.b, p15/z, #32\n\t"); } +int SET_VL_OPT = PR_SVE_SET_VL; + void *threadX_func(void *x_arg) { - prctl(PR_SVE_SET_VL, 8 * 4); + prctl(SET_VL_OPT, 8 * 4); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); write_sve_registers(); // Thread X breakpoint 1 return NULL; // Thread X breakpoint 2 } void *threadY_func(void *y_arg) { - prctl(PR_SVE_SET_VL, 8 * 2); + prctl(SET_VL_OPT, 8 * 2); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); write_sve_registers(); // Thread Y breakpoint 1 return NULL; // Thread Y breakpoint 2 } int main() { +#ifdef USE_SSVE + SET_VL_OPT = PR_SME_SET_VL; +#endif + /* this variable is our reference to the second thread */ pthread_t x_thread, y_thread; /* Set vector length to 8 and write SVE registers values */ - prctl(PR_SVE_SET_VL, 8 * 8); + prctl(SET_VL_OPT, 8 * 8); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); /* create a second thread which executes with argument x */ diff --git 
a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile @@ -1,5 +1,3 @@ C_SOURCES := main.c -CFLAGS_EXTRAS := -march=armv8-a+sve - include Makefile.rules diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -2,11 +2,15 @@ Test the AArch64 SVE registers. 
""" +from enum import Enum import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil +class Mode(Enum): + SVE = 0 + SSVE = 1 class RegisterCommandsTestCase(TestBase): def check_sve_register_size(self, set, name, expected): @@ -61,20 +65,28 @@ self.expect("register read " + "ffr", substrs=[p_regs_value]) - @no_debug_info_test - @skipIf(archs=no_match(["aarch64"])) - @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_configuration(self): - """Test AArch64 SVE registers size configuration.""" - self.build() + def get_build_flags(self, mode): + cflags = "-march=armv8-a+sve" + if mode == Mode.SSVE: + cflags += " -DSTART_SSVE" + return {"CFLAGS_EXTRAS": cflags} + + def skip_if_needed(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): + self.skipTest("SVE registers must be supported.") + + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("SSVE registers must be supported.") + + def sve_registers_configuration_impl(self, mode): + self.skip_if_needed(mode) + + self.build(dictionary=self.get_build_flags(mode)) self.line = line_number("main.c", "// Set a break point here.") exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.isAArch64SVE(): - self.skipTest("SVE registers must be supported.") - lldbutil.run_break_set_by_file_and_line( self, "main.c", self.line, num_expected_locations=1 ) @@ -91,26 +103,17 @@ thread = process.GetThreadAtIndex(0) currentFrame = thread.GetFrameAtIndex(0) - has_sve = False - for registerSet in currentFrame.GetRegisters(): - if "Scalable Vector Extension Registers" in registerSet.GetName(): - has_sve = True - registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters() - - sve_registers = registerSets.GetValueAtIndex(2) - - vg_reg = sve_registers.GetChildMemberWithName("vg") + sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers") + 
self.assertTrue(sve_registers) vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned() z_reg_size = vg_reg_value * 8 - - p_reg_size = z_reg_size / 8 - for i in range(32): self.check_sve_register_size(sve_registers, "z%i" % (i), z_reg_size) + p_reg_size = z_reg_size / 8 for i in range(16): self.check_sve_register_size(sve_registers, "p%i" % (i), p_reg_size) @@ -119,17 +122,26 @@ @no_debug_info_test @skipIf(archs=no_match(["aarch64"])) @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_read_write(self): - """Test AArch64 SVE registers read and write.""" - self.build() - self.line = line_number("main.c", "// Set a break point here.") + def test_sve_registers_configuration(self): + """Test AArch64 SVE registers size configuration.""" + self.sve_registers_configuration_impl(Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_ssve_registers_configuration(self): + """Test AArch64 SSVE registers size configuration.""" + self.sve_registers_configuration_impl(Mode.SSVE) + + def sve_registers_read_write_impl(self, start_mode, eval_mode): + self.skip_if_needed(start_mode) + self.skip_if_needed(eval_mode) + self.build(dictionary=self.get_build_flags(start_mode)) exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.isAArch64SVE(): - self.skipTest("SVE registers must be supported.") - + self.line = line_number("main.c", "// Set a break point here.") lldbutil.run_break_set_by_file_and_line( self, "main.c", self.line, num_expected_locations=1 ) @@ -143,34 +155,55 @@ target = self.dbg.GetSelectedTarget() process = target.GetProcess() - thread = process.GetThreadAtIndex(0) - currentFrame = thread.GetFrameAtIndex(0) - - has_sve = False - for registerSet in currentFrame.GetRegisters(): - if "Scalable Vector Extension Registers" in registerSet.GetName(): - has_sve = True registerSets = 
process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters() - - sve_registers = registerSets.GetValueAtIndex(2) - - vg_reg = sve_registers.GetChildMemberWithName("vg") + sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers") + self.assertTrue(sve_registers) vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned() - z_reg_size = vg_reg_value * 8 - self.check_sve_regs_read(z_reg_size) # Evaluate simple expression and print function expr_eval_func address. self.expect("expression expr_eval_func", substrs=["= 0x"]) # Evaluate expression call function expr_eval_func. - self.expect_expr("expr_eval_func()", result_type="int", result_value="1") + self.expect_expr("expr_eval_func({})".format( + "true" if (eval_mode == Mode.SSVE) else "false"), result_type="int", + result_value="1") # We called a jitted function above which must not have changed SVE # vector length or register values. self.check_sve_regs_read(z_reg_size) self.check_sve_regs_read_after_write(z_reg_size) + + # The following tests all setup some register values then evaluate an + # expression. After the expression, the mode and register values should be + # the same as before. Finally they read/write some values in the registers. + # The only difference is the mode we start the program in, and the mode + # the expression function uses. 
+ + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_sve_sve(self): + self.sve_registers_read_write_impl(Mode.SVE, Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_ssve_ssve(self): + self.sve_registers_read_write_impl(Mode.SSVE, Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_sve_ssve(self): + self.sve_registers_read_write_impl(Mode.SVE, Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_ssve_sve(self): + self.sve_registers_read_write_impl(Mode.SSVE, Mode.SVE) \ No newline at end of file diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c @@ -1,6 +1,15 @@ +#include #include +#ifndef PR_SME_SET_VL +#define PR_SME_SET_VL 63 +#endif + +#define SMSTART() asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/) + void write_sve_regs() { + // We assume the smefa64 feature is present, which allows ffr access + // in streaming mode. asm volatile("setffr\n\t"); asm volatile("ptrue p0.b\n\t"); asm volatile("ptrue p1.h\n\t"); @@ -53,18 +62,85 @@ asm volatile("cpy z31.b, p15/z, #32\n\t"); } +// Set some different values so we can tell if lldb correctly returns to the set +// above after the expression is finished. +void write_sve_regs_expr() { + // pfalse only operates on the "b" aka byte element size. 
+ asm volatile("pfalse p0.b\n\t"); + asm volatile("wrffr p0.b\n\t"); + asm volatile("pfalse p1.b\n\t"); + asm volatile("pfalse p2.b\n\t"); + asm volatile("pfalse p3.b\n\t"); + asm volatile("ptrue p4.b\n\t"); + asm volatile("pfalse p5.b\n\t"); + asm volatile("pfalse p6.b\n\t"); + asm volatile("pfalse p7.b\n\t"); + asm volatile("pfalse p8.b\n\t"); + asm volatile("ptrue p9.b\n\t"); + asm volatile("pfalse p10.b\n\t"); + asm volatile("pfalse p11.b\n\t"); + asm volatile("pfalse p12.b\n\t"); + asm volatile("pfalse p13.b\n\t"); + asm volatile("ptrue p14.b\n\t"); + asm volatile("pfalse p15.b\n\t"); + + asm volatile("cpy z0.b, p0/z, #2\n\t"); + asm volatile("cpy z1.b, p5/z, #3\n\t"); + asm volatile("cpy z2.b, p10/z, #4\n\t"); + asm volatile("cpy z3.b, p15/z, #5\n\t"); + asm volatile("cpy z4.b, p0/z, #6\n\t"); + asm volatile("cpy z5.b, p5/z, #7\n\t"); + asm volatile("cpy z6.b, p10/z, #8\n\t"); + asm volatile("cpy z7.b, p15/z, #9\n\t"); + asm volatile("cpy z8.b, p0/z, #10\n\t"); + asm volatile("cpy z9.b, p5/z, #11\n\t"); + asm volatile("cpy z10.b, p10/z, #12\n\t"); + asm volatile("cpy z11.b, p15/z, #13\n\t"); + asm volatile("cpy z12.b, p0/z, #14\n\t"); + asm volatile("cpy z13.b, p5/z, #15\n\t"); + asm volatile("cpy z14.b, p10/z, #16\n\t"); + asm volatile("cpy z15.b, p15/z, #17\n\t"); + asm volatile("cpy z16.b, p0/z, #18\n\t"); + asm volatile("cpy z17.b, p5/z, #19\n\t"); + asm volatile("cpy z18.b, p10/z, #20\n\t"); + asm volatile("cpy z19.b, p15/z, #21\n\t"); + asm volatile("cpy z20.b, p0/z, #22\n\t"); + asm volatile("cpy z21.b, p5/z, #23\n\t"); + asm volatile("cpy z22.b, p10/z, #24\n\t"); + asm volatile("cpy z23.b, p15/z, #25\n\t"); + asm volatile("cpy z24.b, p0/z, #26\n\t"); + asm volatile("cpy z25.b, p5/z, #27\n\t"); + asm volatile("cpy z26.b, p10/z, #28\n\t"); + asm volatile("cpy z27.b, p15/z, #29\n\t"); + asm volatile("cpy z28.b, p0/z, #30\n\t"); + asm volatile("cpy z29.b, p5/z, #31\n\t"); + asm volatile("cpy z30.b, p10/z, #32\n\t"); + asm volatile("cpy z31.b, p15/z, 
#33\n\t"); +} + // This function will be called using jitted expression call. We change vector // length and write SVE registers. Our program context should restore to // orignal vector length and register values after expression evaluation. -int expr_eval_func() { - prctl(PR_SVE_SET_VL, 8 * 2); - write_sve_regs(); - prctl(PR_SVE_SET_VL, 8 * 4); - write_sve_regs(); +int expr_eval_func(bool streaming) { + int SET_VL_OPT = streaming ? PR_SME_SET_VL : PR_SVE_SET_VL; + prctl(SET_VL_OPT, 8 * 2); + // Note that doing a syscall brings you back to non-streaming mode, so we + // don't need to SMSTOP here. + if (streaming) + SMSTART(); + write_sve_regs_expr(); + prctl(SET_VL_OPT, 8 * 4); + if (streaming) + SMSTART(); + write_sve_regs_expr(); return 1; } int main() { +#ifdef START_SSVE + SMSTART(); +#endif write_sve_regs(); + return 0; // Set a break point here. } diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile copy from lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile copy to lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile @@ -1,5 +1,3 @@ C_SOURCES := main.c -CFLAGS_EXTRAS := -march=armv8-a+sve - include Makefile.rules diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -0,0 +1,100 @@ +""" +Test that LLDB correctly reads and writes AArch64 SIMD registers in SVE, +streaming SVE and normal SIMD modes. 
+ +In SIMD mode data comes from the SIMD regset but in SVE mode, it comes from +the SVE regset. In streaming mode it also comes from the SVE regset. +""" + +from enum import Enum +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class Mode(Enum): + SIMD = 0 + SVE = 1 + SSVE = 2 + +class SVESIMDRegistersTestCase(TestBase): + def get_build_flags(self, mode): + cflags = "-march=armv8-a+sve" + if mode == Mode.SSVE: + cflags += " -DSSVE" + elif mode == Mode.SVE: + cflags += " -DSVE" + + return {"CFLAGS_EXTRAS": cflags} + + def skip_if_needed(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): + self.skipTest("SVE registers must be supported.") + + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("SSVE registers must be supported.") + + def make_simd_value(self, n): + pad = " ".join(["0x00"] * 7) + return "{{0x{:02x} {} 0x{:02x} {}}}".format(n, pad, n, pad) + + def sve_simd_registers_impl(self, mode): + self.skip_if_needed(mode) + + self.build(dictionary=self.get_build_flags(mode)) + self.line = line_number("main.c", "// Set a break point here.") + + exe = self.getBuildArtifact("a.out") + self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, "main.c", self.line, num_expected_locations=1 + ) + self.runCmd("run", RUN_SUCCEEDED) + + self.expect( + "thread backtrace", + STOPPED_DUE_TO_BREAKPOINT, + substrs=["stop reason = breakpoint 1."], + ) + + # These are 128 bit registers, so getting them from the API as unsigned + # values doesn't work. Check the command output instead. + for i in range(32): + self.expect("register read v{}".format(i), + substrs=[self.make_simd_value(i)]) + + # Write a new set of values. The kernel will move the program back to + # non-streaming mode here. 
+ for i in range(32): + self.runCmd("register write v{} \"{}\"".format( + i, self.make_simd_value(i+1))) + + # Should be visible within lldb. + for i in range(32): + self.expect("register read v{}".format(i), + substrs=[self.make_simd_value(i+1)]) + + # The program should agree with lldb. + self.expect("continue", substrs=["exited with status = 0"]) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_sve(self): + """Test read/write of SIMD registers when in SVE mode.""" + self.sve_simd_registers_impl(Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_ssve(self): + """Test read/write of SIMD registers when in SSVE mode.""" + self.sve_simd_registers_impl(Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_simd(self): + """Test read/write of SIMD registers when in SIMD mode.""" + self.sve_simd_registers_impl(Mode.SIMD) \ No newline at end of file diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c @@ -0,0 +1,108 @@ +#include +#include + +void write_simd_regs() { +#define WRITE_SIMD(NUM) \ + asm volatile("MOV v" #NUM ".d[0], %0\n\t" \ + "MOV v" #NUM ".d[1], %0\n\t" ::"r"(NUM)) + + WRITE_SIMD(0); + WRITE_SIMD(1); + WRITE_SIMD(2); + WRITE_SIMD(3); + WRITE_SIMD(4); + WRITE_SIMD(5); + WRITE_SIMD(6); + WRITE_SIMD(7); + WRITE_SIMD(8); + WRITE_SIMD(9); + WRITE_SIMD(10); + WRITE_SIMD(11); + WRITE_SIMD(12); + WRITE_SIMD(13); + WRITE_SIMD(14); + WRITE_SIMD(15); + WRITE_SIMD(16); + WRITE_SIMD(17); + WRITE_SIMD(18); + WRITE_SIMD(19); + WRITE_SIMD(20); + WRITE_SIMD(21); + WRITE_SIMD(22); + WRITE_SIMD(23); + 
WRITE_SIMD(24); + WRITE_SIMD(25); + WRITE_SIMD(26); + WRITE_SIMD(27); + WRITE_SIMD(28); + WRITE_SIMD(29); + WRITE_SIMD(30); + WRITE_SIMD(31); +} + +unsigned verify_simd_regs() { + uint64_t got_low = 0; + uint64_t got_high = 0; + uint64_t target = 0; + +#define VERIFY_SIMD(NUM) \ + do { \ + got_low = 0; \ + got_high = 0; \ + asm volatile("MOV %0, v" #NUM ".d[0]\n\t" \ + "MOV %1, v" #NUM ".d[1]\n\t" \ + : "=r"(got_low), "=r"(got_high)); \ + target = NUM + 1; \ + if ((got_low != target) || (got_high != target)) \ + return 1; \ + } while (0) + + VERIFY_SIMD(0); + VERIFY_SIMD(1); + VERIFY_SIMD(2); + VERIFY_SIMD(3); + VERIFY_SIMD(4); + VERIFY_SIMD(5); + VERIFY_SIMD(6); + VERIFY_SIMD(7); + VERIFY_SIMD(8); + VERIFY_SIMD(9); + VERIFY_SIMD(10); + VERIFY_SIMD(11); + VERIFY_SIMD(12); + VERIFY_SIMD(13); + VERIFY_SIMD(14); + VERIFY_SIMD(15); + VERIFY_SIMD(16); + VERIFY_SIMD(17); + VERIFY_SIMD(18); + VERIFY_SIMD(19); + VERIFY_SIMD(20); + VERIFY_SIMD(21); + VERIFY_SIMD(22); + VERIFY_SIMD(23); + VERIFY_SIMD(24); + VERIFY_SIMD(25); + VERIFY_SIMD(26); + VERIFY_SIMD(27); + VERIFY_SIMD(28); + VERIFY_SIMD(29); + VERIFY_SIMD(30); + VERIFY_SIMD(31); + + return 0; +} + +int main() { +#ifdef SSVE + asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/); +#elif defined SVE + // Make the non-streaming SVE registers active. + asm volatile("cpy z0.b, p0/z, #1\n\t"); +#endif + // else test plain SIMD access. + + write_simd_regs(); + + return verify_simd_regs(); // Set a break point here. +}