diff --git a/lldb/bindings/interface/SBTarget.i b/lldb/bindings/interface/SBTarget.i
--- a/lldb/bindings/interface/SBTarget.i
+++ b/lldb/bindings/interface/SBTarget.i
@@ -944,6 +944,9 @@
     lldb::addr_t
     GetStackRedZoneSize();
 
+    uint32_t
+    GetMaximumOpcodeByteSize() const;
+
     %feature("docstring", "
     Returns true if the module has been loaded in this `SBTarget`.
     A module can be loaded either by the dynamic loader or by being manually
diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h
--- a/lldb/include/lldb/API/SBTarget.h
+++ b/lldb/include/lldb/API/SBTarget.h
@@ -841,6 +841,10 @@
 
   lldb::addr_t GetStackRedZoneSize();
 
+  /// Get the maximum opcode byte size reported by this target's architecture;
+  /// returns 0 when the SBTarget has no underlying target.
+  uint32_t GetMaximumOpcodeByteSize() const;
+
   bool IsLoaded(const lldb::SBModule &module) const;
 
   lldb::SBLaunchInfo GetLaunchInfo() const;
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
@@ -957,6 +957,18 @@
         }
         return self.send_recv(command_dict)
 
+    def request_disassemble(self, memoryReference, instructionOffset, instructionCount):
+        # Build the 'disassemble' request and hand back what send_recv() returns.
+        return self.send_recv({
+            'command': 'disassemble',
+            'type': 'request',
+            'arguments': {
+                'memoryReference': memoryReference,
+                'instructionOffset': instructionOffset,
+                'instructionCount': instructionCount,
+            },
+        })
+
     def terminate(self):
         self.send.close()
         # self.recv.close()
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1965,6 +1965,15 @@
   return sb_instructions;
 }
 
+uint32_t SBTarget::GetMaximumOpcodeByteSize() const {
+  LLDB_INSTRUMENT_VA(this);
+
+  // An SBTarget without a backing target reports 0.
+  if (TargetSP target_sp = GetSP())
+    return target_sp->GetArchitecture().GetMaximumOpcodeByteSize();
+  return 0;
+}
+
 lldb::SBInstructionList
SBTarget::GetInstructions(lldb::SBAddress base_addr,
                                                   const void *buf,
                                                   size_t size) {
diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py
--- a/lldb/test/API/python_api/target/TestTargetAPI.py
+++ b/lldb/test/API/python_api/target/TestTargetAPI.py
@@ -525,3 +525,16 @@
             module = target.GetModuleAtIndex(i)
             self.assertTrue(target.IsLoaded(module), "Running the target should "
                             "have loaded its modules.")
+
+    def test_get_max_opcode_byte_size(self):
+        """Exercise SBTarget.GetMaximumOpcodeByteSize() API."""
+
+        d = {'EXE': 'b.out'}
+        self.build(dictionary=d)
+        self.setTearDownCleanup(dictionary=d)
+        target = self.create_simple_target('b.out')
+
+        # Separate assertions so a failure reports the offending value.
+        maxOpcodeByteSize = target.GetMaximumOpcodeByteSize()
+        self.assertGreater(maxOpcodeByteSize, 0)
+        self.assertLess(maxOpcodeByteSize, 16)
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/Makefile b/lldb/test/API/tools/lldb-vscode/disassemble/Makefile
new file mode 100644
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/disassemble/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py b/lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py
new file mode 100644
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py
@@ -0,0 +1,97 @@
+"""
+Test lldb-vscode disassemble request
+"""
+
+
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+import lldbvscode_testcase
+
+
+class TestVSCode_disassemble(lldbvscode_testcase.VSCodeTestCaseBase):
+    def setUp(self):
+        lldbvscode_testcase.VSCodeTestCaseBase.setUp(self)
+
+        program = self.getBuildArtifact("a.out")
+        self.build_and_launch(program)
+        source = "main.cpp"
+
+        breakpoint_line = line_number(source, "// breakpoint")
+        breakpoint_ids = self.set_source_breakpoints(source, [breakpoint_line])
+        self.continue_to_breakpoints(breakpoint_ids)
+
+    @skipIfWindows
+    @skipIfRemote
+    def test_disassemble_negative_offset(self):
+        # Retrieve program counter
+        stackFrames, _ = self.get_stackFrames_and_totalFramesCount()
+        pc = stackFrames[0]["instructionPointerReference"]
+
+        # Get disassembled instructions
+        num_instructions, offset = 8, -4
+        response = self.vscode.request_disassemble(
+            memoryReference=pc,
+            instructionOffset=offset,
+            instructionCount=num_instructions,
+        )
+        self.assertTrue(response["success"])
+
+        disas_instructions = response["body"]["instructions"]
+        self.assertEqual(len(disas_instructions), num_instructions)
+        # With a negative offset the requested address sits `-offset` entries in.
+        self.assertEqual(disas_instructions[-offset]["address"], pc)
+        for instr in disas_instructions:
+            self.assertNotIn("invalid", instr["instruction"])
+
+    @skipIfWindows
+    @skipIfRemote
+    def test_disassemble_zero_offset(self):
+        # Retrieve program counter
+        stackFrames, _ = self.get_stackFrames_and_totalFramesCount()
+        pc = stackFrames[0]["instructionPointerReference"]
+
+        # Get disassembled instructions
+        num_instructions, offset = 4, 0
+        response = self.vscode.request_disassemble(
+            memoryReference=pc,
+            instructionOffset=offset,
+            instructionCount=num_instructions,
+        )
+        self.assertTrue(response["success"])
+
+        disas_instructions = response["body"]["instructions"]
+        self.assertEqual(len(disas_instructions), num_instructions)
+        self.assertEqual(disas_instructions[offset]["address"], pc)
+        for instr in disas_instructions:
+            self.assertNotIn("invalid", instr["instruction"])
+
+    @skipIfWindows
+    @skipIfRemote
+    def test_disassemble_positive_offset(self):
+        # Retrieve program counter
+        stackFrames, _ = self.get_stackFrames_and_totalFramesCount()
+        pc = stackFrames[0]["instructionPointerReference"]
+
+        # Get disassembled instructions
+        num_instructions, offset = 4, 2
+        response = self.vscode.request_disassemble(
+            memoryReference=pc,
+            instructionOffset=offset,
+            instructionCount=num_instructions,
+        )
+        self.assertTrue(response["success"])
+
+        disas_instructions = response["body"]["instructions"]
+        self.assertEqual(len(disas_instructions), num_instructions)
+        for instr in disas_instructions:
+            self.assertNotIn("invalid", instr["instruction"])
+
+    @skipIfWindows
+    @skipIfRemote
+    def test_disassemble_invalid_address(self):
+        response = self.vscode.request_disassemble(
+            memoryReference="0x0",
+            instructionOffset=-200,
+            instructionCount=400,
+        )
+        self.assertFalse(response["success"])
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/main.cpp b/lldb/test/API/tools/lldb-vscode/disassemble/main.cpp
new file mode 100644
--- /dev/null
+++ b/lldb/test/API/tools/lldb-vscode/disassemble/main.cpp
@@ -0,0 +1,8 @@
+int add(int x, int y) { return x + y; }
+
+int main() {
+  int sum = add(2, 3); // breakpoint
+  sum++;
+
+  return sum;
+}
diff --git a/lldb/tools/lldb-vscode/CMakeLists.txt b/lldb/tools/lldb-vscode/CMakeLists.txt
--- a/lldb/tools/lldb-vscode/CMakeLists.txt
+++ b/lldb/tools/lldb-vscode/CMakeLists.txt
@@ -25,6 +25,7 @@
 add_lldb_tool(lldb-vscode
   lldb-vscode.cpp
   BreakpointBase.cpp
+  DisassembledInstruction.cpp
   ExceptionBreakpoint.cpp
   FifoFiles.cpp
   FunctionBreakpoint.cpp
diff --git a/lldb/tools/lldb-vscode/DisassembledInstruction.h b/lldb/tools/lldb-vscode/DisassembledInstruction.h
new file mode 100644
--- /dev/null
+++ b/lldb/tools/lldb-vscode/DisassembledInstruction.h
@@ -0,0 +1,25 @@
+#ifndef LLDB_TOOLS_LLDB_VSCODE_DISASSEMBLED_INSTRUCTION_H
+#define LLDB_TOOLS_LLDB_VSCODE_DISASSEMBLED_INSTRUCTION_H
+
+#include "VSCodeForward.h"
+#include <string>
+
+namespace lldb_vscode {
+
+/// Textual form of one disassembled instruction, as emitted in a
+/// "disassemble" response.
+struct DisassembledInstruction {
+  /// Instruction address rendered as a hexadecimal string.
+  std::string m_address;
+  /// Instruction text: mnemonic, operands and optional comment.
+  std::string m_instruction;
+
+  /// Placeholder entry: all-zero address and empty instruction text.
+  DisassembledInstruction();
+  /// Capture the address and text of \p inst.
+  explicit DisassembledInstruction(lldb::SBInstruction &inst);
+};
+
+} // namespace lldb_vscode
+
+#endif // LLDB_TOOLS_LLDB_VSCODE_DISASSEMBLED_INSTRUCTION_H
diff --git a/lldb/tools/lldb-vscode/DisassembledInstruction.cpp
b/lldb/tools/lldb-vscode/DisassembledInstruction.cpp
new file mode 100644
--- /dev/null
+++ b/lldb/tools/lldb-vscode/DisassembledInstruction.cpp
@@ -0,0 +1,32 @@
+#include "DisassembledInstruction.h"
+
+#include "LLDBUtils.h"
+#include "VSCode.h"
+#include "lldb/API/SBInstruction.h"
+
+namespace lldb_vscode {
+
+// Placeholder entry: an all-zero address and no instruction text.
+DisassembledInstruction::DisassembledInstruction()
+    : m_address("0x0000000000000000"), m_instruction("") {}
+
+// Capture the load address and the "mnemonic operands ; comment" text of
+// `inst`, both resolved against the global target.
+DisassembledInstruction::DisassembledInstruction(lldb::SBInstruction &inst) {
+  const lldb::addr_t load_addr =
+      inst.GetAddress().GetLoadAddress(g_vsc.target);
+  const char *mnemonic = inst.GetMnemonic(g_vsc.target);
+  const char *operands = inst.GetOperands(g_vsc.target);
+  const char *comment = inst.GetComment(g_vsc.target);
+
+  // Only emit the " ; " separator when there is a comment to show.
+  const bool has_comment = comment != nullptr && comment[0] != '\0';
+  const char *separator = has_comment ? " ; " : "";
+
+  m_address = addr_to_hex_string(load_addr);
+  m_instruction =
+      llvm::formatv("{0,12} {1}{2}{3}", mnemonic, operands, separator, comment)
+          .str();
+}
+
+} // namespace lldb_vscode
\ No newline at end of file
diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h
--- a/lldb/tools/lldb-vscode/JSONUtils.h
+++ b/lldb/tools/lldb-vscode/JSONUtils.h
@@ -349,6 +349,8 @@
 ///   "source" - source file information as a "Source" VSCode object
 ///   "line" - the source file line number as an integer
 ///   "column" - the source file column number as an integer
+///   "instructionPointerReference" - a memory reference for the current
+///                                   instruction pointer in this frame
 ///
 /// \param[in] frame
 ///     The LLDB stack frame to use when populating out the "StackFrame"
@@ -463,6 +465,24 @@
                                  int64_t varID, bool format_hex,
                                  bool is_name_duplicated = false);
 
+/// Create a "DisassembledInstruction" object for an LLDB disassembled
+/// instruction object.
+///
+/// This function will fill in the following keys in the returned
+/// object:
+///   "address" - the address of the instruction
+///   "instruction" - the text representing the instruction and its operands
+///
+/// \param[in] instruction
+///     The LLDB disassembled instruction to use when populating out the
+///     "DisassembledInstruction" object.
+///
+/// \return
+///     A "DisassembledInstruction" JSON object that follows the formal
+///     JSON definition outlined by Microsoft.
+llvm::json::Value
+CreateDisassembledInstruction(const DisassembledInstruction &instruction);
+
 llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit);
 
 /// Create a runInTerminal reverse request object
diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp
--- a/lldb/tools/lldb-vscode/JSONUtils.cpp
+++ b/lldb/tools/lldb-vscode/JSONUtils.cpp
@@ -782,6 +782,9 @@
     object.try_emplace("line", line);
   }
   object.try_emplace("column", line_entry.GetColumn());
+
+  auto pc = addr_to_hex_string(frame.GetPC());
+  object.try_emplace("instructionPointerReference", pc);
   return llvm::json::Value(std::move(object));
 }
 
@@ -1097,6 +1100,14 @@
   return llvm::json::Value(std::move(object));
 }
 
+llvm::json::Value
+CreateDisassembledInstruction(const DisassembledInstruction &instruction) {
+  llvm::json::Object object;
+  EmplaceSafeString(object, "address", instruction.m_address);
+  EmplaceSafeString(object, "instruction", instruction.m_instruction);
+  return llvm::json::Value(std::move(object));
+}
+
 /// See
 /// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal
 llvm::json::Object
diff --git a/lldb/tools/lldb-vscode/LLDBUtils.h b/lldb/tools/lldb-vscode/LLDBUtils.h
--- a/lldb/tools/lldb-vscode/LLDBUtils.h
+++ b/lldb/tools/lldb-vscode/LLDBUtils.h
@@ -10,6 +10,7 @@
 #define LLDB_TOOLS_LLDB_VSCODE_LLDBUTILS_H
 
 #include "VSCodeForward.h"
+#include "lldb/lldb-types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
@@ -106,6 +107,28 @@
 ///     The LLDB frame index ID.
 uint32_t GetLLDBFrameID(uint64_t dap_frame_id);
 
+/// Given an address, convert it to its hexadecimal representation.
+///
+/// \param[in] address
+///     The address to convert.
+///
+/// \return
+///     The hexadecimal representation of the address, prefixed with "0x".
+std::string addr_to_hex_string(const lldb::addr_t address);
+
+/// Given a hexadecimal representation of an address, convert it to a number.
+///
+/// Reverse of `addr_to_hex_string()`.
+///
+/// \param[in] hex_address
+///     The hexadecimal address to convert, with or without a "0x" prefix.
+///
+/// \return
+///     The numeric value of the address, or 0 if \a hex_address is missing
+///     or is not a valid hexadecimal number.
+lldb::addr_t
+hex_string_to_addr(const std::optional<llvm::StringRef> hex_address);
+
 } // namespace lldb_vscode
 
 #endif
diff --git a/lldb/tools/lldb-vscode/LLDBUtils.cpp b/lldb/tools/lldb-vscode/LLDBUtils.cpp
--- a/lldb/tools/lldb-vscode/LLDBUtils.cpp
+++ b/lldb/tools/lldb-vscode/LLDBUtils.cpp
@@ -83,4 +83,27 @@
                    frame.GetFrameID());
 }
 
+std::string addr_to_hex_string(const lldb::addr_t address) {
+  return "0x" + llvm::utohexstr(address, true);
+}
+
+lldb::addr_t
+hex_string_to_addr(const std::optional<llvm::StringRef> hex_address) {
+  // The optional may be empty; never dereference it unchecked.
+  if (!hex_address)
+    return 0;
+
+  llvm::StringRef digits = hex_address->trim();
+  if (!digits.consume_front("0x"))
+    digits.consume_front("0X");
+
+  // StringRef is not guaranteed to be NUL-terminated and std::stoull throws
+  // on malformed input, so parse with getAsInteger(), which returns true on
+  // failure.
+  lldb::addr_t address = 0;
+  if (digits.getAsInteger(16, address))
+    return 0;
+  return address;
+}
+
 } // namespace lldb_vscode
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h
--- a/lldb/tools/lldb-vscode/VSCode.h
+++ b/lldb/tools/lldb-vscode/VSCode.h
@@ -46,6 +46,7 @@
 #include "lldb/API/SBTarget.h"
 #include "lldb/API/SBThread.h"
 
+#include "DisassembledInstruction.h"
 #include "ExceptionBreakpoint.h"
 #include "FunctionBreakpoint.h"
 #include "IOStream.h"
diff --git a/lldb/tools/lldb-vscode/VSCodeForward.h b/lldb/tools/lldb-vscode/VSCodeForward.h
--- a/lldb/tools/lldb-vscode/VSCodeForward.h
+++ b/lldb/tools/lldb-vscode/VSCodeForward.h
@@ -15,6 +15,7 @@
 struct FunctionBreakpoint;
 struct SourceBreakpoint;
 struct SourceReference;
+struct DisassembledInstruction;
 } // namespace lldb_vscode
 
 namespace lldb {
diff --git
a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -1451,8 +1451,7 @@
   // which may affect the outcome of tests.
   bool source_init_file = GetBoolean(arguments, "sourceInitFile", true);
 
-  g_vsc.debugger =
-      lldb::SBDebugger::Create(source_init_file, log_cb, nullptr);
+  g_vsc.debugger = lldb::SBDebugger::Create(source_init_file, log_cb, nullptr);
   g_vsc.progress_event_thread = std::thread(ProgressEventThreadFunction);
 
   // Start our event thread so we can receive events from the debugger, target,
@@ -1542,6 +1541,9 @@
 
   // The debug adapter supports 'logMessage' in breakpoint.
   body.try_emplace("supportsLogPoints", true);
+  // The debug adapter supports the 'disassemble' request.
+  body.try_emplace("supportsDisassembleRequest", true);
+
   response.try_emplace("body", std::move(body));
   g_vsc.SendJSON(llvm::json::Value(std::move(response)));
 }
@@ -2117,6 +2119,208 @@
   g_vsc.SendJSON(llvm::json::Value(std::move(response)));
 }
 
+/// Disassemble \a count bytes of process memory starting at \a start.
+///
+/// \return
+///     The decoded instructions, or an empty vector if no memory could be
+///     read.
+std::vector<lldb::SBInstruction>
+_get_instructions_from_memory(lldb::addr_t start, uint64_t count) {
+  lldb::SBProcess process = g_vsc.target.GetProcess();
+
+  lldb::SBError error;
+  std::vector<uint8_t> buffer(count, 0);
+  const size_t bytes_read = process.ReadMemory(
+      start, static_cast<void *>(buffer.data()), count, error);
+  // The range is derived from a client-supplied address, so an unreadable
+  // range is a runtime condition to handle, not an invariant to assert on.
+  if (bytes_read == 0)
+    return {};
+
+  // If `start` falls in the middle of an instruction,
+  // that first instruction will not be parsed correctly (negligible)
+  const auto base_addr = lldb::SBAddress(start, g_vsc.target);
+  lldb::SBInstructionList instructions =
+      g_vsc.target.GetInstructions(base_addr, buffer.data(), bytes_read);
+
+  std::vector<lldb::SBInstruction> sb_instructions;
+  const size_t num_instructions = instructions.GetSize();
+  sb_instructions.reserve(num_instructions);
+  for (size_t i = 0; i < num_instructions; i++)
+    sb_instructions.emplace_back(instructions.GetInstructionAtIndex(i));
+  return sb_instructions;
+}
+
+/// Disassemble \a instruction_count instructions starting
+/// \a instruction_offset (>= 0) instructions after \a base_addr.
+llvm::json::Array
+_handle_disassemble_positive_offset(lldb::addr_t base_addr,
+                                    int64_t instruction_offset,
+                                    uint64_t instruction_count) {
+  llvm::json::Array response_instructions;
+
+  // Use the `ReadInstructions()` API to fetch
+  // `instruction_offset + instruction_count` instructions from `base_addr`,
+  // then drop the leading `instruction_offset` ones.
+  auto start_addr = lldb::SBAddress(base_addr, g_vsc.target);
+  lldb::SBInstructionList instructions = g_vsc.target.ReadInstructions(
+      start_addr, instruction_offset + instruction_count);
+
+  const auto num_instrs_to_skip = static_cast<size_t>(instruction_offset);
+  for (size_t i = num_instrs_to_skip; i < instructions.GetSize(); ++i) {
+    lldb::SBInstruction instr = instructions.GetInstructionAtIndex(i);
+
+    auto disass_instr =
+        CreateDisassembledInstruction(DisassembledInstruction(instr));
+    response_instructions.emplace_back(std::move(disass_instr));
+  }
+
+  return response_instructions;
+}
+
+/// Disassemble \a instruction_count instructions starting
+/// `-instruction_offset` instructions before \a base_addr.
+///
+/// \return
+///     The instructions, aligned so that \a base_addr sits at index
+///     `-instruction_offset`; empty if \a base_addr could not be located.
+llvm::json::Array _handle_disassemble_negative_offset(
+    lldb::addr_t base_addr, int64_t instruction_offset,
+    uint64_t instruction_count,
+    std::optional<llvm::StringRef> memory_reference) {
+  // `base_addr` is the parsed form of `memory_reference`; the parameter is
+  // only kept so that the signature stays unchanged.
+  (void)memory_reference;
+  llvm::json::Array response_instructions;
+
+  const uint64_t max_instruction_size =
+      g_vsc.target.GetMaximumOpcodeByteSize();
+  // Index that `base_addr` must occupy in the response.
+  const uint64_t expected_index =
+      0 - static_cast<uint64_t>(instruction_offset);
+
+  // Bail out (the caller pads the response) when the opcode size is unknown
+  // or when stepping back would wrap around below address 0.
+  if (max_instruction_size == 0 ||
+      expected_index > base_addr / max_instruction_size)
+    return response_instructions;
+
+  const lldb::addr_t start_addr =
+      base_addr - expected_index * max_instruction_size;
+  // The window must reach past `base_addr` even when fewer instructions are
+  // requested than the offset steps back, or it could never be located.
+  const uint64_t window_instructions = instruction_count > expected_index
+                                           ? instruction_count
+                                           : expected_index + 1;
+  const uint64_t disassemble_bytes =
+      window_instructions * max_instruction_size;
+
+  /**
+   * For negative offsets, we do not know what `start_addr` corresponds to the
+   * instruction located `instruction_offset` instructions before `base_addr`
+   * since on some architectures opcodes have variable length.
+   *
+   * To address that, we need to read at least starting from `start_addr =
+   * base_addr + instruction_offset * max_instruction_size` (pruning is done if
+   * more than `instruction_count` instructions are fetched) and ensure we start
+   * disassembling on the correct instruction boundary since `start_addr` might
+   * be in between opcode boundaries. To address the latter concern, we retry
+   * from each of the `max_instruction_size` preceding byte offsets.
+   */
+  for (uint64_t shift = 0;
+       shift < max_instruction_size && shift <= start_addr; shift++) {
+    auto sb_instructions =
+        _get_instructions_from_memory(start_addr - shift, disassemble_bytes);
+
+    // Find position of requested instruction
+    // in retrieved disassembled instructions
+    const size_t not_found = sb_instructions.size();
+    size_t index = not_found;
+    for (size_t j = 0; j < sb_instructions.size(); j++) {
+      if (sb_instructions[j].GetAddress().GetLoadAddress(g_vsc.target) ==
+          base_addr) {
+        index = j;
+        break;
+      }
+    }
+    if (index == not_found)
+      continue;
+
+    // Copy instructions into queue to easily manipulate them
+    std::deque<DisassembledInstruction> disass_instructions;
+    for (auto &instr : sb_instructions)
+      disass_instructions.emplace_back(instr);
+
+    // Make sure the address in the disassemble request is at the right position
+    if (index < expected_index) {
+      for (uint64_t pad = index; pad < expected_index; pad++)
+        disass_instructions.emplace_front();
+    } else if (index > expected_index) {
+      disass_instructions.erase(disass_instructions.begin(),
+                                disass_instructions.begin() +
+                                    (index - expected_index));
+    }
+
+    // Truncate if too many instructions
+    if (disass_instructions.size() > instruction_count) {
+      disass_instructions.erase(disass_instructions.begin() + instruction_count,
+                                disass_instructions.end());
+    }
+
+    for (auto &instr : disass_instructions)
+      response_instructions.emplace_back(CreateDisassembledInstruction(instr));
+    return response_instructions;
+  }
+
+  return response_instructions;
+}
+
+/// Handle a "disassemble" request: reply with exactly "instructionCount"
+/// entries, padding with placeholder instructions where none were decoded.
+void request_disassemble(const llvm::json::Object &request) {
+  llvm::json::Object response;
+  FillResponse(request, response);
+  const auto *arguments = request.getObject("arguments");
+  // "arguments" or "memoryReference" may be missing from a malformed request.
+  std::optional<llvm::StringRef> memory_reference;
+  if (arguments)
+    memory_reference = arguments->getString("memoryReference");
+  const auto instruction_offset = GetSigned(arguments, "instructionOffset", 0);
+  const auto instruction_count = GetUnsigned(arguments, "instructionCount", 0);
+  llvm::json::Array response_instructions;
+
+  // Address 0 doubles as the "invalid reference" marker.
+  const lldb::addr_t base_addr =
+      memory_reference ? hex_string_to_addr(memory_reference) : 0;
+  const bool success = base_addr != 0;
+  if (success) {
+    response_instructions =
+        instruction_offset >= 0
+            ? _handle_disassemble_positive_offset(base_addr, instruction_offset,
+                                                  instruction_count)
+            : _handle_disassemble_negative_offset(base_addr, instruction_offset,
+                                                  instruction_count,
+                                                  memory_reference);
+  } else {
+    response.try_emplace("message", "invalid or missing memoryReference");
+  }
+
+  // Add padding if not enough instructions
+  while (response_instructions.size() < instruction_count) {
+    response_instructions.emplace_back(
+        CreateDisassembledInstruction(DisassembledInstruction()));
+  }
+
+  assert((response_instructions.size() == instruction_count) &&
+         "should return exact number of requested instructions");
+
+  llvm::json::Object body;
+  body.try_emplace("instructions", std::move(response_instructions));
+  response.try_emplace("body", std::move(body));
+  response["success"] = llvm::json::Value(success);
+  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+}
+
 // "SetExceptionBreakpointsRequest": {
 //   "allOf": [ { "$ref": "#/definitions/Request" }, {
 //     "type": "object",
@@ -3085,6 +3289,7 @@
   g_vsc.RegisterRequestCallback("stepOut", request_stepOut);
   g_vsc.RegisterRequestCallback("threads", request_threads);
   g_vsc.RegisterRequestCallback("variables", request_variables);
+  g_vsc.RegisterRequestCallback("disassemble", request_disassemble);
   // Custom requests
   g_vsc.RegisterRequestCallback("compileUnits", request_compileUnits);
   g_vsc.RegisterRequestCallback("modules", request_modules);