diff --git a/lldb/test/API/macosx/unregistered-macho/Makefile b/lldb/test/API/macosx/unregistered-macho/Makefile new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/unregistered-macho/Makefile @@ -0,0 +1,3 @@ +C_SOURCES = main.c + +include Makefile.rules diff --git a/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py b/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py @@ -0,0 +1,47 @@ +"""Test that debugserver will parse a mach-o in inferior memory even if it's not loaded.""" + +import os +import re +import subprocess + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestUnregisteredMacho(TestBase): + + # newer debugserver required for jGetLoadedDynamicLibrariesInfos + # to support this + @skipIfOutOfTreeDebugserver + @no_debug_info_test + @skipUnlessDarwin + def test(self): + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.c")) + + frame = thread.GetFrameAtIndex(0) + macho_buf = frame.GetValueForVariablePath("macho_buf") + macho_addr = macho_buf.GetValueAsUnsigned() + invalid_macho_addr = macho_buf.GetValueAsUnsigned() + 4 + gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % macho_addr + + # Send the jGetLoadedDynamicLibrariesInfos packet + # to debugserver, asking it to parse the mach-o binary + # at this address and give us the UUID etc, even though + # dyld doesn't think there is a binary at that address. + # We won't get a pathname for the binary (from dyld), but + # we will get to the LC_UUID and include that. + self.expect (gdb_packet, substrs=['"pathname":""', '"uuid":"1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB"']) + + no_macho_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % invalid_macho_addr + self.expect (no_macho_gdb_packet, substrs=['response: {"images":[]}']) + + # Test that we get back the information for the properly + # formatted Mach-O binary in memory, but do not get an + # entry for the invalid Mach-O address. + both_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d,%d]}]'" % (macho_addr, invalid_macho_addr) + self.expect (both_gdb_packet, substrs=['response: {"images":[{"load_address":%d,' % macho_addr]) + self.expect (both_gdb_packet, substrs=['response: {"load_address":%d,' % invalid_macho_addr], matching=False) + diff --git a/lldb/test/API/macosx/unregistered-macho/main.c b/lldb/test/API/macosx/unregistered-macho/main.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/macosx/unregistered-macho/main.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +int main() { + int size_of_load_cmds = + sizeof(struct segment_command_64) + sizeof(struct uuid_command); + uint8_t *macho_buf = + (uint8_t *)malloc(sizeof(struct mach_header_64) + size_of_load_cmds); + uint8_t *p = macho_buf; + struct mach_header_64 mh; + mh.magic = MH_MAGIC_64; + mh.cputype = CPU_TYPE_ARM64; + mh.cpusubtype = 0; + mh.filetype = MH_EXECUTE; + mh.ncmds = 2; + mh.sizeofcmds = size_of_load_cmds; + mh.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE; + + memcpy(p, &mh, sizeof(mh)); + p += sizeof(mh); + + struct segment_command_64 seg; + seg.cmd = LC_SEGMENT_64; + seg.cmdsize = sizeof(seg); + strcpy(seg.segname, "__TEXT"); + seg.vmaddr = 0x5000; + seg.vmsize = 0x1000; + seg.fileoff = 0; + seg.filesize = 0; + seg.maxprot = 0; + seg.initprot = 0; + seg.nsects = 0; + seg.flags = 0; + + memcpy(p, &seg, sizeof(seg)); + p += sizeof(seg); + + struct uuid_command uuid; + uuid.cmd = LC_UUID; + uuid.cmdsize = sizeof(uuid); + uuid_clear(uuid.uuid); + uuid_parse("1b4e28ba-2fa1-11d2-883f-b9a761bde3fb", uuid.uuid); + + memcpy(p, &uuid, sizeof(uuid)); + p += sizeof(uuid); + + // If this needs to be debugged, the memory buffer can be written + // to a file with + // (lldb) mem rea -b -o /tmp/t -c `p - macho_buf` macho_buf + // (lldb) platform shell otool -hlv /tmp/t + // to verify that it is well formed. + + // And inside lldb, it should be inspectable via + // (lldb) script print(lldb.frame.locals["macho_buf"][0].GetValueAsUnsigned()) + // 105553162403968 + // (lldb) process plugin packet send + // 'jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[105553162403968]}]' + + return 0; // break here +} diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.h b/lldb/tools/debugserver/source/MacOSX/MachProcess.h --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.h +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.h @@ -73,9 +73,11 @@ uint64_t load_address; uint64_t mod_date; // may not be available - 0 if so struct mach_o_information macho_info; + bool is_valid; binary_image_information() - : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0) {} + : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0), + is_valid(false) {} }; // Child process control diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm @@ -757,12 +757,25 @@ uint32_t dyld_platform, nub_addr_t mach_o_header_addr, int wordsize, struct mach_o_information &inf) { uint64_t load_cmds_p; + auto is_valid_macho_header = [](uint32_t magic, uint32_t cputype) -> bool { + if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 && + magic != MH_CIGAM_64) + return false; + if (cputype != CPU_TYPE_X86_64 && cputype != CPU_TYPE_ARM && + cputype != CPU_TYPE_ARM64 && cputype != CPU_TYPE_ARM64_32) + return false; + return true; + }; + if (wordsize == 4) { struct mach_header header; if (ReadMemory(mach_o_header_addr, sizeof(struct mach_header), &header) != sizeof(struct mach_header)) { return false; } + if (!is_valid_macho_header(header.magic, header.cputype)) + return false; + load_cmds_p = mach_o_header_addr + sizeof(struct mach_header); inf.mach_header.magic = header.magic; inf.mach_header.cputype = header.cputype; @@ -779,6 +792,8 @@ &header) != sizeof(struct mach_header_64)) { return false; } + if (!is_valid_macho_header(header.magic, header.cputype)) + return false; load_cmds_p = mach_o_header_addr + sizeof(struct mach_header_64); inf.mach_header.magic = header.magic; inf.mach_header.cputype = header.cputype; @@ -896,6 +911,8 @@ const size_t image_count = image_infos.size(); for (size_t i = 0; i < image_count; i++) { + if (!image_infos[i].is_valid) + continue; JSONGenerator::DictionarySP image_info_dict_sp( new JSONGenerator::Dictionary()); image_info_dict_sp->AddIntegerItem("load_address", @@ -970,7 +987,6 @@ } JSONGenerator::DictionarySP reply_sp(new JSONGenerator::Dictionary()); - ; reply_sp->AddItem("images", image_infos_array_sp); return reply_sp; @@ -985,8 +1001,8 @@ // information. JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos( nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) { - JSONGenerator::DictionarySP reply_sp; + JSONGenerator::ObjectSP empty_reply_sp(new JSONGenerator::Dictionary()); int pointer_size = GetInferiorAddrSize(pid); std::vector image_infos; @@ -994,91 +1010,89 @@ uint8_t *image_info_buf = (uint8_t *)malloc(image_infos_size); if (image_info_buf == NULL) { - return reply_sp; + return empty_reply_sp; + } + if (ReadMemory(image_list_address, image_infos_size, image_info_buf) != + image_infos_size) { + return empty_reply_sp; } - if (ReadMemory(image_list_address, image_infos_size, image_info_buf) != - image_infos_size) { - return reply_sp; - } - //// First the image_infos array with (load addr, pathname, mod date) - ///tuples - - for (size_t i = 0; i < image_count; i++) { - struct binary_image_information info; - nub_addr_t pathname_address; - if (pointer_size == 4) { - uint32_t load_address_32; - uint32_t pathname_address_32; - uint32_t mod_date_32; - ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4); - ::memcpy(&pathname_address_32, - image_info_buf + (i * 3 * pointer_size) + pointer_size, 4); - ::memcpy(&mod_date_32, image_info_buf + (i * 3 * pointer_size) + - pointer_size + pointer_size, - 4); - info.load_address = load_address_32; - info.mod_date = mod_date_32; - pathname_address = pathname_address_32; - } else { - uint64_t load_address_64; - uint64_t pathname_address_64; - uint64_t mod_date_64; - ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8); - ::memcpy(&pathname_address_64, - image_info_buf + (i * 3 * pointer_size) + pointer_size, 8); - ::memcpy(&mod_date_64, image_info_buf + (i * 3 * pointer_size) + - pointer_size + pointer_size, - 8); - info.load_address = load_address_64; - info.mod_date = mod_date_64; - pathname_address = pathname_address_64; - } - char strbuf[17]; - info.filename = ""; - uint64_t pathname_ptr = pathname_address; - bool still_reading = true; - while (still_reading && - ReadMemory(pathname_ptr, sizeof(strbuf) - 1, strbuf) == - sizeof(strbuf) - 1) { - strbuf[sizeof(strbuf) - 1] = '\0'; - info.filename += strbuf; - pathname_ptr += sizeof(strbuf) - 1; - // Stop if we found nul byte indicating the end of the string - for (size_t i = 0; i < sizeof(strbuf) - 1; i++) { - if (strbuf[i] == '\0') { - still_reading = false; - break; - } + //// First the image_infos array with (load addr, pathname, mod date) + /// tuples + + for (size_t i = 0; i < image_count; i++) { + struct binary_image_information info; + nub_addr_t pathname_address; + if (pointer_size == 4) { + uint32_t load_address_32; + uint32_t pathname_address_32; + uint32_t mod_date_32; + ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4); + ::memcpy(&pathname_address_32, + image_info_buf + (i * 3 * pointer_size) + pointer_size, 4); + ::memcpy(&mod_date_32, + image_info_buf + (i * 3 * pointer_size) + pointer_size + + pointer_size, + 4); + info.load_address = load_address_32; + info.mod_date = mod_date_32; + pathname_address = pathname_address_32; + } else { + uint64_t load_address_64; + uint64_t pathname_address_64; + uint64_t mod_date_64; + ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8); + ::memcpy(&pathname_address_64, + image_info_buf + (i * 3 * pointer_size) + pointer_size, 8); + ::memcpy(&mod_date_64, + image_info_buf + (i * 3 * pointer_size) + pointer_size + + pointer_size, + 8); + info.load_address = load_address_64; + info.mod_date = mod_date_64; + pathname_address = pathname_address_64; + } + char strbuf[17]; + info.filename = ""; + uint64_t pathname_ptr = pathname_address; + bool still_reading = true; + while (still_reading && ReadMemory(pathname_ptr, sizeof(strbuf) - 1, + strbuf) == sizeof(strbuf) - 1) { + strbuf[sizeof(strbuf) - 1] = '\0'; + info.filename += strbuf; + pathname_ptr += sizeof(strbuf) - 1; + // Stop if we found nul byte indicating the end of the string + for (size_t i = 0; i < sizeof(strbuf) - 1; i++) { + if (strbuf[i] == '\0') { + still_reading = false; + break; } } - uuid_clear(info.macho_info.uuid); - image_infos.push_back(info); - } - if (image_infos.size() == 0) { - return reply_sp; } + uuid_clear(info.macho_info.uuid); + image_infos.push_back(info); + } + if (image_infos.size() == 0) { + return empty_reply_sp; + } - free(image_info_buf); + free(image_info_buf); - //// Second, read the mach header / load commands for all the dylibs + //// Second, read the mach header / load commands for all the dylibs - for (size_t i = 0; i < image_count; i++) { - // The SPI to provide platform is not available on older systems. - uint32_t platform = 0; - if (!GetMachOInformationFromMemory(platform, - image_infos[i].load_address, - pointer_size, - image_infos[i].macho_info)) { - return reply_sp; - } + for (size_t i = 0; i < image_count; i++) { + // The SPI to provide platform is not available on older systems. + uint32_t platform = 0; + if (GetMachOInformationFromMemory(platform, image_infos[i].load_address, + pointer_size, + image_infos[i].macho_info)) { + image_infos[i].is_valid = true; } + } - //// Third, format all of the above in the JSONGenerator object. - - return FormatDynamicLibrariesIntoJSON(image_infos); + //// Third, format all of the above in the JSONGenerator object. - return reply_sp; + return FormatDynamicLibrariesIntoJSON(image_infos); } /// From dyld SPI header dyld_process_info.h @@ -1141,7 +1155,6 @@ // macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer. JSONGenerator::ObjectSP MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) { - JSONGenerator::DictionarySP reply_sp; int pointer_size = GetInferiorAddrSize(pid); std::vector image_infos; @@ -1149,8 +1162,11 @@ uint32_t platform = GetPlatform(); const size_t image_count = image_infos.size(); for (size_t i = 0; i < image_count; i++) { - GetMachOInformationFromMemory(platform, image_infos[i].load_address, - pointer_size, image_infos[i].macho_info); + if (GetMachOInformationFromMemory(platform, image_infos[i].load_address, + pointer_size, + image_infos[i].macho_info)) { + image_infos[i].is_valid = true; + } } return FormatDynamicLibrariesIntoJSON(image_infos); } @@ -1160,10 +1176,11 @@ // dyld SPIs that exist in macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer. JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses( nub_process_t pid, std::vector &macho_addresses) { - JSONGenerator::DictionarySP reply_sp; int pointer_size = GetInferiorAddrSize(pid); + // Collect the list of all binaries that dyld knows about in + // the inferior process. std::vector all_image_infos; GetAllLoadedBinariesViaDYLDSPI(all_image_infos); uint32_t platform = GetPlatform(); @@ -1171,19 +1188,35 @@ std::vector image_infos; const size_t macho_addresses_count = macho_addresses.size(); const size_t all_image_infos_count = all_image_infos.size(); + for (size_t i = 0; i < macho_addresses_count; i++) { + bool found_matching_entry = false; for (size_t j = 0; j < all_image_infos_count; j++) { if (all_image_infos[j].load_address == macho_addresses[i]) { image_infos.push_back(all_image_infos[j]); + found_matching_entry = true; } } + if (!found_matching_entry) { + // dyld doesn't think there is a binary at this address, + // but maybe there isn't a binary YET - let's look in memory + // for a proper mach-o header etc and return what we can. + // We will have an empty filename for the binary (because dyld + // doesn't know about it yet) but we can read all of the mach-o + // load commands from memory directly. + struct binary_image_information entry; + entry.load_address = macho_addresses[i]; + image_infos.push_back(entry); + } } const size_t image_infos_count = image_infos.size(); for (size_t i = 0; i < image_infos_count; i++) { - GetMachOInformationFromMemory(platform, - image_infos[i].load_address, pointer_size, - image_infos[i].macho_info); + if (GetMachOInformationFromMemory(platform, image_infos[i].load_address, + pointer_size, + image_infos[i].macho_info)) { + image_infos[i].is_valid = true; + } } return FormatDynamicLibrariesIntoJSON(image_infos); }