diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.h b/lldb/source/Plugins/Process/minidump/MinidumpParser.h --- a/lldb/source/Plugins/Process/minidump/MinidumpParser.h +++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.h @@ -96,6 +96,9 @@ llvm::object::MinidumpFile &GetMinidumpFile() { return *m_file; } + static MemoryRegionInfo GetMemoryRegionInfo(const MemoryRegionInfos ®ions, + lldb::addr_t load_addr); + private: MinidumpParser(lldb::DataBufferSP data_sp, std::unique_ptr file); diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp --- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp +++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp @@ -267,6 +267,88 @@ return {}; } +static bool +CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser, + std::vector ®ions) { + auto data = parser.GetStream(StreamType::LinuxMaps); + if (data.empty()) + return false; + ParseLinuxMapRegions(llvm::toStringRef(data), + [&](const lldb_private::MemoryRegionInfo ®ion, + const lldb_private::Status &status) -> bool { + if (status.Success()) + regions.push_back(region); + return true; + }); + return !regions.empty(); +} + +/// Check for the memory regions starting at \a load_addr for a contiguous +/// section that has execute permissions that matches the module path. +/// +/// When we load a breakpad generated minidump file, we might have the +/// /proc//maps text for a process that details the memory map of the +/// process that the minidump is describing. This checks the sorted memory +/// regions for a section that has execute permissions. A sample maps files +/// might look like: +/// +/// 00400000-00401000 r--p 00000000 fd:01 2838574 /tmp/a.out +/// 00401000-00402000 r-xp 00001000 fd:01 2838574 /tmp/a.out +/// 00402000-00403000 r--p 00002000 fd:01 2838574 /tmp/a.out +/// 00403000-00404000 r--p 00002000 fd:01 2838574 /tmp/a.out +/// 00404000-00405000 rw-p 00003000 fd:01 2838574 /tmp/a.out +/// ... +/// +/// This function should return true when given 0x00400000 and "/tmp/a.out" +/// is passed in as the path since it has a consecutive memory region for +/// "/tmp/a.out" that has execute permissions at 0x00401000. This will help us +/// differentiate if a file has been memory mapped into a process for reading +/// and breakpad ends up saving a minidump file that has two module entries for +/// a given file: one that is read only for the entire file, and then one that +/// is the real executable that is loaded into memory for execution. For memory +/// mapped files they will typically show up and r--p permissions and a range +/// matcning the entire range of the file on disk: +/// +/// 00800000-00805000 r--p 00000000 fd:01 2838574 /tmp/a.out +/// 00805000-00806000 r-xp 00001000 fd:01 1234567 /usr/lib/libc.so +/// +/// This function should return false when asked about 0x00800000 with +/// "/tmp/a.out" as the path. +/// +/// \param[in] path +/// The path to the module to check for in the memory regions. Only sequential +/// memory regions whose paths match this path will be considered when looking +/// for execute permissions. +/// +/// \param[in] regions +/// A sorted list of memory regions obtained from a call to +/// CreateRegionsCacheFromLinuxMaps. +/// +/// \param[in] base_of_image +/// The load address of this module from BaseOfImage in the modules list. +/// +/// \return +/// True if a contiguous region of memory belonging to the module with a +/// matching path exists that has executable permissions. Returns false if +/// \a regions is empty or if there are no regions with execute permissions +/// that match \a path. + +static bool CheckForLinuxExecutable(ConstString path, + const MemoryRegionInfos ®ions, + lldb::addr_t base_of_image) { + if (regions.empty()) + return false; + lldb::addr_t addr = base_of_image; + MemoryRegionInfo region = MinidumpParser::GetMemoryRegionInfo(regions, addr); + while (region.GetName() == path) { + if (region.GetExecutable() == MemoryRegionInfo::eYes) + return true; + addr += region.GetRange().GetByteSize(); + region = MinidumpParser::GetMemoryRegionInfo(regions, addr); + } + return false; +} + std::vector MinidumpParser::GetFilteredModuleList() { Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); auto ExpectedModules = GetMinidumpFile().getModuleList(); @@ -276,6 +358,15 @@ return {}; } + // Create memory regions from the linux maps only. We do this to avoid issues + // with breakpad generated minidumps where if someone has mmap'ed a shared + // library into memory to accesss its data in the object file, we can get a + // minidump with two mappings for a binary: one whose base image points to a + // memory region that is read + execute and one that is read only. + MemoryRegionInfos linux_regions; + if (CreateRegionsCacheFromLinuxMaps(*this, linux_regions)) + llvm::sort(linux_regions); + // map module_name -> filtered_modules index typedef llvm::StringMap MapType; MapType module_name_to_filtered_index; @@ -304,10 +395,25 @@ // "filtered_modules.size()" above. filtered_modules.push_back(&module); } else { + // We have a duplicate module entry. Check the linux regions to see if + // the module we already have is not really a mapped executable. If it + // isn't check to see if the current duplicate module entry is a real + // mapped executable, and if so, replace it. This can happen when a + // process mmap's in the file for an executable in order to read bytes + // from the executable file. A memory region mapping will exist for the + // mmap'ed version and for the loaded executable, but only one will have + // a consecutive region that is executable in the memory regions. + auto dup_module = filtered_modules[iter->second]; + ConstString name(*ExpectedName); + if (!CheckForLinuxExecutable(name, linux_regions, + dup_module->BaseOfImage) && + CheckForLinuxExecutable(name, linux_regions, module.BaseOfImage)) { + filtered_modules[iter->second] = &module; + continue; + } // This module has been seen. Modules are sometimes mentioned multiple // times when they are mapped discontiguously, so find the module with // the lowest "base_of_image" and use that as the filtered module. - auto dup_module = filtered_modules[iter->second]; if (module.BaseOfImage < dup_module->BaseOfImage) filtered_modules[iter->second] = &module; } @@ -411,22 +517,6 @@ return range->range_ref.slice(offset, overlap); } -static bool -CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser, - std::vector ®ions) { - auto data = parser.GetStream(StreamType::LinuxMaps); - if (data.empty()) - return false; - ParseLinuxMapRegions(llvm::toStringRef(data), - [&](const lldb_private::MemoryRegionInfo ®ion, - const lldb_private::Status &status) -> bool { - if (status.Success()) - regions.push_back(region); - return true; - }); - return !regions.empty(); -} - static bool CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser, std::vector ®ions) { @@ -500,10 +590,10 @@ uint64_t base_rva; std::tie(memory64_list, base_rva) = MinidumpMemoryDescriptor64::ParseMemory64List(data); - + if (memory64_list.empty()) return false; - + regions.reserve(memory64_list.size()); for (const auto &memory_desc : memory64_list) { if (memory_desc.data_size == 0) @@ -597,3 +687,30 @@ } return "unknown stream type"; } + +MemoryRegionInfo +MinidumpParser::GetMemoryRegionInfo(const MemoryRegionInfos ®ions, + lldb::addr_t load_addr) { + MemoryRegionInfo region; + auto pos = llvm::upper_bound(regions, load_addr); + if (pos != regions.begin() && + std::prev(pos)->GetRange().Contains(load_addr)) { + return *std::prev(pos); + } + + if (pos == regions.begin()) + region.GetRange().SetRangeBase(0); + else + region.GetRange().SetRangeBase(std::prev(pos)->GetRange().GetRangeEnd()); + + if (pos == regions.end()) + region.GetRange().SetRangeEnd(UINT64_MAX); + else + region.GetRange().SetRangeEnd(pos->GetRange().GetRangeBase()); + + region.SetReadable(MemoryRegionInfo::eNo); + region.SetWritable(MemoryRegionInfo::eNo); + region.SetExecutable(MemoryRegionInfo::eNo); + region.SetMapped(MemoryRegionInfo::eNo); + return region; +} diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -404,32 +404,6 @@ return ArchSpec(triple); } -static MemoryRegionInfo GetMemoryRegionInfo(const MemoryRegionInfos ®ions, - lldb::addr_t load_addr) { - MemoryRegionInfo region; - auto pos = llvm::upper_bound(regions, load_addr); - if (pos != regions.begin() && - std::prev(pos)->GetRange().Contains(load_addr)) { - return *std::prev(pos); - } - - if (pos == regions.begin()) - region.GetRange().SetRangeBase(0); - else - region.GetRange().SetRangeBase(std::prev(pos)->GetRange().GetRangeEnd()); - - if (pos == regions.end()) - region.GetRange().SetRangeEnd(UINT64_MAX); - else - region.GetRange().SetRangeEnd(pos->GetRange().GetRangeBase()); - - region.SetReadable(MemoryRegionInfo::eNo); - region.SetWritable(MemoryRegionInfo::eNo); - region.SetExecutable(MemoryRegionInfo::eNo); - region.SetMapped(MemoryRegionInfo::eNo); - return region; -} - void ProcessMinidump::BuildMemoryRegions() { if (m_memory_regions) return; @@ -454,7 +428,7 @@ MemoryRegionInfo::RangeType section_range(load_addr, section_sp->GetByteSize()); MemoryRegionInfo region = - ::GetMemoryRegionInfo(*m_memory_regions, load_addr); + MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr); if (region.GetMapped() != MemoryRegionInfo::eYes && region.GetRange().GetRangeBase() <= section_range.GetRangeBase() && section_range.GetRangeEnd() <= region.GetRange().GetRangeEnd()) { @@ -475,7 +449,7 @@ Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr, MemoryRegionInfo ®ion) { BuildMemoryRegions(); - region = ::GetMemoryRegionInfo(*m_memory_regions, load_addr); + region = MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr); return Status(); } diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp --- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp +++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp @@ -689,6 +689,130 @@ EXPECT_EQ(0x0000000000001000u, filtered_modules[0]->BaseOfImage); } +TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedFirst) { + ASSERT_THAT_ERROR(SetUpFromYaml(R"( +--- !minidump +Streams: + - Type: ModuleList + Modules: + - Base of Image: 0x400d0000 + Size of Image: 0x00002000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Base of Image: 0x400d3000 + Size of Image: 0x00001000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Type: LinuxMaps + Text: | + 400d0000-400d2000 r--p 00000000 b3:04 227 /usr/lib/libc.so + 400d2000-400d3000 rw-p 00000000 00:00 0 + 400d3000-400d4000 r-xp 00010000 b3:04 227 /usr/lib/libc.so + 400d4000-400d5000 rwxp 00001000 b3:04 227 /usr/lib/libc.so +... +)"), + llvm::Succeeded()); + // If we have a module mentioned twice in the module list, and we have full + // linux maps for all of the memory regions, make sure we pick the one that + // has a consecutive region with a matching path that has executable + // permissions. If clients open an object file with mmap, breakpad can create + // multiple mappings for a library errnoneously and the lowest address isn't + // always the right address. In this case we check the consective memory + // regions whose path matches starting at the base of image address and make + // sure one of the regions is executable and prefer that one. + // + // This test will make sure that if the executable is second in the module + // list, that it will become the selected module in the filtered list. + std::vector filtered_modules = + parser->GetFilteredModuleList(); + ASSERT_EQ(1u, filtered_modules.size()); + EXPECT_EQ(0x400d3000u, filtered_modules[0]->BaseOfImage); +} + +TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedSecond) { + ASSERT_THAT_ERROR(SetUpFromYaml(R"( +--- !minidump +Streams: + - Type: ModuleList + Modules: + - Base of Image: 0x400d0000 + Size of Image: 0x00002000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Base of Image: 0x400d3000 + Size of Image: 0x00001000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Type: LinuxMaps + Text: | + 400d0000-400d1000 r-xp 00010000 b3:04 227 /usr/lib/libc.so + 400d1000-400d2000 rwxp 00001000 b3:04 227 /usr/lib/libc.so + 400d2000-400d3000 rw-p 00000000 00:00 0 + 400d3000-400d5000 r--p 00000000 b3:04 227 /usr/lib/libc.so +... +)"), + llvm::Succeeded()); + // If we have a module mentioned twice in the module list, and we have full + // linux maps for all of the memory regions, make sure we pick the one that + // has a consecutive region with a matching path that has executable + // permissions. If clients open an object file with mmap, breakpad can create + // multiple mappings for a library errnoneously and the lowest address isn't + // always the right address. In this case we check the consective memory + // regions whose path matches starting at the base of image address and make + // sure one of the regions is executable and prefer that one. + // + // This test will make sure that if the executable is first in the module + // list, that it will remain the correctly selected module in the filtered + // list. + std::vector filtered_modules = + parser->GetFilteredModuleList(); + ASSERT_EQ(1u, filtered_modules.size()); + EXPECT_EQ(0x400d0000u, filtered_modules[0]->BaseOfImage); +} + +TEST_F(MinidumpParserTest, MinidumpDuplicateModuleSeparateCode) { + ASSERT_THAT_ERROR(SetUpFromYaml(R"( +--- !minidump +Streams: + - Type: ModuleList + Modules: + - Base of Image: 0x400d0000 + Size of Image: 0x00002000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Base of Image: 0x400d5000 + Size of Image: 0x00001000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Type: LinuxMaps + Text: | + 400d0000-400d3000 r--p 00000000 b3:04 227 /usr/lib/libc.so + 400d3000-400d5000 rw-p 00000000 00:00 0 + 400d5000-400d6000 r--p 00000000 b3:04 227 /usr/lib/libc.so + 400d6000-400d7000 r-xp 00010000 b3:04 227 /usr/lib/libc.so + 400d7000-400d8000 rwxp 00001000 b3:04 227 /usr/lib/libc.so +... +)"), + llvm::Succeeded()); + // If we have a module mentioned twice in the module list, and we have full + // linux maps for all of the memory regions, make sure we pick the one that + // has a consecutive region with a matching path that has executable + // permissions. If clients open an object file with mmap, breakpad can create + // multiple mappings for a library errnoneously and the lowest address isn't + // always the right address. In this case we check the consective memory + // regions whose path matches starting at the base of image address and make + // sure one of the regions is executable and prefer that one. + // + // This test will make sure if binaries are compiled with "-z separate-code", + // where the first region for a binary won't be marked as executable, that + // it gets selected by detecting the second consecutive mapping at 0x400d7000 + // when asked about the a module mamed "/usr/lib/libc.so" at 0x400d5000. + std::vector filtered_modules = + parser->GetFilteredModuleList(); + ASSERT_EQ(1u, filtered_modules.size()); + EXPECT_EQ(0x400d5000u, filtered_modules[0]->BaseOfImage); +} + TEST_F(MinidumpParserTest, MinidumpModuleOrder) { ASSERT_THAT_ERROR(SetUpFromYaml(R"( --- !minidump @@ -721,4 +845,3 @@ parser->GetMinidumpFile().getString(filtered_modules[1]->ModuleNameRVA), llvm::HasValue("/tmp/b")); } -