Compute the base address of a PIE/shared-object mapping from segment info.

BinaryContext::getBaseAddressForMapping() matches the page-aligned file offset
of a mapping against the recorded segments and returns the address the binary
was loaded at. DataAggregator keeps the raw address parsed from the perf mmap
event in MMapInfo::MMapAddress, stores the computed value in
MMapInfo::BaseAddress, and adjustAddress() subtracts that base address.

NOTE(review): this patch text was rebuilt from a copy whose line breaks were
collapsed and whose template argument lists were stripped. The argument lists
were restored and the hunk line counts re-checked, but the indentation of
context lines, the std::pair element types on context lines, and runs of
spaces inside string literals are best-effort; confirm them against the tree
before applying.

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -970,6 +970,15 @@
                                      Sections.end()));
   }
 
+  /// Return base address for the shared object or PIE based on the segment
+  /// mapping information. \p MMapAddress is an address where one of the
+  /// segments was mapped. \p FileOffset is the offset in the file of the
+  /// mapping. Note that \p FileOffset should be page-aligned and could be
+  /// different from the file offset of the segment which could be unaligned.
+  /// If no segment is found that matches \p FileOffset, return NoneType().
+  Optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
+                                              uint64_t FileOffset) const;
+
   /// Check if the address belongs to this binary's static allocation space.
   bool containsAddress(uint64_t Address) const {
     return Address >= FirstAllocAddress && Address < LayoutStartAddress;
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -168,14 +168,15 @@
   /// from the file name in BC.
   std::string BuildIDBinaryName;
 
-  /// Memory map info for a single file
+  /// Memory map info for a single file as recorded in perf.data
   struct MMapInfo {
-    uint64_t BaseAddress;
-    uint64_t Size;
-    uint64_t Offset;
-    int32_t PID{-1};
-    bool Forked{false};
-    uint64_t Time{0ULL}; // time in micro seconds
+    uint64_t BaseAddress{0}; ///< Base address of the mapped binary.
+    uint64_t MMapAddress{0}; ///< Address of the executable segment.
+    uint64_t Size{0};        ///< Size of the mapping.
+    uint64_t Offset{0};      ///< File offset of the mapped segment.
+    int32_t PID{-1};         ///< Process ID.
+    bool Forked{false};      ///< Was the process forked?
+    uint64_t Time{0ULL};     ///< Time in micro seconds.
   };
 
   /// Per-PID map info for the binary
@@ -420,12 +421,8 @@
   /// correspond to the binary allocated address space, are adjusted to avoid
   /// conflicts.
   void adjustAddress(uint64_t &Address, const MMapInfo &MMI) const {
-    if (Address >= MMI.BaseAddress && Address < MMI.BaseAddress + MMI.Size) {
-      // NOTE: Assumptions about the binary segment load table (PH for ELF)
-      // Segment file offset equals virtual address (which is true for .so)
-      // There aren't multiple executable segments loaded because MMapInfo
-      // doesn't support them.
-      Address -= MMI.BaseAddress - MMI.Offset;
+    if (Address >= MMI.MMapAddress && Address < MMI.MMapAddress + MMI.Size) {
+      Address -= MMI.BaseAddress;
     } else if (Address < MMI.Size) {
       // Make sure the address is not treated as belonging to the binary.
       Address = (-1ULL);
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1690,6 +1690,22 @@
   }
 }
 
+Optional<uint64_t>
+BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
+                                        uint64_t FileOffset) const {
+  // Find a segment with a matching file offset.
+  for (auto &KV : SegmentMapInfo) {
+    const SegmentInfo &SegInfo = KV.second;
+    if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
+      // Use segment's aligned memory offset to calculate the base address.
+      const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
+      return MMapAddress - MemOffset;
+    }
+  }
+
+  return NoneType();
+}
+
 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
   auto SI = AddressToSection.upper_bound(Address);
   if (SI != AddressToSection.begin()) {
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1943,7 +1943,7 @@
     }
 
     const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
-    if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
+    if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
       reportError("expected base address");
       Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
       return make_error_code(llvm::errc::io_error);
@@ -2003,7 +2003,7 @@
     dbgs() << "FileName -> mmap info:\n";
     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
       dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
-             << Twine::utohexstr(Pair.second.BaseAddress) << ", "
+             << Twine::utohexstr(Pair.second.MMapAddress) << ", "
              << Twine::utohexstr(Pair.second.Size) << " @ "
              << Twine::utohexstr(Pair.second.Offset) << "]\n";
   });
@@ -2017,29 +2017,44 @@
 
   auto Range = GlobalMMapInfo.equal_range(NameToUse);
   for (auto I = Range.first; I != Range.second; ++I) {
-    const MMapInfo &MMapInfo = I->second;
-    if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
+    MMapInfo &MMapInfo = I->second;
+    if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
       // Check that the binary mapping matches one of the segments.
       bool MatchFound = false;
       for (auto &KV : BC->SegmentMapInfo) {
         SegmentInfo &SegInfo = KV.second;
-        // The mapping is page-aligned and hence the BaseAddress could be
+        // The mapping is page-aligned and hence the MMapAddress could be
         // different from the segment start address. We cannot know the page
         // size of the mapping, but we know it should not exceed the segment
         // alignment value. Hence we are performing an approximate check.
-        if (SegInfo.Address >= MMapInfo.BaseAddress &&
-            SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
+        if (SegInfo.Address >= MMapInfo.MMapAddress &&
+            SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
           MatchFound = true;
           break;
         }
       }
       if (!MatchFound) {
         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
-               << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
+               << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
         continue;
       }
     }
 
+    // Set base address for shared objects.
+    if (!BC->HasFixedLoadAddress) {
+      Optional<uint64_t> BaseAddress =
+          BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
+      if (!BaseAddress) {
+        errs() << "PERF2BOLT-WARNING: unable to find base address of the "
+                  "binary when memory mapped at 0x"
+               << Twine::utohexstr(MMapInfo.MMapAddress)
+               << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
+               << ". Ignoring profile data for this mapping\n";
+        continue;
+      }
+      MMapInfo.BaseAddress = *BaseAddress;
+    }
+
     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
   }
 
@@ -2110,7 +2125,7 @@
   LLVM_DEBUG({
     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
       outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
-             << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
+             << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
              << Twine::utohexstr(MMI.second.Size) << ")\n";
   });
 
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
new file mode 100644
--- /dev/null
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -0,0 +1,85 @@
+#include "bolt/Core/BinaryContext.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace {
+struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> {
+  void SetUp() override {
+    initializeLLVM();
+    prepareElf();
+    initializeBOLT();
+  }
+
+protected:
+  void initializeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  void prepareElf() {
+    memcpy(ElfBuf, "\177ELF", 4);
+    ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+    EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+    EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+    EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64;
+    MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+  }
+
+  void initializeBOLT() {
+    BC = cantFail(BinaryContext::createBinaryContext(
+        ObjFile.get(), true, DWARFContext::create(*ObjFile.get())));
+    ASSERT_FALSE(!BC);
+  }
+
+  char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+  std::unique_ptr<ObjectFile> ObjFile;
+  std::unique_ptr<BinaryContext> BC;
+};
+} // namespace
+
+#ifdef X86_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester,
+                         ::testing::Values(Triple::x86_64));
+
+#endif
+
+#ifdef AARCH64_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester,
+                         ::testing::Values(Triple::aarch64));
+
+#endif
+
+TEST_P(BinaryContextTester, BaseAddress) {
+  // Check that base address calculation is correct for a binary with the
+  // following segment layout:
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000};
+  BC->SegmentMapInfo[0x10e8d2b4] =
+      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000};
+  BC->SegmentMapInfo[0x4a3bddc0] =
+      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000};
+  BC->SegmentMapInfo[0x4b84d5e8] =
+      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000};
+
+  Optional<uint64_t> BaseAddress =
+      BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000);
+  ASSERT_TRUE(BaseAddress.hasValue());
+  ASSERT_EQ(*BaseAddress, 0x7f13e46c9000ULL);
+
+  BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000);
+  ASSERT_FALSE(BaseAddress.hasValue());
+}
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -8,6 +8,7 @@
   )
 
 add_bolt_unittest(CoreTests
+  BinaryContext.cpp
   MCPlusBuilder.cpp
   )
 