diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -106,6 +106,8 @@ Error initialize(std::unique_ptr DataBuffer); // Read and parse the contents of the `DataBuffer` as a binary format profile. Error readRawProfile(std::unique_ptr DataBuffer); + // Initialize the segment mapping information for symbolization. + Error setupForSymbolization(); // Symbolize and cache all the virtual addresses we encounter in the // callstacks from the raw profile. Also prune callstack frames which we can't // symbolize or those that belong to the runtime. For profile entries where @@ -125,11 +127,21 @@ object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); + // The profiled binary. object::OwningBinary Binary; + // A symbolizer to translate virtual addresses to code locations. std::unique_ptr Symbolizer; + // The preferred load address of the executable segment. + uint64_t PreferredTextSegmentAddress = 0; + // The base address of the text segment in the process during profiling. + uint64_t ProfiledTextSegmentStart = 0; + // The limit address of the text segment in the process during profiling. + uint64_t ProfiledTextSegmentEnd = 0; + + // The memory mapped segment information for all executable segments in the + // profiled binary (filtered from the raw profile using the build id). + llvm::SmallVector SegmentInfo; - // The contents of the raw profile. - llvm::SmallVector SegmentInfo; // A map from callstack id (same as key in CallStackMap below) to the heap // information recorded for that allocation context. 
llvm::MapVector CallstackProfileData;
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -24,13 +24,16 @@
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "memprof"
@@ -270,17 +273,40 @@
   auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
   const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile();
   auto PHdrsOr = ElfFile.program_headers();
-  if(!PHdrsOr)
-    return report(make_error<StringError>(Twine("Could not read program headers: "),
-                                          inconvertibleErrorCode()),
-                  FileName);
-  auto FirstLoadHeader = PHdrsOr->begin();
-  while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
-    ++FirstLoadHeader;
-  if(FirstLoadHeader->p_vaddr == 0)
-    return report(make_error<StringError>(Twine("Unsupported position independent code"),
-                                          inconvertibleErrorCode()),
-                  FileName);
+  if (!PHdrsOr)
+    return report(
+        make_error<StringError>(Twine("Could not read program headers: "),
+                                inconvertibleErrorCode()),
+        FileName);
+
+  int NumExecutableSegments = 0;
+  for (const auto &Phdr : *PHdrsOr) {
+    if (Phdr.p_type == ELF::PT_LOAD) {
+      if (Phdr.p_flags & ELF::PF_X) {
+        // We assume only one text segment in the main binary for simplicity and
+        // reduce the overhead of checking multiple ranges during symbolization.
+        if (++NumExecutableSegments > 1) {
+          return report(
+              make_error<StringError>(
+                  "Expect only one executable load segment in the binary",
+                  inconvertibleErrorCode()),
+              FileName);
+        }
+        // Segment will always be loaded at a page boundary, expect it to be
+        // aligned already. Assume 4K pagesize for the machine from which the
+        // profile has been collected. This should be fine for now, in case we
+        // want to support other pagesizes it can be recorded in the raw profile
+        // during collection.
+        PreferredTextSegmentAddress = Phdr.p_vaddr;
+        // Test the low 12 bits directly. Masking with `~(0x1000 - 1U)` uses a
+        // 32-bit mask which zero-extends and clears the upper half of the
+        // 64-bit p_vaddr, failing the check for addresses at or above 4GiB.
+        assert((Phdr.p_vaddr & (0x1000 - 1U)) == 0 &&
+               "Expect p_vaddr to always be page aligned");
+        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
+      }
+    }
+  }
 
   auto Triple = ElfObject->makeTriple();
   if (!Triple.isX86())
@@ -299,15 +325,65 @@
     return report(SOFOr.takeError(), FileName);
   Symbolizer = std::move(SOFOr.get());
 
+  // Process the raw profile.
   if (Error E = readRawProfile(std::move(DataBuffer)))
     return E;
 
+  if (Error E = setupForSymbolization())
+    return E;
+
   if (Error E = symbolizeAndFilterStackFrames())
     return E;
 
   return mapRawProfileToRecords();
 }
 
+// Locates the profiled binary's text segment among the segments recorded in
+// the raw profile by matching build ids, and caches its runtime bounds in
+// ProfiledTextSegmentStart/End for use when adjusting callstack addresses.
+// Returns an error if the binary has no build id, if more than one recorded
+// segment matches it, or if none does.
+Error RawMemProfReader::setupForSymbolization() {
+  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
+  auto BuildIdOr = object::getBuildID(Object);
+  if (!BuildIdOr.has_value())
+    return make_error<StringError>(Twine("No build id found in binary ") +
+                                       Binary.getBinary()->getFileName(),
+                                   inconvertibleErrorCode());
+  llvm::ArrayRef<uint8_t> BinaryId = BuildIdOr.value();
+
+  int NumMatched = 0;
+  for (const auto &Entry : SegmentInfo) {
+    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
+    if (BinaryId == SegmentId) {
+      // We assume only one text segment in the main binary for simplicity and
+      // reduce the overhead of checking multiple ranges during symbolization.
+      if (++NumMatched > 1) {
+        return make_error<StringError>(
+            "We expect only one executable segment in the profiled binary",
+            inconvertibleErrorCode());
+      }
+      ProfiledTextSegmentStart = Entry.Start;
+      ProfiledTextSegmentEnd = Entry.End;
+    }
+  }
+  // A profile that matches no segment of this binary is bad input, not a
+  // programming error, so report it in every build mode. With only an assert
+  // a release build would carry on with both segment bounds left at zero.
+  if (NumMatched == 0)
+    return make_error<StringError>(
+        Twine("No matching executable segments found in binary ") +
+            Binary.getBinary()->getFileName(),
+        inconvertibleErrorCode());
+  // The disjunction is parenthesized so the message applies to the whole
+  // condition and compilers do not warn about mixing `||` with `&&`.
+  assert((PreferredTextSegmentAddress == 0 ||
+          PreferredTextSegmentAddress == ProfiledTextSegmentStart) &&
+         "Expect text segment address to be 0 or equal to profiled text "
+         "segment start.");
+  return Error::success();
+}
+
 Error RawMemProfReader::mapRawProfileToRecords() {
   // Hold a mapping from function to each callsite location we encounter within
   // it that is part of some dynamic allocation context. The location is stored
@@ -516,20 +592,21 @@
 
 object::SectionedAddress
 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
-  LLVM_DEBUG({
-  SegmentEntry *ContainingSegment = nullptr;
-  for (auto &SE : SegmentInfo) {
-    if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
-      ContainingSegment = &SE;
-    }
+  // The range test excludes the start and includes the end address, the same
+  // convention as the debug-only segment lookup this replaces.
+  if (VirtualAddress > ProfiledTextSegmentStart &&
+      VirtualAddress <= ProfiledTextSegmentEnd) {
+    // For PIE binaries, the preferred address is zero and we adjust the virtual
+    // address by start of the profiled segment assuming that the offset of the
+    // segment in the binary is zero. For non-PIE binaries the preferred and
+    // profiled segment addresses should be equal and this is a no-op.
+    const uint64_t AdjustedAddress =
+        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
+    return object::SectionedAddress{AdjustedAddress};
   }
-
-  // Ensure that the virtual address is valid.
-  assert(ContainingSegment && "Could not find a segment entry");
-  });
-
-  // TODO: Compute the file offset based on the maps and program headers. For
-  // now this only works for non PIE binaries.
+  // Addresses which do not originate from the profiled text segment in the
+  // binary are not adjusted. These will fail symbolization and be filtered out
+  // during processing.
   return object::SectionedAddress{VirtualAddress};
 }
 
diff --git a/llvm/test/tools/llvm-profdata/memprof-pic.test b/llvm/test/tools/llvm-profdata/memprof-pic.test --- a/llvm/test/tools/llvm-profdata/memprof-pic.test +++ b/llvm/test/tools/llvm-profdata/memprof-pic.test @@ -7,6 +7,92 @@ recorded. To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang -RUN: not llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/Inputs/pic.memprofexe -o - 2>&1 | FileCheck %s +RUN: llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/Inputs/pic.memprofexe -o - 2>&1 | FileCheck %s -CHECK: Unsupported position independent code +CHECK: MemprofProfile: +CHECK-NEXT: Summary: +CHECK-NEXT: Version: 3 +CHECK-NEXT: NumSegments: {{[0-9]+}} +CHECK-NEXT: NumMibInfo: 2 +CHECK-NEXT: NumAllocFunctions: 1 +CHECK-NEXT: NumStackOffsets: 2 +CHECK-NEXT: Segments: +CHECK-NEXT: - +CHECK-NEXT: BuildId: {{[[:xdigit:]]+}} +CHECK-NEXT: Start: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: End: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: - + +CHECK: Records: +CHECK-NEXT: - +CHECK-NEXT: FunctionGUID: {{[0-9]+}} +CHECK-NEXT: AllocSites: +CHECK-NEXT: - +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 21 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: {{[0-9]+}} +CHECK-NEXT: DeallocTimestamp: {{[0-9]+}} +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: {{[0-9]+}} +CHECK-NEXT: DeallocCpuId: {{[0-9]+}} +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 
+CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: TotalAccessDensity: 20 +CHECK-NEXT: MinAccessDensity: 20 +CHECK-NEXT: MaxAccessDensity: 20 +CHECK-NEXT: TotalLifetimeAccessDensity: 20000 +CHECK-NEXT: MinLifetimeAccessDensity: 20000 +CHECK-NEXT: MaxLifetimeAccessDensity: 20000 +CHECK-NEXT: - +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main +CHECK-NEXT: LineOffset: 4 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: {{[0-9]+}} +CHECK-NEXT: DeallocTimestamp: {{[0-9]+}} +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: {{[0-9]+}} +CHECK-NEXT: DeallocCpuId: {{[0-9]+}} +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: TotalAccessDensity: 20 +CHECK-NEXT: MinAccessDensity: 20 +CHECK-NEXT: MaxAccessDensity: 20 +CHECK-NEXT: TotalLifetimeAccessDensity: 20000 +CHECK-NEXT: MinLifetimeAccessDensity: 20000 +CHECK-NEXT: MaxLifetimeAccessDensity: 20000