diff --git a/compiler-rt/include/CMakeLists.txt b/compiler-rt/include/CMakeLists.txt --- a/compiler-rt/include/CMakeLists.txt +++ b/compiler-rt/include/CMakeLists.txt @@ -23,6 +23,7 @@ if (COMPILER_RT_BUILD_MEMPROF) set(MEMPROF_HEADERS sanitizer/memprof_interface.h + profile/MemProfData.inc ) endif(COMPILER_RT_BUILD_MEMPROF) diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc new file mode 100644 --- /dev/null +++ b/compiler-rt/include/profile/MemProfData.inc @@ -0,0 +1,61 @@ +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + + +#ifdef _MSC_VER +#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#else +#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. 
+#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. +PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + uint8_t BuildId[32]; +}); +} // namespace memprof +} // namespace llvm + +#endif diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -46,6 +46,7 @@ ) include_directories(..) +include_directories(../../include) set(MEMPROF_CFLAGS ${SANITIZER_COMMON_CFLAGS}) set(MEMPROF_COMMON_DEFINITIONS "") diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h --- a/compiler-rt/lib/memprof/memprof_rawprofile.h +++ b/compiler-rt/lib/memprof/memprof_rawprofile.h @@ -5,17 +5,10 @@ #include "sanitizer_common/sanitizer_procmaps.h" namespace __memprof { - -// TODO: pull these in from MemProfData.inc -#define MEMPROF_RAW_MAGIC_64 \ - (u64)255 << 56 | (u64)'m' << 48 | (u64)'p' << 40 | (u64)'r' << 32 | \ - (u64)'o' << 24 | (u64)'f' << 16 | (u64)'r' << 8 | (u64)129 - -#define MEMPROF_RAW_VERSION 1ULL - +// Serialize the in-memory representation of the memprof profile to the raw +// binary format. The format itself is documented in memprof_rawprofile.cpp. 
u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout, char *&Buffer); - } // namespace __memprof #endif // MEMPROF_RAWPROFILE_H_ diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -1,5 +1,10 @@ -#include "memprof_rawprofile.h" +#include +#include +#include + #include "memprof_meminfoblock.h" +#include "memprof_rawprofile.h" +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_linux.h" #include "sanitizer_common/sanitizer_procmaps.h" @@ -8,29 +13,12 @@ #include "sanitizer_common/sanitizer_stacktrace.h" #include "sanitizer_common/sanitizer_vector.h" -#include -#include - namespace __memprof { using ::__sanitizer::Vector; +using SegmentEntry = ::llvm::memprof::SegmentEntry; +using Header = ::llvm::memprof::Header; namespace { -typedef struct __attribute__((__packed__)) { - u64 start; - u64 end; - u64 offset; - u8 buildId[32]; -} SegmentEntry; - -typedef struct __attribute__((__packed__)) { - u64 magic; - u64 version; - u64 total_size; - u64 segment_offset; - u64 mib_offset; - u64 stack_offset; -} Header; - template char *WriteBytes(T Pod, char *&Buffer) { *(T *)Buffer = Pod; return Buffer + sizeof(T); @@ -76,12 +64,12 @@ for (Layout.Reset(); Layout.Next(&segment);) { if (segment.IsReadable() && segment.IsExecutable()) { - SegmentEntry entry{}; - entry.start = segment.start; - entry.end = segment.end; - entry.offset = segment.offset; - memcpy(entry.buildId, segment.uuid, sizeof(segment.uuid)); - memcpy(Ptr, &entry, sizeof(SegmentEntry)); + SegmentEntry Entry{}; + Entry.Start = segment.start; + Entry.End = segment.end; + Entry.Offset = segment.offset; + memcpy(Entry.BuildId, segment.uuid, sizeof(segment.uuid)); + memcpy(Ptr, &Entry, sizeof(SegmentEntry)); Ptr += sizeof(SegmentEntry); 
NumSegmentsRecorded++; } diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -0,0 +1,61 @@ +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + + +#ifdef _MSC_VER +#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#else +#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. +#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. 
+PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + uint8_t BuildId[32]; +}); +} // namespace memprof +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -0,0 +1,43 @@ +#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { +namespace memprof { + +class RawMemProfReader { +public: + RawMemProfReader(std::unique_ptr DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + // Prints aggregate counts for each raw profile parsed from the DataBuffer. + void printSummaries(raw_ostream &OS) const; + + // Return true if the \p DataBuffer starts with magic bytes indicating it is + // a raw binary memprof profile. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + // Create a RawMemProfReader after sanity checking the contents of the file at + // \p Path. 
+  static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+
+private:
+  std::unique_ptr<MemoryBuffer> DataBuffer;
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt
--- a/llvm/lib/ProfileData/CMakeLists.txt
+++ b/llvm/lib/ProfileData/CMakeLists.txt
@@ -7,6 +7,7 @@
   SampleProf.cpp
   SampleProfReader.cpp
   SampleProfWriter.cpp
+  RawMemProfReader.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/ProfileData
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -0,0 +1,136 @@
+//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading MemProf profiling data.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/ProfileData/RawMemProfReader.h"
+
+namespace llvm {
+namespace memprof {
+namespace {
+
+// Aggregate counts reported for a single serialized raw profile.
+struct Summary {
+  uint64_t Version;
+  uint64_t TotalSizeBytes;
+  uint64_t NumSegments;
+  uint64_t NumMIBInfo;
+  uint64_t NumStackOffsets;
+};
+
+// Returns true if a 64-bit value stored at \p Offset lies entirely within a
+// profile that is \p ProfileSize bytes long.
+bool containsCount(uint64_t Offset, uint64_t ProfileSize) {
+  return ProfileSize >= sizeof(uint64_t) &&
+         Offset <= ProfileSize - sizeof(uint64_t);
+}
+
+// Builds the summary for the profile whose header begins at \p Start. The
+// 64-bit value found at each section offset is reported as that section's
+// count. The header and its offsets must already have been validated, as
+// RawMemProfReader::create does.
+Summary computeSummary(const char *Start) {
+  auto *H = reinterpret_cast<const Header *>(Start);
+
+  return Summary{
+      H->Version,
+      H->TotalSize,
+      *reinterpret_cast<const uint64_t *>(Start + H->SegmentOffset),
+      *reinterpret_cast<const uint64_t *>(Start + H->MIBOffset),
+      *reinterpret_cast<const uint64_t *>(Start + H->StackOffset),
+  };
+}
+
+} // namespace
+
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(const Twine &Path) {
+  // Raw profiles are binary data, so do not request text-mode handling.
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (std::error_code EC = BufferOr.getError())
+    return errorCodeToError(EC);
+
+  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+
+  if (Buffer->getBufferSize() == 0)
+    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+  if (!RawMemProfReader::hasFormat(*Buffer))
+    return make_error<InstrProfError>(instrprof_error::bad_magic);
+
+  // The size of the buffer can be > header total size since we allow repeated
+  // serialization of memprof profiles to the same file. Validate every header
+  // in the buffer before anything dereferences it.
+  const char *Next = Buffer->getBufferStart();
+  uint64_t Remaining = Buffer->getBufferSize();
+  while (Remaining > 0) {
+    // Fewer bytes than a header means the file was cut short.
+    if (Remaining < sizeof(Header))
+      return make_error<InstrProfError>(instrprof_error::truncated);
+
+    auto *H = reinterpret_cast<const Header *>(Next);
+    if (H->Magic != MEMPROF_RAW_MAGIC_64)
+      return make_error<InstrProfError>(instrprof_error::bad_magic);
+    if (H->Version != MEMPROF_RAW_VERSION)
+      return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+    // A profile cannot be smaller than its own header (a zero size would also
+    // keep this loop from advancing) and cannot extend past the buffer.
+    if (H->TotalSize < sizeof(Header) || H->TotalSize > Remaining)
+      return make_error<InstrProfError>(instrprof_error::malformed);
+
+    // printSummaries reads a 64-bit value at each of these offsets.
+    if (!containsCount(H->SegmentOffset, H->TotalSize) ||
+        !containsCount(H->MIBOffset, H->TotalSize) ||
+        !containsCount(H->StackOffset, H->TotalSize))
+      return make_error<InstrProfError>(instrprof_error::malformed);
+
+    Next += H->TotalSize;
+    Remaining -= H->TotalSize;
+  }
+
+  return std::make_unique<RawMemProfReader>(std::move(Buffer));
+}
+
+// Checks only the leading 64-bit magic; buffers shorter than that never match.
+bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
+  if (Buffer.getBufferSize() < sizeof(uint64_t))
+    return false;
+  uint64_t Magic =
+      *reinterpret_cast<const uint64_t *>(Buffer.getBufferStart());
+  return Magic == MEMPROF_RAW_MAGIC_64;
+}
+
+// Prints one summary block per profile stored back to back in DataBuffer.
+void RawMemProfReader::printSummaries(raw_ostream &OS) const {
+  int Count = 0;
+  const char *Next = DataBuffer->getBufferStart();
+  while (Next < DataBuffer->getBufferEnd()) {
+    const Summary S = computeSummary(Next);
+    OS << "MemProf Profile " << ++Count << "\n";
+    OS << " Version: " << S.Version << "\n";
+    OS << " TotalSizeBytes: " << S.TotalSizeBytes << "\n";
+    OS << " NumSegments: " << S.NumSegments << "\n";
+    OS << " NumMIBInfo: " << S.NumMIBInfo << "\n";
+    OS << " NumStackOffsets: " << S.NumStackOffsets << "\n";
+    // TODO: Print the build ids once we can record them using the
+    // sanitizer_procmaps library for linux.
+
+    // Each profile records its own size, which is the stride to the next one.
+    Next += S.TotalSizeBytes;
+  }
+}
+
+} // namespace memprof
+} // namespace llvm
diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  char *x = (char *)malloc(10);
+  memset(x, 0, 10);
+  free(x);
+  x = (char *)malloc(10);
+  memset(x, 0, 10);
+  free(x);
+  return 0;
+}
+```
+
+The following commands were used to compile the source to a memprof instrumented
+executable and collect a raw binary format profile. Since the profile contains
+virtual addresses for the callstack, we do not expect the raw binary profile to
+be deterministic. The summary should be deterministic apart from changes to
+the shared libraries linked in which could change the number of segments
+recorded.
+
+```
+clang -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -Wl,-build-id source.c -o rawprofile.out
+
+env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > basic.memprofraw
+```
+
+RUN: llvm-profdata show --memory %p/Inputs/basic.memprofraw -o - | FileCheck %s
+
+We expect 3 MIB entries, 1 each for the malloc calls in the program and one
+additional entry from a realloc in glibc/libio/vasprintf.c.
+ +CHECK: MemProf Profile 1 +CHECK: Version: 1 +CHECK: TotalSizeBytes: 1012 +CHECK: NumSegments: 9 +CHECK: NumMIBInfo: 3 +CHECK: NumStackOffsets: 3 diff --git a/llvm/test/tools/llvm-profdata/memprof-multi.test b/llvm/test/tools/llvm-profdata/memprof-multi.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/memprof-multi.test @@ -0,0 +1,48 @@ +The input raw profile test has been generated from the following source code: + +``` +#include +#include +#include +int main(int argc, char **argv) { + char *x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + __memprof_profile_dump(); + x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + return 0; +} +``` + +The following commands were used to compile the source to a memprof instrumented +executable and collect a raw binary format profile. Since the profile contains +virtual addresses for the callstack, we do not expect the raw binary profile to +be deterministic. The summary should be deterministic apart from changes to +the shared libraries linked in which could change the number of segments +recorded. + +``` +clang -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -Wl,-build-id source.c -o rawprofile.out + +env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > multi.memprofraw +``` + +RUN: llvm-profdata show --memory %p/Inputs/multi.memprofraw -o - | FileCheck %s + +We expect 2 MIB entries, 1 each for the malloc calls in the program. Unlike the +memprof-basic.test we do not see any allocation from glibc. 
+ +CHECK: MemProf Profile 1 +CHECK: Version: 1 +CHECK: TotalSizeBytes: 864 +CHECK: NumSegments: 9 +CHECK: NumMIBInfo: 2 +CHECK: NumStackOffsets: 2 +CHECK: MemProf Profile 2 +CHECK: Version: 1 +CHECK: TotalSizeBytes: 864 +CHECK: NumSegments: 9 +CHECK: NumMIBInfo: 2 +CHECK: NumStackOffsets: 2 diff --git a/llvm/test/tools/llvm-profdata/text-format-errors.test b/llvm/test/tools/llvm-profdata/text-format-errors.test --- a/llvm/test/tools/llvm-profdata/text-format-errors.test +++ b/llvm/test/tools/llvm-profdata/text-format-errors.test @@ -25,7 +25,7 @@ 4- Detect binary input RUN: not llvm-profdata show %p/Inputs/text-format-errors.text.bin 2>&1 | FileCheck %s --check-prefix=BINARY BINARY: error: {{.+}}: unrecognized instrumentation profile encoding format -BINARY: Perhaps you forgot to use the --sample option? +BINARY: Perhaps you forgot to use the --sample or --memory option? 5- Detect malformed value profile data RUN: not llvm-profdata show %p/Inputs/vp-malform.proftext 2>&1 | FileCheck %s --check-prefix=VP diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -17,6 +17,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" @@ -80,8 +81,8 @@ instrprof_error instrError = IPE.get(); StringRef Hint = ""; if (instrError == instrprof_error::unrecognized_format) { - // Hint for common error of forgetting --sample for sample profiles. - Hint = "Perhaps you forgot to use the --sample option?"; + // Hint in case user missed specifying the profile type. 
+ Hint = "Perhaps you forgot to use the --sample or --memory option?"; } exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); }); @@ -95,7 +96,7 @@ } namespace { -enum ProfileKinds { instr, sample }; +enum ProfileKinds { instr, sample, memory }; enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; } @@ -2447,6 +2448,17 @@ return 0; } +static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) { + auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename); + if (Error E = ReaderOr.takeError()) + exitWithError(std::move(E), Filename); + + std::unique_ptr Reader( + ReaderOr.get().release()); + Reader->printSummaries(OS); + return 0; +} + static int show_main(int argc, const char *argv[]) { cl::opt Filename(cl::Positional, cl::Required, cl::desc("")); @@ -2487,7 +2499,8 @@ cl::opt ProfileKind( cl::desc("Profile kind:"), cl::init(instr), cl::values(clEnumVal(instr, "Instrumentation profile (default)"), - clEnumVal(sample, "Sample profile"))); + clEnumVal(sample, "Sample profile"), + clEnumVal(memory, "MemProf memory access profile"))); cl::opt TopNFunctions( "topn", cl::init(0), cl::desc("Show the list of functions with the largest internal counts")); @@ -2532,11 +2545,12 @@ ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, TextFormat, ShowBinaryIds, OS); - else + if (ProfileKind == sample) return showSampleProfile(Filename, ShowCounts, TopNFunctions, ShowAllFunctions, ShowDetailedSummary, ShowFunction, ShowProfileSymbolList, ShowSectionInfoOnly, ShowHotFuncList, OS); + return showMemProfProfile(Filename, OS); } int main(int argc, const char *argv[]) {