diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -2,6 +2,7 @@ set(MEMPROF_SOURCES memprof_allocator.cpp + memprof_blockcache.cpp memprof_descriptions.cpp memprof_flags.cpp memprof_interceptors.cpp @@ -9,6 +10,7 @@ memprof_linux.cpp memprof_malloc_linux.cpp memprof_posix.cpp + memprof_rawprofile.cpp memprof_rtl.cpp memprof_shadow_setup.cpp memprof_stack.cpp @@ -26,6 +28,7 @@ SET(MEMPROF_HEADERS memprof_allocator.h + memprof_blockcache.h memprof_descriptions.h memprof_flags.h memprof_flags.inc @@ -35,6 +38,8 @@ memprof_interface_internal.h memprof_internal.h memprof_mapping.h + memprof_meminfoblock.h + memprof_rawprofile.h memprof_stack.h memprof_stats.h memprof_thread.h @@ -192,3 +197,8 @@ add_dependencies(memprof clang_rt.memprof-${arch}-symbols) endif() endforeach() + + +if(COMPILER_RT_INCLUDE_TESTS) + add_subdirectory(tests) +endif() diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -14,7 +14,10 @@ //===----------------------------------------------------------------------===// #include "memprof_allocator.h" +#include "memprof_blockcache.h" #include "memprof_mapping.h" +#include "memprof_meminfoblock.h" +#include "memprof_rawprofile.h" #include "memprof_stack.h" #include "memprof_thread.h" #include "sanitizer_common/sanitizer_allocator_checks.h" @@ -25,6 +28,7 @@ #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_list.h" +#include "sanitizer_common/sanitizer_procmaps.h" #include "sanitizer_common/sanitizer_stackdepot.h" #include @@ -166,244 +170,6 @@ return &ms->allocator_cache; } -struct MemInfoBlock { - u32 alloc_count; - u64 total_access_count, min_access_count, max_access_count; 
- u64 total_size; - u32 min_size, max_size; - u32 alloc_timestamp, dealloc_timestamp; - u64 total_lifetime; - u32 min_lifetime, max_lifetime; - u32 alloc_cpu_id, dealloc_cpu_id; - u32 num_migrated_cpu; - - // Only compared to prior deallocated object currently. - u32 num_lifetime_overlaps; - u32 num_same_alloc_cpu; - u32 num_same_dealloc_cpu; - - u64 data_type_id; // TODO: hash of type name - - MemInfoBlock() : alloc_count(0) {} - - MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, - u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) - : alloc_count(1), total_access_count(access_count), - min_access_count(access_count), max_access_count(access_count), - total_size(size), min_size(size), max_size(size), - alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), - total_lifetime(dealloc_timestamp - alloc_timestamp), - min_lifetime(total_lifetime), max_lifetime(total_lifetime), - alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), - num_lifetime_overlaps(0), num_same_alloc_cpu(0), - num_same_dealloc_cpu(0) { - num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; - } - - void Print(u64 id) { - u64 p; - if (flags()->print_terse) { - p = total_size * 100 / alloc_count; - Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100, - min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count, - max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime); - Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, - num_same_alloc_cpu, num_same_dealloc_cpu); - } else { - p = total_size * 100 / alloc_count; - Printf("Memory allocation stack id = %llu\n", id); - Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n", - alloc_count, p / 100, p % 100, min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("\taccess_count (ave/min/max): %d.%02d / %u / 
%u\n", p / 100, - p % 100, min_access_count, max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100, - min_lifetime, max_lifetime); - Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " - "cpu: %u, num same dealloc_cpu: %u\n", - num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, - num_same_dealloc_cpu); - } - } - - static void printHeader() { - CHECK(flags()->print_terse); - Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" - "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" - "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" - "NumSameDeallocCpu\n"); - } - - void Merge(MemInfoBlock &newMIB) { - alloc_count += newMIB.alloc_count; - - total_access_count += newMIB.total_access_count; - min_access_count = Min(min_access_count, newMIB.min_access_count); - max_access_count = Max(max_access_count, newMIB.max_access_count); - - total_size += newMIB.total_size; - min_size = Min(min_size, newMIB.min_size); - max_size = Max(max_size, newMIB.max_size); - - total_lifetime += newMIB.total_lifetime; - min_lifetime = Min(min_lifetime, newMIB.min_lifetime); - max_lifetime = Max(max_lifetime, newMIB.max_lifetime); - - // We know newMIB was deallocated later, so just need to check if it was - // allocated before last one deallocated. 
- num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; - alloc_timestamp = newMIB.alloc_timestamp; - dealloc_timestamp = newMIB.dealloc_timestamp; - - num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; - num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; - alloc_cpu_id = newMIB.alloc_cpu_id; - dealloc_cpu_id = newMIB.dealloc_cpu_id; - } -}; - -static u32 AccessCount = 0; -static u32 MissCount = 0; - -struct SetEntry { - SetEntry() : id(0), MIB() {} - bool Empty() { return id == 0; } - void Print() { - CHECK(!Empty()); - MIB.Print(id); - } - // The stack id - u64 id; - MemInfoBlock MIB; -}; - -struct CacheSet { - enum { kSetSize = 4 }; - - void PrintAll() { - for (int i = 0; i < kSetSize; i++) { - if (Entries[i].Empty()) - continue; - Entries[i].Print(); - } - } - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - AccessCount++; - SetAccessCount++; - - for (int i = 0; i < kSetSize; i++) { - auto id = Entries[i].id; - // Check if this is a hit or an empty entry. Since we always move any - // filled locations to the front of the array (see below), we don't need - // to look after finding the first empty entry. - if (id == new_id || !id) { - if (id == 0) { - Entries[i].id = new_id; - Entries[i].MIB = newMIB; - } else { - Entries[i].MIB.Merge(newMIB); - } - // Assuming some id locality, we try to swap the matching entry - // into the first set position. 
- if (i != 0) { - auto tmp = Entries[0]; - Entries[0] = Entries[i]; - Entries[i] = tmp; - } - return; - } - } - - // Miss - MissCount++; - SetMissCount++; - - // We try to find the entries with the lowest alloc count to be evicted: - int min_idx = 0; - u64 min_count = Entries[0].MIB.alloc_count; - for (int i = 1; i < kSetSize; i++) { - CHECK(!Entries[i].Empty()); - if (Entries[i].MIB.alloc_count < min_count) { - min_idx = i; - min_count = Entries[i].MIB.alloc_count; - } - } - - // Print the evicted entry profile information - if (!flags()->print_terse) - Printf("Evicted:\n"); - Entries[min_idx].Print(); - - // Similar to the hit case, put new MIB in first set position. - if (min_idx != 0) - Entries[min_idx] = Entries[0]; - Entries[0].id = new_id; - Entries[0].MIB = newMIB; - } - - void PrintMissRate(int i) { - u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0; - Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount, - SetAccessCount, p / 100, p % 100); - } - - SetEntry Entries[kSetSize]; - u32 SetAccessCount = 0; - u32 SetMissCount = 0; -}; - -struct MemInfoBlockCache { - MemInfoBlockCache() { - if (common_flags()->print_module_map) - DumpProcessMap(); - if (flags()->print_terse) - MemInfoBlock::printHeader(); - Sets = - (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); - Constructed = true; - } - - ~MemInfoBlockCache() { free(Sets); } - - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - u64 hv = new_id; - - // Use mod method where number of entries should be a prime close to power - // of 2. - hv %= flags()->mem_info_cache_entries; - - return Sets[hv].insertOrMerge(new_id, newMIB); - } - - void PrintAll() { - for (int i = 0; i < flags()->mem_info_cache_entries; i++) { - Sets[i].PrintAll(); - } - } - - void PrintMissRate() { - if (!flags()->print_mem_info_cache_miss_rate) - return; - u64 p = AccessCount ? 
MissCount * 10000ULL / AccessCount : 0; - Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount, - p / 100, p % 100); - if (flags()->print_mem_info_cache_miss_rate_details) - for (int i = 0; i < flags()->mem_info_cache_entries; i++) - Sets[i].PrintMissRate(i); - } - - CacheSet *Sets; - // Flag when the Sets have been allocated, in case a deallocation is called - // very early before the static init of the Allocator and therefore this table - // have completed. - bool Constructed = false; -}; - // Accumulates the access count from the shadow for the given pointer and size. u64 GetShadowCount(uptr p, u32 size) { u64 *shadow = (u64 *)MEM_TO_SHADOW(p); @@ -458,13 +224,39 @@ bool destructing; // ------------------- Initialization ------------------------ - explicit Allocator(LinkerInitialized) : destructing(false) {} + explicit Allocator(LinkerInitialized) + : MemInfoBlockTable(flags()->mem_info_cache_entries), destructing(false) { + if (common_flags()->print_module_map) + DumpProcessMap(); + if (flags()->print_terse) + MemInfoBlock::printHeader(); + } - ~Allocator() { FinishAndPrint(); } + ~Allocator() { FinishAndWrite(); } - void FinishAndPrint() { - if (!flags()->print_terse) + void FinishAndWrite() { + InsertLiveBlocks(); + + if (flags()->print_text) { + WriteTextProfile(); + return; + } + + // Serialize the contents to a raw profile. Format documented in + // memprof_rawprofile.h. 
+ char *Buffer = nullptr; + + MemoryMappingLayout Layout(true); + int BytesSerialized = + SerializeToRawProfile(MemInfoBlockTable, Layout, Buffer); + CHECK(Buffer && BytesSerialized && "could not serialize to buffer"); + report_file.Write(Buffer, BytesSerialized); + } + + void InsertLiveBlocks() { + if (flags()->print_text && !flags()->print_terse) Printf("Live on exit:\n"); + allocator.ForceLock(); allocator.ForEachChunk( [](uptr chunk, void *alloc) { @@ -480,14 +272,19 @@ MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, m->cpu_id, GetCpuId()); ((Allocator *)alloc) - ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB, + flags()->print_terse); }, this); allocator.ForceUnlock(); + } + void WriteTextProfile() { destructing = true; - MemInfoBlockTable.PrintMissRate(); - MemInfoBlockTable.PrintAll(); + if (flags()->print_mem_info_cache_miss_rate) + MemInfoBlockTable.PrintMissRate( + flags()->print_mem_info_cache_miss_rate_details); + MemInfoBlockTable.PrintAll(flags()->print_terse); StackDepotPrintAll(); } @@ -630,7 +427,8 @@ m->cpu_id, GetCpuId()); { SpinMutexLock l(&fallback_mutex); - MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB, + flags()->print_terse); } } @@ -898,7 +696,7 @@ } int __memprof_profile_dump() { - instance.FinishAndPrint(); + instance.FinishAndWrite(); // In the future we may want to return non-zero if there are any errors // detected during the dumping process. 
return 0; diff --git a/compiler-rt/lib/memprof/memprof_blockcache.h b/compiler-rt/lib/memprof/memprof_blockcache.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_blockcache.h @@ -0,0 +1,58 @@ +#ifndef MEMPROF_BLOCKCACHE_H_ +#define MEMPROF_BLOCKCACHE_H_ + +#include "memprof_meminfoblock.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +#include + +namespace __memprof { + +struct SetEntry { + SetEntry() : id(0), MIB() {} + bool Empty() const { return id == 0; } + void Print(bool print_terse) { + CHECK(!Empty()); + MIB.Print(id, print_terse); + } + // The stack id + u64 id; + MemInfoBlock MIB; +} __attribute__((packed)); + +struct CacheSet { + enum { kSetSize = 4 }; + + void PrintAll(bool print_terse); + void insertOrMerge(u64 new_id, MemInfoBlock &newMIB, bool print_terse); + void PrintMissRate(int i); + + SetEntry Entries[kSetSize]; + u32 SetAccessCount = 0; + u32 SetMissCount = 0; +}; + +struct MemInfoBlockCache { + MemInfoBlockCache(u32 mem_info_cache_entries) + : kNumCacheEntries(mem_info_cache_entries) { + Sets = (CacheSet *)malloc(sizeof(CacheSet) * kNumCacheEntries); + Constructed = true; + } + + ~MemInfoBlockCache() { free(Sets); } + + void insertOrMerge(u64 new_id, MemInfoBlock &newMIB, bool print_terse); + void PrintAll(bool print_terse); + void PrintMissRate(bool print_details); + + CacheSet *Sets; + const int kNumCacheEntries; + // Flag when the Sets have been allocated, in case a deallocation is called + // very early before the static init of the Allocator and therefore this table + // have completed. 
+ bool Constructed = false; +}; + +} // namespace __memprof + +#endif // MEMPROF_BLOCKCACHE_H_ diff --git a/compiler-rt/lib/memprof/memprof_blockcache.cpp b/compiler-rt/lib/memprof/memprof_blockcache.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_blockcache.cpp @@ -0,0 +1,104 @@ + +#include "memprof_blockcache.h" + +namespace __memprof { + +static u32 AccessCount = 0; +static u32 MissCount = 0; + +void CacheSet::PrintAll(bool print_terse) { + for (int i = 0; i < kSetSize; i++) { + if (Entries[i].Empty()) + continue; + Entries[i].Print(print_terse); + } +} + +void CacheSet::insertOrMerge(u64 new_id, MemInfoBlock &newMIB, + bool print_terse) { + AccessCount++; + SetAccessCount++; + + for (int i = 0; i < kSetSize; i++) { + auto id = Entries[i].id; + // Check if this is a hit or an empty entry. Since we always move any + // filled locations to the front of the array (see below), we don't need + // to look after finding the first empty entry. + if (id == new_id || !id) { + if (id == 0) { + Entries[i].id = new_id; + Entries[i].MIB = newMIB; + } else { + Entries[i].MIB.Merge(newMIB); + } + // Assuming some id locality, we try to swap the matching entry + // into the first set position. + if (i != 0) { + auto tmp = Entries[0]; + Entries[0] = Entries[i]; + Entries[i] = tmp; + } + return; + } + } + + // Miss + MissCount++; + SetMissCount++; + + // We try to find the entries with the lowest alloc count to be evicted: + int min_idx = 0; + u64 min_count = Entries[0].MIB.alloc_count; + for (int i = 1; i < kSetSize; i++) { + CHECK(!Entries[i].Empty()); + if (Entries[i].MIB.alloc_count < min_count) { + min_idx = i; + min_count = Entries[i].MIB.alloc_count; + } + } + + // Print the evicted entry profile information + if (!print_terse) + Printf("Evicted:\n"); + Entries[min_idx].Print(print_terse); + + // Similar to the hit case, put new MIB in first set position. 
+ if (min_idx != 0) + Entries[min_idx] = Entries[0]; + Entries[0].id = new_id; + Entries[0].MIB = newMIB; +} + +void CacheSet::PrintMissRate(int i) { + u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0; + Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount, + SetAccessCount, p / 100, p % 100); +} + +void MemInfoBlockCache::insertOrMerge(u64 new_id, MemInfoBlock &newMIB, + bool print_terse) { + u64 hv = new_id; + + // Use mod method where number of entries should be a prime close to power + // of 2. + hv %= kNumCacheEntries; + + return Sets[hv].insertOrMerge(new_id, newMIB, print_terse); +} + +void MemInfoBlockCache::PrintAll(bool print_terse) { + for (int i = 0; i < kNumCacheEntries; i++) { + Sets[i].PrintAll(print_terse); + } +} + +void MemInfoBlockCache::PrintMissRate(bool print_details) { + u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0; + Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount, + p / 100, p % 100); + if (print_details) + for (int i = 0; i < kNumCacheEntries; i++) + Sets[i].PrintMissRate(i); +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc --- a/compiler-rt/lib/memprof/memprof_flags.inc +++ b/compiler-rt/lib/memprof/memprof_flags.inc @@ -35,8 +35,10 @@ "realloc(p, 0) is equivalent to free(p) by default (Same as the " "POSIX standard). If set to false, realloc(p, 0) will return a " "pointer to an allocated space which can not be used.") +MEMPROF_FLAG(bool, print_text, true, + "If set, prints memory profile in text format.") MEMPROF_FLAG(bool, print_terse, false, - "If set, prints memory profile in a terse format.") + "If set, prints memory profile in a terse format. 
Only applicable if print_text = true.") MEMPROF_FLAG( int, mem_info_cache_entries, 16381, diff --git a/compiler-rt/lib/memprof/memprof_meminfoblock.h b/compiler-rt/lib/memprof/memprof_meminfoblock.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_meminfoblock.h @@ -0,0 +1,113 @@ +#ifndef MEMPROF_MEMINFOBLOCK_H_ +#define MEMPROF_MEMINFOBLOCK_H_ + +#include "memprof_interface_internal.h" // For u32, u64 TODO: Move these out of the internal header. +#include "sanitizer_common/sanitizer_common.h" + +namespace __memprof { + +using __sanitizer::Printf; + +struct MemInfoBlock { + u32 alloc_count; + u64 total_access_count, min_access_count, max_access_count; + u64 total_size; + u32 min_size, max_size; + u32 alloc_timestamp, dealloc_timestamp; + u64 total_lifetime; + u32 min_lifetime, max_lifetime; + u32 alloc_cpu_id, dealloc_cpu_id; + u32 num_migrated_cpu; + + // Only compared to prior deallocated object currently. + u32 num_lifetime_overlaps; + u32 num_same_alloc_cpu; + u32 num_same_dealloc_cpu; + + u64 data_type_id; // TODO: hash of type name + + MemInfoBlock() : alloc_count(0) {} + + MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, + u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) + : alloc_count(1), total_access_count(access_count), + min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(dealloc_timestamp - alloc_timestamp), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), + num_same_dealloc_cpu(0) { + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Print(u64 id, bool print_terse) { + u64 p; + if (print_terse) { + p = total_size * 100 / alloc_count; + Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100, + min_size, 
max_size); + p = total_access_count * 100 / alloc_count; + Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count, + max_access_count); + p = total_lifetime * 100 / alloc_count; + Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime); + Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, + num_same_alloc_cpu, num_same_dealloc_cpu); + } else { + p = total_size * 100 / alloc_count; + Printf("Memory allocation stack id = %llu\n", id); + Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n", + alloc_count, p / 100, p % 100, min_size, max_size); + p = total_access_count * 100 / alloc_count; + Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100, + p % 100, min_access_count, max_access_count); + p = total_lifetime * 100 / alloc_count; + Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100, + min_lifetime, max_lifetime); + Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " + "cpu: %u, num same dealloc_cpu: %u\n", + num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, + num_same_dealloc_cpu); + } + } + + static void printHeader() { + Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" + "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" + "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" + "NumSameDeallocCpu\n"); + } + + void Merge(MemInfoBlock &newMIB) { + alloc_count += newMIB.alloc_count; + + total_access_count += newMIB.total_access_count; + min_access_count = Min(min_access_count, newMIB.min_access_count); + max_access_count = Max(max_access_count, newMIB.max_access_count); + + total_size += newMIB.total_size; + min_size = Min(min_size, newMIB.min_size); + max_size = Max(max_size, newMIB.max_size); + + total_lifetime += newMIB.total_lifetime; + min_lifetime = Min(min_lifetime, newMIB.min_lifetime); + max_lifetime = Max(max_lifetime, newMIB.max_lifetime); + + // We know newMIB was deallocated later, so just need to 
check if it was + // allocated before last one deallocated. + num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newMIB.alloc_timestamp; + dealloc_timestamp = newMIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; + alloc_cpu_id = newMIB.alloc_cpu_id; + dealloc_cpu_id = newMIB.dealloc_cpu_id; + } +} __attribute__((packed)); + +} // namespace __memprof + +#endif // MEMPROF_MEMINFOBLOCK_H_ diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_rawprofile.h @@ -0,0 +1,21 @@ +#ifndef MEMPROF_RAWPROFILE_H_ +#define MEMPROF_RAWPROFILE_H_ + +#include "memprof_blockcache.h" +#include "sanitizer_common/sanitizer_procmaps.h" + +namespace __memprof { + +// TODO: pull these in from MemProfData.inc +#define MEMPROF_RAW_MAGIC_64 \ + (u64)255 << 56 | (u64)'m' << 48 | (u64)'p' << 40 | (u64)'r' << 32 | \ + (u64)'o' << 24 | (u64)'f' << 16 | (u64)'r' << 8 | (u64)129 + +#define MEMPROF_RAW_VERSION 1ULL + +int SerializeToRawProfile(const MemInfoBlockCache &BlockCache, + MemoryMappingLayout &Layout, char *&Buffer); + +} // namespace __memprof + +#endif // MEMPROF_RAWPROFILE_H_ diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -0,0 +1,209 @@ +#include "memprof_rawprofile.h" +#include "memprof_blockcache.h" +#include "memprof_meminfoblock.h" +#include "sanitizer_common/sanitizer_linux.h" +#include "sanitizer_common/sanitizer_procmaps.h" +#include "sanitizer_common/sanitizer_stackdepot.h" +#include "sanitizer_common/sanitizer_stackdepotbase.h" +#include "sanitizer_common/sanitizer_stacktrace.h" + +#include +#include +#include + +namespace __memprof { + +namespace { +typedef struct 
__attribute__((__packed__)) { + u64 start; + u64 end; + u64 offset; + u8 buildId[32]; +} SegmentEntry; + +template <typename T> char *WriteBytes(T Pod, char *&Buffer) { + static_assert(std::is_pod<T>::value, "Must be a POD type."); + *(T *)Buffer = Pod; + return Buffer + sizeof(T); +} + +} // namespace + +int SerializeSegments(MemoryMappingLayout &Layout, char *&Buffer) { + u32 NumSegmentsToRecord = 0; + MemoryMappedSegment segment; + + for (Layout.Reset(); Layout.Next(&segment);) + if (segment.IsReadable() && segment.IsExecutable()) + NumSegmentsToRecord++; + + CHECK(Buffer == nullptr); + // Allocate a buffer + int NumBytesToWrite = + sizeof(u64) // A header which stores the number of records. + + sizeof(SegmentEntry) * NumSegmentsToRecord; + Buffer = (char *)malloc(NumBytesToWrite); + + char *Ptr = Buffer; + *((u64 *)Ptr) = NumSegmentsToRecord; + Ptr += sizeof(u64); + + for (Layout.Reset(); Layout.Next(&segment);) { + if (segment.IsReadable() && segment.IsExecutable()) { + SegmentEntry entry{segment.start, segment.end, segment.offset}; + memcpy(entry.buildId, segment.uuid, sizeof(segment.uuid)); + memcpy(Ptr, &entry, sizeof(SegmentEntry)); + Ptr += sizeof(SegmentEntry); + } + } + + return NumBytesToWrite; +} + +int SerializeStack(const std::set<u64> &StackIds, char *&Buffer) { + int NumBytesToWrite = sizeof(u64); + + for (const u64 Id : StackIds) { + // One entry for the id and then one more for the number of stack pcs. + NumBytesToWrite += 2 * sizeof(u64); + const StackTrace St = StackDepotGet(Id); + + CHECK(St.trace != nullptr && St.size > 0 && "Empty stack trace"); + for (uptr i = 0; i < St.size && St.trace[i] != 0; i++) { + NumBytesToWrite += sizeof(u64); + } + } + + Buffer = (char *)malloc(NumBytesToWrite); + char *Ptr = Buffer; + Ptr = WriteBytes(static_cast<u64>(StackIds.size()), Ptr); + + for (const u64 Id : StackIds) { + Ptr = WriteBytes(Id, Ptr); + Ptr += sizeof(u64); // Bump it by u64, we will fill this in later. 
+ u64 Count = 0; + const StackTrace St = StackDepotGet(Id); + for (uptr i = 0; i < St.size && St.trace[i] != 0; i++) { + // PCs in stack traces are actually the return addresses, that is, + // addresses of the next instructions after the call. + uptr pc = StackTrace::GetPreviousInstructionPc(St.trace[i]); + ++Count; + Ptr = WriteBytes(static_cast<u64>(pc), Ptr); + } + // Store the count in the space we reserved earlier. + *(u64 *)(Ptr - (Count + 1) * sizeof(Count)) = Count; + } + + return NumBytesToWrite; +} + +int SerializeMIBInfo(const MemInfoBlockCache &BlockCache, + std::set<u64> &StackIds, char *&Buffer) { + int NumBytesToWrite = sizeof(u64); + + u64 NumEntries = 0; + for (int i = 0; i < BlockCache.kNumCacheEntries; i++) { + const CacheSet &Set = BlockCache.Sets[i]; + for (int j = 0; j < CacheSet::kSetSize; j++) { + if (Set.Entries[j].Empty()) + continue; + NumBytesToWrite += sizeof(SetEntry); + ++NumEntries; + } + } + + Buffer = (char *)malloc(NumBytesToWrite); + char *Ptr = Buffer; + Ptr = WriteBytes(NumEntries, Ptr); + + for (int i = 0; i < BlockCache.kNumCacheEntries; i++) { + const CacheSet &Set = BlockCache.Sets[i]; + for (int j = 0; j < CacheSet::kSetSize; j++) { + if (Set.Entries[j].Empty()) + continue; + *(SetEntry *)Ptr = Set.Entries[j]; + Ptr += sizeof(SetEntry); + StackIds.insert(Set.Entries[j].id); + } + } + + return NumBytesToWrite; +} + +// Format +// ---------- Header +// Magic +// Version +// Total Size +// Segment Offset +// MIB Info Offset +// Stack Offset +// ---------- Segment Info +// Num Entries +// ---------- Segment Entry +// Start +// End +// Offset +// BuildID 32B +// ---------- +// ... +// ---------- MIB Info +// Num Entries +// ---------- MIB Entry +// Alloc Count +// ... +// ---------- Stack Info +// Num Entries +// ---------- Stack Entry +// Num Stacks +// PC1 +// PC2 +// ... +// ---------- +// ... 
+int SerializeToRawProfile(const MemInfoBlockCache &BlockCache, + MemoryMappingLayout &Layout, char *&Buffer) { + char *SegmentBuffer = nullptr; + int NumSegmentBytes = SerializeSegments(Layout, SegmentBuffer); + + char *MIBInfoBuffer = nullptr; + std::set<u64> StackIds; + int NumMIBInfoBytes = SerializeMIBInfo(BlockCache, StackIds, MIBInfoBuffer); + + char *StackBuffer = nullptr; + int NumStackBytes = SerializeStack(StackIds, StackBuffer); + + // Compute how many bytes we need for the header. + int NumBytesHeader = sizeof(MEMPROF_RAW_MAGIC_64) + + sizeof(MEMPROF_RAW_VERSION) + + 4 * sizeof(u64); // Offset into Segment and Stack + // sections, MIB and total size. + + int TotalSizeBytes = + NumBytesHeader + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes; + + // Allocate the memory for the entire buffer incl. info blocks. + Buffer = (char *)malloc(TotalSizeBytes); + char *Ptr = Buffer; + + // Write Header + Ptr = WriteBytes(MEMPROF_RAW_MAGIC_64, Ptr); + Ptr = WriteBytes(MEMPROF_RAW_VERSION, Ptr); + Ptr = WriteBytes(static_cast<u64>(TotalSizeBytes), Ptr); + Ptr = WriteBytes(static_cast<u64>(NumBytesHeader), Ptr); + Ptr = WriteBytes(static_cast<u64>(NumBytesHeader + NumSegmentBytes), Ptr); + Ptr = WriteBytes( + static_cast<u64>(NumBytesHeader + NumSegmentBytes + NumMIBInfoBytes), + Ptr); + + memcpy(Ptr, SegmentBuffer, NumSegmentBytes); + Ptr += NumSegmentBytes; + memcpy(Ptr, MIBInfoBuffer, NumMIBInfoBytes); + Ptr += NumMIBInfoBytes; + memcpy(Ptr, StackBuffer, NumStackBytes); + // We don't need Ptr anymore.. just return the total size. 
+ + return TotalSizeBytes; +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt @@ -0,0 +1,49 @@ +include(CompilerRTCompile) + +set(MEMPROF_UNITTEST_CFLAGS + ${COMPILER_RT_UNITTEST_CFLAGS} + ${COMPILER_RT_GTEST_CFLAGS} + ${COMPILER_RT_GMOCK_CFLAGS} + -I${COMPILER_RT_SOURCE_DIR}/lib/ + -O2 + -g + -fno-rtti + -fno-omit-frame-pointer) + +file(GLOB MEMPROF_HEADERS ../*.h) + +set(MEMPROF_SOURCES + ../memprof_blockcache.cpp + ../memprof_rawprofile.cpp) + +set(MEMPROF_UNITTESTS + rawprofile.cpp + driver.cpp) + +set(MEMPROF_UNIT_TEST_HEADERS + ${MEMPROF_HEADERS}) + +if(NOT WIN32) + list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -pthread) +endif() + +if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST MEMPROF_SUPPORTED_ARCH) + # MemProf unit tests are only run on the host machine. + set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH}) + + add_executable(MemProfUnitTests + ${MEMPROF_UNITTESTS} + ${COMPILER_RT_GTEST_SOURCE} + ${COMPILER_RT_GMOCK_SOURCE} + ${MEMPROF_SOURCES} + $ + $ + $ + $) + set_target_compile_flags(MemProfUnitTests ${MEMPROF_UNITTEST_CFLAGS}) + set_target_link_flags(MemProfUnitTests ${MEMPROF_UNITTEST_LINK_FLAGS}) + target_link_libraries(MemProfUnitTests dl) + + set_target_properties(MemProfUnitTests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +endif() diff --git a/compiler-rt/lib/memprof/tests/driver.cpp b/compiler-rt/lib/memprof/tests/driver.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/tests/driver.cpp @@ -0,0 +1,14 @@ +//===-- driver.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp @@ -0,0 +1,157 @@ +#include "memprof/memprof_blockcache.h" +#include "memprof/memprof_meminfoblock.h" +#include "memprof/memprof_rawprofile.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_procmaps.h" +#include "sanitizer_common/sanitizer_stackdepot.h" +#include "sanitizer_common/sanitizer_stacktrace.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include + +namespace { + +using ::__memprof::MemInfoBlock; +using ::__memprof::MemInfoBlockCache; +using ::__memprof::SerializeToRawProfile; +using ::__memprof::SetEntry; +using ::__sanitizer::MemoryMappedSegment; +using ::__sanitizer::MemoryMappingLayout; +using ::__sanitizer::StackDepotPut; +using ::__sanitizer::StackTrace; +using ::testing::_; +using ::testing::Action; +using ::testing::DoAll; +using ::testing::Return; +using ::testing::SetArgPointee; + +class MockMemoryMappingLayout : public MemoryMappingLayout { +public: + MOCK_METHOD(bool, Next, (MemoryMappedSegment *), (override)); + MOCK_METHOD(void, Reset, ()); + MockMemoryMappingLayout() : MemoryMappingLayout(false){}; +}; + +std::unique_ptr MakeFakeCache() { + auto FakeCache = + std::make_unique(/*mem_info_cache_entries=*/1); + MemInfoBlock FakeMIB; + memset(&FakeMIB, 0, sizeof(MemInfoBlock)); + FakeMIB.alloc_count = 0x1; + FakeMIB.total_access_count = 0x2; + + // Create a fake stack trace, start from 2 since we deduct 1 to get prior pc + // address. 
+  uptr array[] = {2, 3, 4, 5, 6};
+  StackTrace St(array, ARRAY_SIZE(array));
+  u32 Id = StackDepotPut(St);
+
+  FakeCache->insertOrMerge(Id, FakeMIB, /*print_terse=*/false);
+  return FakeCache;
+}
+
+// Reads a POD value from the buffer and advances the cursor past it. The
+// default of u64 matches the width of every header field in the raw profile.
+template <class T = u64> T Read(char *&Buffer) {
+  static_assert(std::is_pod<T>::value, "Must be a POD type.");
+  T t = *reinterpret_cast<T *>(Buffer);
+  Buffer += sizeof(T);
+  return t;
+}
+
+TEST(MemProf, Basic) {
+  MockMemoryMappingLayout Layout;
+  MemoryMappedSegment FakeSegment;
+  memset(&FakeSegment, 0, sizeof(FakeSegment));
+  FakeSegment.start = 0x10;
+  FakeSegment.end = 0x20;
+  FakeSegment.offset = 0x20;
+  uint8_t uuid[__sanitizer::kModuleUUIDSize] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE};
+  memcpy(FakeSegment.uuid, uuid, __sanitizer::kModuleUUIDSize);
+  FakeSegment.protection =
+      __sanitizer::kProtectionExecute | __sanitizer::kProtectionRead;
+
+  // Serialization iterates the layout twice (size pass + write pass), so the
+  // mock must yield the fake segment, then end-of-list, twice over.
+  const Action<bool(MemoryMappedSegment *)> SetSegment =
+      DoAll(SetArgPointee<0>(FakeSegment), Return(true));
+  EXPECT_CALL(Layout, Next(_))
+      .WillOnce(SetSegment)
+      .WillOnce(Return(false))
+      .WillOnce(SetSegment)
+      .WillRepeatedly(Return(false));
+
+  EXPECT_CALL(Layout, Reset).Times(2);
+
+  auto FakeCache = MakeFakeCache();
+  char *Ptr = nullptr;
+  int NumBytes = SerializeToRawProfile(*FakeCache, Layout, Ptr);
+  const char *Buffer = Ptr;
+
+  ASSERT_GT(NumBytes, 0);
+  ASSERT_TRUE(Ptr);
+
+  // Check the header.
+  EXPECT_THAT(Read(Ptr), MEMPROF_RAW_MAGIC_64);
+  EXPECT_THAT(Read(Ptr), MEMPROF_RAW_VERSION);
+  const u64 TotalSize = Read(Ptr);
+  const u64 SegmentOffset = Read(Ptr);
+  const u64 MIBOffset = Read(Ptr);
+  const u64 StackOffset = Read(Ptr);
+
+  // ============= Check sizes.
+
+  EXPECT_GT(TotalSize, 0ULL);
+
+  EXPECT_GT(SegmentOffset, 0ULL);
+  // We expect only 1 segment entry, 8b for the count and 56b for SegmentEntry
+  // in memprof_rawprofile.cpp.
+  EXPECT_EQ(MIBOffset - SegmentOffset, 64);
+
+  EXPECT_GT(MIBOffset, 0ULL);
+  // We expect only 1 mib entry, 8b for the count and sizeof(SetEntry) contains
+  // stack id + MeminfoBlock.
+  EXPECT_EQ(StackOffset - MIBOffset, 8 + sizeof(SetEntry));
+
+  EXPECT_GT(StackOffset, 0ULL);
+  // We expect only 1 stack entry, with 5 frames - 8b for count, 8b for id, 8b
+  // for frame count and 5*8b for fake frame.
+  EXPECT_EQ(TotalSize - StackOffset, 8 + 8 + 8 + 5 * 8);
+
+  // ============= Check contents.
+  unsigned char ExpectedSegmentBytes[64] = {
+      0x01, 0,   0,   0,   0,   0,   0, 0, // Number of entries
+      0x10, 0,   0,   0,   0,   0,   0, 0, // Start
+      0x20, 0,   0,   0,   0,   0,   0, 0, // End
+      0x20, 0,   0,   0,   0,   0,   0, 0, // Offset
+      0x0C, 0x0, 0xF, 0xF, 0xE, 0xE,       // Uuid
+  };
+  EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 64), 0);
+
+  // Check that the number of entries is 1.
+  EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + MIBOffset), 1ULL);
+  // Check that stack id is set.
+  EXPECT_NE(*reinterpret_cast<const u64 *>(Buffer + MIBOffset + 8), 0ULL);
+  unsigned char ExpectedMIBBytes[sizeof(MemInfoBlock)] = {
+      0x01, 0, 0, 0, // Alloc count
+      0x02, 0, 0, 0, // Total access count
+  };
+  // Compare contents after skipping count and stack id.
+  EXPECT_EQ(
+      memcmp(Buffer + MIBOffset + 16, ExpectedMIBBytes, sizeof(MemInfoBlock)),
+      0);
+
+  // Check that the number of entries is 1.
+  EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + StackOffset), 1ULL);
+  // Check that the stack id is not null.
+  EXPECT_NE(*reinterpret_cast<const u64 *>(Buffer + StackOffset + 8), 0ULL);
+  // Contents are num pcs, value of each pc - 1.
+  unsigned char ExpectedStackBytes[6 * 8] = {
+      0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs
+      0x1, 0, 0, 0, 0, 0, 0, 0, // PC ...
+      0x2, 0, 0, 0, 0, 0, 0, 0, 0x3, 0, 0, 0, 0, 0, 0, 0,
+      0x4, 0, 0, 0, 0, 0, 0, 0, 0x5, 0, 0, 0, 0, 0, 0, 0,
+  };
+  EXPECT_EQ(memcmp(Buffer + StackOffset + 16, ExpectedStackBytes,
+                   sizeof(ExpectedStackBytes)),
+            0);
+}
+
+} // namespace
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
@@ -69,9 +69,9 @@
  public:
   explicit MemoryMappingLayout(bool cache_enabled);
-  ~MemoryMappingLayout();
-  bool Next(MemoryMappedSegment *segment);
+  // Now a polymorphic base (tests subclass it), so the destructor must be
+  // virtual to make deletion through a base pointer well-defined.
+  virtual ~MemoryMappingLayout();
+  virtual bool Next(MemoryMappedSegment *segment);
   bool Error() const;
-  void Reset();
+  virtual void Reset();
   // In some cases, e.g. when running under a sandbox on Linux, ASan is unable
   // to obtain the memory mappings. It should fall back to pre-cached data
   // instead of aborting.