diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -8,6 +8,7 @@ memprof_interceptors_memintrinsics.cpp memprof_linux.cpp memprof_malloc_linux.cpp + memprof_mibmap.cpp memprof_posix.cpp memprof_rtl.cpp memprof_shadow_setup.cpp @@ -35,6 +36,8 @@ memprof_interface_internal.h memprof_internal.h memprof_mapping.h + memprof_meminfoblock.h + memprof_mibmap.h memprof_stack.h memprof_stats.h memprof_thread.h diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -15,6 +15,8 @@ #include "memprof_allocator.h" #include "memprof_mapping.h" +#include "memprof_meminfoblock.h" +#include "memprof_mibmap.h" #include "memprof_stack.h" #include "memprof_thread.h" #include "sanitizer_common/sanitizer_allocator_checks.h" @@ -28,7 +30,6 @@ #include "sanitizer_common/sanitizer_stackdepot.h" #include -#include #include namespace __memprof { @@ -166,247 +167,6 @@ return &ms->allocator_cache; } -struct MemInfoBlock { - u32 alloc_count; - u64 total_access_count, min_access_count, max_access_count; - u64 total_size; - u32 min_size, max_size; - u32 alloc_timestamp, dealloc_timestamp; - u64 total_lifetime; - u32 min_lifetime, max_lifetime; - u32 alloc_cpu_id, dealloc_cpu_id; - u32 num_migrated_cpu; - - // Only compared to prior deallocated object currently. - u32 num_lifetime_overlaps; - u32 num_same_alloc_cpu; - u32 num_same_dealloc_cpu; - - u64 data_type_id; // TODO: hash of type name - - MemInfoBlock() : alloc_count(0) {} - - MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, - u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) - : alloc_count(1), total_access_count(access_count), - min_access_count(access_count), max_access_count(access_count), - total_size(size), min_size(size), max_size(size), - alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), - total_lifetime(dealloc_timestamp - alloc_timestamp), - min_lifetime(total_lifetime), max_lifetime(total_lifetime), - alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), - num_lifetime_overlaps(0), num_same_alloc_cpu(0), - num_same_dealloc_cpu(0) { - num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; - } - - void Print(u64 id) { - u64 p; - if (flags()->print_terse) { - p = total_size * 100 / alloc_count; - Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, alloc_count, p / 100, p % 100, - min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, min_access_count, - max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime); - Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, - num_same_alloc_cpu, num_same_dealloc_cpu); - } else { - p = total_size * 100 / alloc_count; - Printf("Memory allocation stack id = %llu\n", id); - Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n", - alloc_count, p / 100, p % 100, min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100, - p % 100, min_access_count, max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100, p % 100, - min_lifetime, max_lifetime); - Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " - "cpu: %u, num same dealloc_cpu: %u\n", - num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, - num_same_dealloc_cpu); - } - } - - static void printHeader() { - CHECK(flags()->print_terse); - Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" - "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" - "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" - "NumSameDeallocCpu\n"); - } - - void Merge(MemInfoBlock &newMIB) { - alloc_count += newMIB.alloc_count; - - total_access_count += newMIB.total_access_count; - min_access_count = Min(min_access_count, newMIB.min_access_count); - max_access_count = Max(max_access_count, newMIB.max_access_count); - - total_size += newMIB.total_size; - min_size = Min(min_size, newMIB.min_size); - max_size = Max(max_size, newMIB.max_size); - - total_lifetime += newMIB.total_lifetime; - min_lifetime = Min(min_lifetime, newMIB.min_lifetime); - max_lifetime = Max(max_lifetime, newMIB.max_lifetime); - - // We know newMIB was deallocated later, so just need to check if it was - // allocated before last one deallocated. - num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; - alloc_timestamp = newMIB.alloc_timestamp; - dealloc_timestamp = newMIB.dealloc_timestamp; - - num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; - num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; - alloc_cpu_id = newMIB.alloc_cpu_id; - dealloc_cpu_id = newMIB.dealloc_cpu_id; - } -}; - -struct SetEntry { - SetEntry() : id(0), MIB() {} - bool Empty() { return id == 0; } - void Print() { - CHECK(!Empty()); - MIB.Print(id); - } - // The stack id - u64 id; - MemInfoBlock MIB; -}; - -struct CacheSet { - enum { kSetSize = 4 }; - - void PrintAll() { - for (int i = 0; i < kSetSize; i++) { - if (Entries[i].Empty()) - continue; - Entries[i].Print(); - } - } - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - SpinMutexLock l(&SetMutex); - AccessCount++; - - for (int i = 0; i < kSetSize; i++) { - auto id = Entries[i].id; - // Check if this is a hit or an empty entry. Since we always move any - // filled locations to the front of the array (see below), we don't need - // to look after finding the first empty entry. - if (id == new_id || !id) { - if (id == 0) { - Entries[i].id = new_id; - Entries[i].MIB = newMIB; - } else { - Entries[i].MIB.Merge(newMIB); - } - // Assuming some id locality, we try to swap the matching entry - // into the first set position. - if (i != 0) { - auto tmp = Entries[0]; - Entries[0] = Entries[i]; - Entries[i] = tmp; - } - return; - } - } - - // Miss - MissCount++; - - // We try to find the entries with the lowest alloc count to be evicted: - int min_idx = 0; - u64 min_count = Entries[0].MIB.alloc_count; - for (int i = 1; i < kSetSize; i++) { - CHECK(!Entries[i].Empty()); - if (Entries[i].MIB.alloc_count < min_count) { - min_idx = i; - min_count = Entries[i].MIB.alloc_count; - } - } - - // Print the evicted entry profile information - if (!flags()->print_terse) - Printf("Evicted:\n"); - Entries[min_idx].Print(); - - // Similar to the hit case, put new MIB in first set position. - if (min_idx != 0) - Entries[min_idx] = Entries[0]; - Entries[0].id = new_id; - Entries[0].MIB = newMIB; - } - - void PrintMissRate(int i) { - u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0; - Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount, - AccessCount, p / 100, p % 100); - } - - SetEntry Entries[kSetSize]; - u32 AccessCount = 0; - u32 MissCount = 0; - SpinMutex SetMutex; -}; - -struct MemInfoBlockCache { - MemInfoBlockCache() { - if (common_flags()->print_module_map) - DumpProcessMap(); - if (flags()->print_terse) - MemInfoBlock::printHeader(); - Sets = - (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); - Constructed = true; - } - - ~MemInfoBlockCache() { free(Sets); } - - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - u64 hv = new_id; - - // Use mod method where number of entries should be a prime close to power - // of 2. - hv %= flags()->mem_info_cache_entries; - - return Sets[hv].insertOrMerge(new_id, newMIB); - } - - void PrintAll() { - for (int i = 0; i < flags()->mem_info_cache_entries; i++) { - Sets[i].PrintAll(); - } - } - - void PrintMissRate() { - if (!flags()->print_mem_info_cache_miss_rate) - return; - u64 MissCountSum = 0; - u64 AccessCountSum = 0; - for (int i = 0; i < flags()->mem_info_cache_entries; i++) { - MissCountSum += Sets[i].MissCount; - AccessCountSum += Sets[i].AccessCount; - } - u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0; - Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum, - AccessCountSum, p / 100, p % 100); - if (flags()->print_mem_info_cache_miss_rate_details) - for (int i = 0; i < flags()->mem_info_cache_entries; i++) - Sets[i].PrintMissRate(i); - } - - CacheSet *Sets; - // Flag when the Sets have been allocated, in case a deallocation is called - // very early before the static init of the Allocator and therefore this table - // have completed. - bool Constructed = false; -}; - // Accumulates the access count from the shadow for the given pointer and size. u64 GetShadowCount(uptr p, u32 size) { u64 *shadow = (u64 *)MEM_TO_SHADOW(p); @@ -457,7 +217,9 @@ uptr max_user_defined_malloc_size; atomic_uint8_t rss_limit_exceeded; - MemInfoBlockCache MemInfoBlockTable; + // Holds the mapping of stack ids to MemInfoBlocks. + MIBMapTy MIBMap; + bool destructing; // ------------------- Initialization ------------------------ @@ -465,16 +227,23 @@ ~Allocator() { FinishAndPrint(); } + static void PrintCallback(const uptr Key, const MemInfoBlock &Value, + void *Arg) { + Value.Print(Key, bool(Arg)); + } + void FinishAndPrint() { + if (common_flags()->print_module_map) + DumpProcessMap(); if (!flags()->print_terse) Printf("Live on exit:\n"); allocator.ForceLock(); allocator.ForEachChunk( [](uptr chunk, void *alloc) { u64 user_requested_size; + Allocator *A = (Allocator *)alloc; MemprofChunk *m = - ((Allocator *)alloc) - ->GetMemprofChunk((void *)chunk, user_requested_size); + A->GetMemprofChunk((void *)chunk, user_requested_size); if (!m) return; uptr user_beg = ((uptr)m) + kChunkHeaderSize; @@ -482,15 +251,14 @@ long curtime = GetTimestamp(); MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, m->cpu_id, GetCpuId()); - ((Allocator *)alloc) - ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap); }, this); allocator.ForceUnlock(); destructing = true; - MemInfoBlockTable.PrintMissRate(); - MemInfoBlockTable.PrintAll(); + MIBMap.ForEach(PrintCallback, + reinterpret_cast(flags()->print_terse)); StackDepotPrintAll(); } @@ -623,14 +391,13 @@ u64 user_requested_size = atomic_exchange(&m->user_requested_size, 0, memory_order_acquire); - if (memprof_inited && memprof_init_done && !destructing && - MemInfoBlockTable.Constructed) { + if (memprof_inited && memprof_init_done && !destructing) { u64 c = GetShadowCount(p, user_requested_size); long curtime = GetTimestamp(); MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, m->cpu_id, GetCpuId()); - MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + InsertOrMerge(m->alloc_context_id, newMIB, MIBMap); } MemprofStats &thread_stats = GetCurrentThreadStats(); diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc --- a/compiler-rt/lib/memprof/memprof_flags.inc +++ b/compiler-rt/lib/memprof/memprof_flags.inc @@ -37,13 +37,3 @@ "pointer to an allocated space which can not be used.") MEMPROF_FLAG(bool, print_terse, false, "If set, prints memory profile in a terse format.") - -MEMPROF_FLAG( - int, mem_info_cache_entries, 16381, - "Size in entries of the mem info block cache, should be closest prime" - " number to a power of two for best hashing.") -MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false, - "If set, prints the miss rate of the mem info block cache.") -MEMPROF_FLAG( - bool, print_mem_info_cache_miss_rate_details, false, - "If set, prints detailed miss rates of the mem info block cache sets.") diff --git a/compiler-rt/lib/memprof/memprof_meminfoblock.h b/compiler-rt/lib/memprof/memprof_meminfoblock.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_meminfoblock.h @@ -0,0 +1,115 @@ +#ifndef MEMPROF_MEMINFOBLOCK_H_ +#define MEMPROF_MEMINFOBLOCK_H_ + +#include "memprof_interface_internal.h" // For u32, u64 TODO: Move these out of the internal header. +#include "sanitizer_common/sanitizer_common.h" + +namespace __memprof { + +using __sanitizer::Printf; + +struct MemInfoBlock { + u32 alloc_count; + u64 total_access_count, min_access_count, max_access_count; + u64 total_size; + u32 min_size, max_size; + u32 alloc_timestamp, dealloc_timestamp; + u64 total_lifetime; + u32 min_lifetime, max_lifetime; + u32 alloc_cpu_id, dealloc_cpu_id; + u32 num_migrated_cpu; + + // Only compared to prior deallocated object currently. + u32 num_lifetime_overlaps; + u32 num_same_alloc_cpu; + u32 num_same_dealloc_cpu; + + u64 data_type_id; // TODO: hash of type name + + MemInfoBlock() : alloc_count(0) {} + + MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, + u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) + : alloc_count(1), total_access_count(access_count), + min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(dealloc_timestamp - alloc_timestamp), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), + num_same_dealloc_cpu(0) { + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Print(u64 id, bool print_terse) const { + u64 p; + + if (print_terse) { + p = total_size * 100 / alloc_count; + Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, alloc_count, p / 100, + p % 100, min_size, max_size); + p = total_access_count * 100 / alloc_count; + Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, min_access_count, + max_access_count); + p = total_lifetime * 100 / alloc_count; + Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, min_lifetime, + max_lifetime); + Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, + num_same_alloc_cpu, num_same_dealloc_cpu); + } else { + p = total_size * 100 / alloc_count; + Printf("Memory allocation stack id = %llu\n", id); + Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n", + alloc_count, p / 100, p % 100, min_size, max_size); + p = total_access_count * 100 / alloc_count; + Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", + p / 100, p % 100, min_access_count, max_access_count); + p = total_lifetime * 100 / alloc_count; + Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100, + p % 100, min_lifetime, max_lifetime); + Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " + "cpu: %u, num same dealloc_cpu: %u\n", + num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, + num_same_dealloc_cpu); + } + } + + static void printHeader() { + Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" + "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" + "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" + "NumSameDeallocCpu\n"); + } + + void Merge(const MemInfoBlock &newMIB) { + alloc_count += newMIB.alloc_count; + + total_access_count += newMIB.total_access_count; + min_access_count = Min(min_access_count, newMIB.min_access_count); + max_access_count = Max(max_access_count, newMIB.max_access_count); + + total_size += newMIB.total_size; + min_size = Min(min_size, newMIB.min_size); + max_size = Max(max_size, newMIB.max_size); + + total_lifetime += newMIB.total_lifetime; + min_lifetime = Min(min_lifetime, newMIB.min_lifetime); + max_lifetime = Max(max_lifetime, newMIB.max_lifetime); + + // We know newMIB was deallocated later, so just need to check if it was + // allocated before last one deallocated. + num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newMIB.alloc_timestamp; + dealloc_timestamp = newMIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; + alloc_cpu_id = newMIB.alloc_cpu_id; + dealloc_cpu_id = newMIB.dealloc_cpu_id; + } +} __attribute__((packed)); + +} // namespace __memprof + +#endif // MEMPROF_MEMINFOBLOCK_H_ diff --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mibmap.h @@ -0,0 +1,16 @@ +#ifndef MEMPROF_MIBMAP_H_ +#define MEMPROF_MIBMAP_H_ + +#include "memprof_meminfoblock.h" +#include "sanitizer_common/sanitizer_addrhashmap.h" + +namespace __memprof { + +// The MIB map is meant to store a mapping from stack ids to MemInfoBlocks. +typedef __sanitizer::AddrHashMap MIBMapTy; + +void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map); + +} // namespace __memprof + +#endif // MEMPROF_MIBMAP_H_ diff --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp @@ -0,0 +1,35 @@ +//===-- memprof_mibmap.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +//===----------------------------------------------------------------------===// + +#include "memprof_mibmap.h" +#include "sanitizer_common/sanitizer_mutex.h" + +namespace __memprof { + +// TODO: Place these on separate cache lines with padding. +static __sanitizer::StaticSpinMutex Mutex[8]; +constexpr uptr Mask = (1ULL << 3) - 1; + +void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) { + const uptr Index = Id & Mask; + SpinMutexLock lock(&Mutex[Index]); + + MIBMapTy::Handle h(&Map, static_cast(Id), /*remove=*/false, + /*create=*/true); + if (h.created()) { + *h = Block; + } else { + h->Merge(Block); + } +} + +} // namespace __memprof diff --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp deleted file mode 100644 --- a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Check mem_info_cache_entries option. - -// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=log_path=stderr:mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s - -// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00% -// CHECK-NOT: Set - -int main() { - return 0; -} diff --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp deleted file mode 100644 --- a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Check print_mem_info_cache_miss_rate and -// print_mem_info_cache_miss_rate_details options. - -// RUN: %clangxx_memprof -O0 %s -o %t -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS - -// CHECK: Overall miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00% - -int main() { - return 0; -}