diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -8,6 +8,7 @@ memprof_interceptors_memintrinsics.cpp memprof_linux.cpp memprof_malloc_linux.cpp + memprof_mibmap.cpp memprof_posix.cpp memprof_rtl.cpp memprof_shadow_setup.cpp @@ -36,6 +37,7 @@ memprof_internal.h memprof_mapping.h memprof_meminfoblock.h + memprof_mibmap.h memprof_stack.h memprof_stats.h memprof_thread.h diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -16,6 +16,7 @@ #include "memprof_allocator.h" #include "memprof_mapping.h" #include "memprof_meminfoblock.h" +#include "memprof_mibmap.h" #include "memprof_stack.h" #include "memprof_thread.h" #include "sanitizer_common/sanitizer_allocator_checks.h" @@ -29,7 +30,6 @@ #include "sanitizer_common/sanitizer_stackdepot.h" #include -#include #include namespace __memprof { @@ -167,146 +167,6 @@ return &ms->allocator_cache; } -struct SetEntry { - SetEntry() : id(0), MIB() {} - bool Empty() { return id == 0; } - void Print() { - CHECK(!Empty()); - MIB.Print(id, flags()->print_terse); - } - // The stack id - u64 id; - MemInfoBlock MIB; -}; - -struct CacheSet { - enum { kSetSize = 4 }; - - void PrintAll() { - for (int i = 0; i < kSetSize; i++) { - if (Entries[i].Empty()) - continue; - Entries[i].Print(); - } - } - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - SpinMutexLock l(&SetMutex); - AccessCount++; - - for (int i = 0; i < kSetSize; i++) { - auto id = Entries[i].id; - // Check if this is a hit or an empty entry. Since we always move any - // filled locations to the front of the array (see below), we don't need - // to look after finding the first empty entry. - if (id == new_id || !id) { - if (id == 0) { - Entries[i].id = new_id; - Entries[i].MIB = newMIB; - } else { - Entries[i].MIB.Merge(newMIB); - } - // Assuming some id locality, we try to swap the matching entry - // into the first set position. - if (i != 0) { - auto tmp = Entries[0]; - Entries[0] = Entries[i]; - Entries[i] = tmp; - } - return; - } - } - - // Miss - MissCount++; - - // We try to find the entries with the lowest alloc count to be evicted: - int min_idx = 0; - u64 min_count = Entries[0].MIB.alloc_count; - for (int i = 1; i < kSetSize; i++) { - CHECK(!Entries[i].Empty()); - if (Entries[i].MIB.alloc_count < min_count) { - min_idx = i; - min_count = Entries[i].MIB.alloc_count; - } - } - - // Print the evicted entry profile information - if (!flags()->print_terse) - Printf("Evicted:\n"); - Entries[min_idx].Print(); - - // Similar to the hit case, put new MIB in first set position. - if (min_idx != 0) - Entries[min_idx] = Entries[0]; - Entries[0].id = new_id; - Entries[0].MIB = newMIB; - } - - void PrintMissRate(int i) { - u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0; - Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount, - AccessCount, p / 100, p % 100); - } - - SetEntry Entries[kSetSize]; - u32 AccessCount = 0; - u32 MissCount = 0; - SpinMutex SetMutex; -}; - -struct MemInfoBlockCache { - MemInfoBlockCache() { - if (common_flags()->print_module_map) - DumpProcessMap(); - if (flags()->print_terse) - MemInfoBlock::printHeader(); - Sets = - (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); - Constructed = true; - } - - ~MemInfoBlockCache() { free(Sets); } - - void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { - u64 hv = new_id; - - // Use mod method where number of entries should be a prime close to power - // of 2. - hv %= flags()->mem_info_cache_entries; - - return Sets[hv].insertOrMerge(new_id, newMIB); - } - - void PrintAll() { - for (int i = 0; i < flags()->mem_info_cache_entries; i++) { - Sets[i].PrintAll(); - } - } - - void PrintMissRate() { - if (!flags()->print_mem_info_cache_miss_rate) - return; - u64 MissCountSum = 0; - u64 AccessCountSum = 0; - for (int i = 0; i < flags()->mem_info_cache_entries; i++) { - MissCountSum += Sets[i].MissCount; - AccessCountSum += Sets[i].AccessCount; - } - u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0; - Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum, - AccessCountSum, p / 100, p % 100); - if (flags()->print_mem_info_cache_miss_rate_details) - for (int i = 0; i < flags()->mem_info_cache_entries; i++) - Sets[i].PrintMissRate(i); - } - - CacheSet *Sets; - // Flag when the Sets have been allocated, in case a deallocation is called - // very early before the static init of the Allocator and therefore this table - // have completed. - bool Constructed = false; -}; - // Accumulates the access count from the shadow for the given pointer and size. u64 GetShadowCount(uptr p, u32 size) { u64 *shadow = (u64 *)MEM_TO_SHADOW(p); @@ -357,24 +217,35 @@ uptr max_user_defined_malloc_size; atomic_uint8_t rss_limit_exceeded; - MemInfoBlockCache MemInfoBlockTable; + // Holds the mapping of stack ids to MemInfoBlocks. + MIBMapTy MIBMap; + bool destructing; + bool constructed = false; // ------------------- Initialization ------------------------ - explicit Allocator(LinkerInitialized) : destructing(false) {} - + explicit Allocator(LinkerInitialized) + : destructing(false), constructed(true) {} ~Allocator() { FinishAndPrint(); } + static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value, + void *Arg) { + SpinMutexLock(&Value->mutex); + Value->mib.Print(Key, bool(Arg)); + } + void FinishAndPrint() { + if (common_flags()->print_module_map) + DumpProcessMap(); if (!flags()->print_terse) Printf("Live on exit:\n"); allocator.ForceLock(); allocator.ForEachChunk( [](uptr chunk, void *alloc) { u64 user_requested_size; + Allocator *A = (Allocator *)alloc; MemprofChunk *m = - ((Allocator *)alloc) - ->GetMemprofChunk((void *)chunk, user_requested_size); + A->GetMemprofChunk((void *)chunk, user_requested_size); if (!m) return; uptr user_beg = ((uptr)m) + kChunkHeaderSize; @@ -382,16 +253,15 @@ long curtime = GetTimestamp(); MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, m->cpu_id, GetCpuId()); - ((Allocator *)alloc) - ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap); }, this); - allocator.ForceUnlock(); destructing = true; - MemInfoBlockTable.PrintMissRate(); - MemInfoBlockTable.PrintAll(); + MIBMap.ForEach(PrintCallback, + reinterpret_cast(flags()->print_terse)); StackDepotPrintAll(); + allocator.ForceUnlock(); } void InitLinkerInitialized() { @@ -523,14 +393,13 @@ u64 user_requested_size = atomic_exchange(&m->user_requested_size, 0, memory_order_acquire); - if (memprof_inited && memprof_init_done && !destructing && - MemInfoBlockTable.Constructed) { + if (memprof_inited && memprof_init_done && constructed && !destructing) { u64 c = GetShadowCount(p, user_requested_size); long curtime = GetTimestamp(); MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, m->cpu_id, GetCpuId()); - MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + InsertOrMerge(m->alloc_context_id, newMIB, MIBMap); } MemprofStats &thread_stats = GetCurrentThreadStats(); diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc --- a/compiler-rt/lib/memprof/memprof_flags.inc +++ b/compiler-rt/lib/memprof/memprof_flags.inc @@ -37,13 +37,3 @@ "pointer to an allocated space which can not be used.") MEMPROF_FLAG(bool, print_terse, false, "If set, prints memory profile in a terse format.") - -MEMPROF_FLAG( - int, mem_info_cache_entries, 16381, - "Size in entries of the mem info block cache, should be closest prime" - " number to a power of two for best hashing.") -MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false, - "If set, prints the miss rate of the mem info block cache.") -MEMPROF_FLAG( - bool, print_mem_info_cache_miss_rate_details, false, - "If set, prints detailed miss rates of the mem info block cache sets.") diff --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mibmap.h @@ -0,0 +1,24 @@ +#ifndef MEMPROF_MIBMAP_H_ +#define MEMPROF_MIBMAP_H_ + +#include "memprof_meminfoblock.h" +#include "sanitizer_common/sanitizer_addrhashmap.h" +#include "sanitizer_common/sanitizer_mutex.h" + +namespace __memprof { + +struct LockedMemInfoBlock { + __sanitizer::StaticSpinMutex mutex; + MemInfoBlock mib; +}; + +// The MIB map stores a mapping from stack ids to MemInfoBlocks. +typedef __sanitizer::AddrHashMap MIBMapTy; + +// Insert a new MemInfoBlock or merge with an existing block identified by the +// stack id. +void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map); + +} // namespace __memprof + +#endif // MEMPROF_MIBMAP_H_ diff --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp @@ -0,0 +1,35 @@ +//===-- memprof_mibmap.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +//===----------------------------------------------------------------------===// + +#include "memprof_mibmap.h" +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_mutex.h" + +namespace __memprof { + +void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) { + MIBMapTy::Handle h(&Map, static_cast(Id), /*remove=*/false, + /*create=*/true); + if (h.created()) { + LockedMemInfoBlock *lmib = + (LockedMemInfoBlock *)InternalAlloc(sizeof(LockedMemInfoBlock)); + lmib->mutex.Init(); + lmib->mib = Block; + *h = lmib; + } else { + LockedMemInfoBlock *lmib = *h; + SpinMutexLock lock(&lmib->mutex); + lmib->mib.Merge(Block); + } +} + +} // namespace __memprof diff --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp deleted file mode 100644 --- a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Check mem_info_cache_entries option. - -// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=log_path=stderr:mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s - -// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00% -// CHECK-NOT: Set - -int main() { - return 0; -} diff --git a/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp @@ -0,0 +1,24 @@ +// RUN: %clangxx_memprof %s -o %t + +// RUN: %env_memprof_opts=log_path=stdout %run %t | FileCheck %s + +#include +#include +#include +int main(int argc, char **argv) { + for (int i = 0; i < 3; i++) { + char *x = (char *)malloc(10); + if (i % 2) + memset(x, 0, 10); + else + memset(x, 2, 10); + free(x); + } + return 0; +} +// We should get one allocation site with alloc_count = loop trip count = 3 +// CHECK: Memory allocation stack id = [[ID:[0-9]+]] +// CHECK-NEXT-COUNT-1: alloc_count 3 +// CHECK-COUNT-1: Stack for id {{.*}}[[ID]] +// CHECK-NEXT-COUNT-1: memprof_malloc_linux.cpp +// CHECK-NEXT-COUNT-1: memprof_merge_mib.cpp diff --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp deleted file mode 100644 --- a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Check print_mem_info_cache_miss_rate and -// print_mem_info_cache_miss_rate_details options. - -// RUN: %clangxx_memprof -O0 %s -o %t -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS - -// CHECK: Overall miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00% - -int main() { - return 0; -}