diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -45,6 +45,8 @@ mark_as_advanced(COMPILER_RT_BUILD_LIBFUZZER) option(COMPILER_RT_BUILD_PROFILE "Build profile runtime" ON) mark_as_advanced(COMPILER_RT_BUILD_PROFILE) +option(COMPILER_RT_BUILD_MEMPROF "Build memory profiling runtime" ON) +mark_as_advanced(COMPILER_RT_BUILD_MEMPROF) option(COMPILER_RT_BUILD_XRAY_NO_PREINIT "Build xray with no preinit patching" OFF) mark_as_advanced(COMPILER_RT_BUILD_XRAY_NO_PREINIT) @@ -67,6 +69,25 @@ -D${COMPILER_RT_ASAN_SHADOW_SCALE_DEFINITION}) endif() +set(COMPILER_RT_MEMPROF_SHADOW_SCALE "" + CACHE STRING "Override the shadow scale to be used in memprof runtime") + +if (NOT COMPILER_RT_MEMPROF_SHADOW_SCALE STREQUAL "") + # Check that the shadow scale value is valid. + if (NOT (COMPILER_RT_MEMPROF_SHADOW_SCALE GREATER -1 AND + COMPILER_RT_MEMPROF_SHADOW_SCALE LESS 8)) + message(FATAL_ERROR " + Invalid Memprof Shadow Scale '${COMPILER_RT_MEMPROF_SHADOW_SCALE}'.") + endif() + + set(COMPILER_RT_MEMPROF_SHADOW_SCALE_LLVM_FLAG + -mllvm -memprof-mapping-scale=${COMPILER_RT_MEMPROF_SHADOW_SCALE}) + set(COMPILER_RT_MEMPROF_SHADOW_SCALE_DEFINITION + MEMPROF_SHADOW_SCALE=${COMPILER_RT_MEMPROF_SHADOW_SCALE}) + set(COMPILER_RT_MEMPROF_SHADOW_SCALE_FLAG + -D${COMPILER_RT_MEMPROF_SHADOW_SCALE_DEFINITION}) +endif() + set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS ON CACHE BOOL "Enable libc interceptors in HWASan (testing mode)") diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -323,6 +323,7 @@ endif() set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) +set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9}) set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64}) @@ -550,6 +551,9 @@ list_intersect(HWASAN_SUPPORTED_ARCH ALL_HWASAN_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(MEMPROF_SUPPORTED_ARCH + ALL_MEMPROF_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) list_intersect(PROFILE_SUPPORTED_ARCH ALL_PROFILE_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) @@ -598,6 +602,7 @@ filter_available_targets(LSAN_SUPPORTED_ARCH ${ALL_LSAN_SUPPORTED_ARCH}) filter_available_targets(MSAN_SUPPORTED_ARCH ${ALL_MSAN_SUPPORTED_ARCH}) filter_available_targets(HWASAN_SUPPORTED_ARCH ${ALL_HWASAN_SUPPORTED_ARCH}) + filter_available_targets(MEMPROF_SUPPORTED_ARCH ${ALL_MEMPROF_SUPPORTED_ARCH}) filter_available_targets(PROFILE_SUPPORTED_ARCH ${ALL_PROFILE_SUPPORTED_ARCH}) filter_available_targets(TSAN_SUPPORTED_ARCH ${ALL_TSAN_SUPPORTED_ARCH}) filter_available_targets(UBSAN_SUPPORTED_ARCH ${ALL_UBSAN_SUPPORTED_ARCH}) @@ -701,6 +706,13 @@ set(COMPILER_RT_HAS_HWASAN FALSE) endif() +if (COMPILER_RT_HAS_SANITIZER_COMMON AND MEMPROF_SUPPORTED_ARCH AND + OS_NAME MATCHES "Linux") + set(COMPILER_RT_HAS_MEMPROF TRUE) +else() + set(COMPILER_RT_HAS_MEMPROF FALSE) +endif() + if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|Fuchsia|SunOS|NetBSD") set(COMPILER_RT_HAS_PROFILE TRUE) diff --git a/compiler-rt/include/CMakeLists.txt b/compiler-rt/include/CMakeLists.txt --- a/compiler-rt/include/CMakeLists.txt +++ b/compiler-rt/include/CMakeLists.txt @@ -5,6 +5,7 @@ 
sanitizer/common_interface_defs.h sanitizer/coverage_interface.h sanitizer/dfsan_interface.h + sanitizer/memprof_interface.h sanitizer/hwasan_interface.h sanitizer/linux_syscall_hooks.h sanitizer/lsan_interface.h diff --git a/compiler-rt/include/sanitizer/memprof_interface.h b/compiler-rt/include/sanitizer/memprof_interface.h new file mode 100644 --- /dev/null +++ b/compiler-rt/include/sanitizer/memprof_interface.h @@ -0,0 +1,60 @@ +//===-- sanitizer/memprof_interface.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler (MemProf). +// +// Public interface header. +//===----------------------------------------------------------------------===// +#ifndef SANITIZER_MEMPROF_INTERFACE_H +#define SANITIZER_MEMPROF_INTERFACE_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif +/// Records access to a memory region ([addr, addr+size)). +/// +/// This memory must be previously allocated by your program. +/// +/// \param addr Start of memory region. +/// \param size Size of memory region. +void __memprof_record_access_range(void const volatile *addr, size_t size); + +/// Records access to a memory address addr. +/// +/// This memory must be previously allocated by your program. +/// +/// \param addr Accessed memory address +void __memprof_record_access(void const volatile *addr); + +/// User-provided callback on MemProf errors. +/// +/// You can provide a function that would be called immediately when MemProf +/// detects an error. This is useful in cases when MemProf detects an error but +/// your program crashes before the MemProf report is printed. +void __memprof_on_error(void); + +/// Prints accumulated statistics to stderr (useful for calling from the +/// debugger). +void __memprof_print_accumulated_stats(void); + +/// User-provided default option settings. +/// +/// You can provide your own implementation of this function to return a string +/// containing MemProf runtime options (for example, +/// verbosity=1:print_stats=1). +/// +/// \returns Default options string. +const char *__memprof_default_options(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SANITIZER_MEMPROF_INTERFACE_H diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -60,6 +60,10 @@ compiler_rt_build_runtime(fuzzer) endif() +if(COMPILER_RT_BUILD_MEMPROF AND COMPILER_RT_HAS_SANITIZER_COMMON) + compiler_rt_build_runtime(memprof) +endif() + # It doesn't normally make sense to build runtimes when a sanitizer is enabled, # so we don't add_subdirectory the runtimes in that case. However, the opposite # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer diff --git a/compiler-rt/lib/asan/asan_allocator.cpp.sv b/compiler-rt/lib/asan/asan_allocator.cpp.sv new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/asan/asan_allocator.cpp.sv @@ -0,0 +1,1605 @@ +//===-- asan_allocator.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
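For orientation, a minimal usage sketch for the public interface declared in sanitizer/memprof_interface.h above. This is not part of the patch: the option string is only an assumed example of the verbosity=1:print_stats=1 form mentioned in the header comments, and it presumes the program is built with MemProf instrumentation enabled (the exact driver flag is outside this diff).

#include <sanitizer/memprof_interface.h>
#include <cstdlib>
#include <cstring>

// Optional: bake in default runtime options (see the __memprof_default_options
// documentation above); the values here are illustrative only.
extern "C" const char *__memprof_default_options() {
  return "verbosity=1:print_stats=1";
}

int main() {
  char *buf = static_cast<char *>(malloc(128));
  memset(buf, 0, 128);
  __memprof_record_access_range(buf, 64);  // explicitly record an access
  free(buf);
  __memprof_print_accumulated_stats();
  return 0;
}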
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Implementation of ASan's memory allocator, 2-nd version. +// This variant uses the allocator from sanitizer_common, i.e. the one shared +// with ThreadSanitizer and MemorySanitizer. +// +//===----------------------------------------------------------------------===// + +#include +#ifdef __linux__ +#include +#endif +#include +#include +#include "asan_allocator.h" +#include "asan_mapping.h" +#include "asan_poisoning.h" +#include "asan_report.h" +#include "asan_stack.h" +#include "asan_thread.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#undef errno +#include "sanitizer_common/sanitizer_errno.h" +#include "sanitizer_common/sanitizer_file.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_list.h" +#include "sanitizer_common/sanitizer_stackdepot.h" +#include "sanitizer_common/sanitizer_quarantine.h" +#include "lsan/lsan_common.h" + +namespace __asan { + +static char PathPrefix[] = "./alloc"; +extern "C" char *__llvm_profile_get_path_prefix() { + return PathPrefix; +} +extern "C" void __llvm_profile_set_filename(const char *Name) { + // Only used for testing. +} +//static char ProfileFilename[] = "default_1.profraw"; +//static char ProfileFilenameWithPath[] = "./alloc/default_1.profraw"; +extern "C" const char *__llvm_profile_get_filename() { + return flags()->heapprof_file; + //return ProfileFilenameWithPath; +} +extern "C" void __llvm_profile_reset_counters() { +} +static fd_t ProfileFile = kInvalidFd; + +static void OpenProfileIfNeeded() { + if (ProfileFile != kInvalidFd) + return; + + if (!flags()->heapprof_file[0]) { + ProfileFile = kStderrFd; + } else { + error_t err; + //ProfileFile = OpenFile(ProfileFilenameWithPath, WrOnly, &err); + ProfileFile = OpenFile(flags()->heapprof_file, WrOnly, &err); + if (ProfileFile == kInvalidFd) + Report("Allocator: failed to open %s for writing (reason: %d)\n", + flags()->heapprof_file, err); + //ProfileFilenameWithPath, err); + __sanitizer_set_report_fd(reinterpret_cast(ProfileFile)); + } + + DumpProcessMap(); +} + +// Valid redzone sizes are 16, 32, 64, ... 2048, so we encode them in 3 bits. +// We use adaptive redzones: for larger allocation larger redzones are used. +static u32 RZLog2Size(u32 rz_log) { + return 32; +#if 0 + CHECK_LT(rz_log, 8); + return 16 << rz_log; +#endif +} + +static int GetCpuId(void) { + // _asan_init is called via the preinit_array, which subsequently calls + // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu + // will seg fault as the address of __vdso_getcpu will be null. + // Can we use rseq like tcmalloc instead of calling sched_getcpu? 
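As a side note on the question above: the raw getcpu(2) syscall does not go through the vDSO, so it remains usable even before _dl_init has set up __vdso_getcpu. A minimal sketch of that alternative (Linux only, slower than the vDSO path, and not part of this patch):

#include <sys/syscall.h>
#include <unistd.h>

// Query the current CPU without relying on the vDSO-backed sched_getcpu().
// Returns -1 on failure.
static int GetCpuIdViaRawSyscall() {
  unsigned cpu = 0, node = 0;
  if (syscall(SYS_getcpu, &cpu, &node, nullptr) != 0)
    return -1;
  return static_cast<int>(cpu);
}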
+ if (!asan_inited || !asan_init_done) + return -1; +#ifdef __linux__ + //return -1; +#if 0 + if (asan_inited && asan_init_done) + fprintf(stderr, "sched_getcpu = %d\n", sched_getcpu()); + return internal_getcpu(); +#endif + return sched_getcpu(); +#else +#ifdef _WIN32 + PROCESSOR_NUMBER pn; + GetCurrentProcessorNumberEx(&pn); + return 64 * pn.Group + pn.Number; +#else + return -1; +#endif +#endif +} + +static int GetTimestamp(void) { + // timespec_get will segfault if called from dl_init + if (!heapprof_inited) { + //fprintf(stderr, "Try to get timestamp before inited\n"); + // By returning 0, this will be effectively treated as being + // timestamped at heapprof init time (when heapprof_init_timestamp_s + // is initialized). + return 0; + } + timespec ts; + timespec_get(&ts, TIME_UTC); + return (ts.tv_sec - heapprof_init_timestamp_s)*1000 + ts.tv_nsec/1000; +} + +static AsanAllocator &get_allocator(); + +// The memory chunk allocated from the underlying allocator looks like this: +// L L L L L L H H U U U U U U R R +// L -- left redzone words (0 or more bytes) +// H -- ChunkHeader (16 bytes), which is also a part of the left redzone. +// U -- user memory. +// R -- right redzone (0 or more bytes) +// ChunkBase consists of ChunkHeader and other bytes that overlap with user +// memory. + +// If the left redzone is greater than the ChunkHeader size we store a magic +// value in the first uptr word of the memory block and store the address of +// ChunkBase in the next uptr. +// M B L L L L L L L L L H H U U U U U U +// | ^ +// ---------------------| +// M -- magic value kAllocBegMagic +// B -- address of ChunkHeader pointing to the first 'H' +static const uptr kAllocBegMagic = 0xCC6E96B9; + +// Should be no more than 32-bytes +struct ChunkHeader { + // 1-st 4 bytes. + u32 from_memalign : 1; + // This field is used for small sizes. For large sizes it is equal to + // SizeClassMap::kMaxSize and the actual size is stored in the + // SecondaryAllocator's metadata. + u32 user_requested_size : 29; + // 2 bits available + // 2-nd 4 bytes + u32 cpu_id; + // 3-rd 4 bytes + u32 timestamp_ms; + // 4-th and 5th 4 bytes + u64 alloc_context_id; + // 6-th and 7-th 4 bytes + u64 data_type_id; // hash of type name + // 8-th 4 bytes available +}; + +struct ChunkBase : ChunkHeader { + // Header2, intersects with user memory. + u32 free_context_id; +}; + +static const uptr kChunkHeaderSize = sizeof(ChunkHeader); +static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize; +COMPILER_CHECK(kChunkHeaderSize == 32); +COMPILER_CHECK(kChunkHeader2Size <= 16); + +// Every chunk of memory allocated by this allocator can be in one of 3 states: +// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated. +// CHUNK_ALLOCATED: the chunk is allocated and not yet freed. +// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. +enum { + CHUNK_AVAILABLE = 0, // 0 is the default value even if we didn't set it. 
+ CHUNK_ALLOCATED = 2, + CHUNK_QUARANTINE = 3 +}; + +struct AsanChunk: ChunkBase { + uptr Beg() { return reinterpret_cast(this) + kChunkHeaderSize; } + uptr UsedSize(bool locked_version = false) { + if (user_requested_size != SizeClassMap::kMaxSize) + return user_requested_size; + return *reinterpret_cast( + get_allocator().GetMetaData(AllocBeg(locked_version))); + } + void *AllocBeg(bool locked_version = false) { + //fprintf(stderr, "AllocBeg %d %d %lx %lx\n", from_memalign, locked_version, this, kChunkHeaderSize); + if (from_memalign) { + if (locked_version) + return get_allocator().GetBlockBeginFastLocked( + reinterpret_cast(this)); + return get_allocator().GetBlockBegin(reinterpret_cast(this)); + } + return reinterpret_cast(Beg() - kChunkHeaderSize); + } + bool AddrIsInside(uptr addr, bool locked_version = false) { + return (addr >= Beg()) && (addr < Beg() + UsedSize(locked_version)); + } +}; + +struct QuarantineCallback { + QuarantineCallback(AllocatorCache *cache, BufferedStackTrace *stack) + : cache_(cache), + stack_(stack) { + } + + void Recycle(AsanChunk *m) { + CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); + PoisonShadow(m->Beg(), + RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY), + kAsanHeapLeftRedzoneMagic); + void *p = reinterpret_cast(m->AllocBeg()); + if (p != m) { + uptr *alloc_magic = reinterpret_cast(p); + CHECK_EQ(alloc_magic[0], kAllocBegMagic); + // Clear the magic value, as allocator internals may overwrite the + // contents of deallocated chunk, confusing GetAsanChunk lookup. + alloc_magic[0] = 0; + CHECK_EQ(alloc_magic[1], reinterpret_cast(m)); + } + + // Statistics. + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.real_frees++; + thread_stats.really_freed += m->UsedSize(); + + get_allocator().Deallocate(cache_, p); + } + + void *Allocate(uptr size) { + void *res = get_allocator().Allocate(cache_, size, 1); + // TODO(alekseys): Consider making quarantine OOM-friendly. + if (UNLIKELY(!res)) + ReportOutOfMemory(size, stack_); + return res; + } + + void Deallocate(void *p) { + get_allocator().Deallocate(cache_, p); + } + + private: + AllocatorCache* const cache_; + BufferedStackTrace* const stack_; +}; + +typedef Quarantine AsanQuarantine; +typedef AsanQuarantine::Cache QuarantineCache; + +void AsanMapUnmapCallback::OnMap(uptr p, uptr size) const { + PoisonShadow(p, size, kAsanHeapLeftRedzoneMagic); + // Statistics. + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.mmaps++; + thread_stats.mmaped += size; +} +void AsanMapUnmapCallback::OnUnmap(uptr p, uptr size) const { + PoisonShadow(p, size, 0); + // We are about to unmap a chunk of user memory. + // Mark the corresponding shadow memory as not needed. + FlushUnneededASanShadowMemory(p, size); + // Statistics. + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.munmaps++; + thread_stats.munmaped += size; +} + +// We can not use THREADLOCAL because it is not supported on some of the +// platforms we care about (OSX 10.6, Android). 
+// static THREADLOCAL AllocatorCache cache; +AllocatorCache *GetAllocatorCache(AsanThreadLocalMallocStorage *ms) { + CHECK(ms); + return &ms->allocator_cache; +} + +QuarantineCache *GetQuarantineCache(AsanThreadLocalMallocStorage *ms) { + CHECK(ms); + CHECK_LE(sizeof(QuarantineCache), sizeof(ms->quarantine_cache)); + return reinterpret_cast(ms->quarantine_cache); +} + +void AllocatorOptions::SetFrom(const Flags *f, const CommonFlags *cf) { + quarantine_size_mb = f->quarantine_size_mb; + thread_local_quarantine_size_kb = f->thread_local_quarantine_size_kb; + min_redzone = f->redzone; + max_redzone = f->max_redzone; + may_return_null = cf->allocator_may_return_null; + alloc_dealloc_mismatch = f->alloc_dealloc_mismatch; + release_to_os_interval_ms = cf->allocator_release_to_os_interval_ms; +} + +void AllocatorOptions::CopyTo(Flags *f, CommonFlags *cf) { + f->quarantine_size_mb = quarantine_size_mb; + f->thread_local_quarantine_size_kb = thread_local_quarantine_size_kb; + f->redzone = min_redzone; + f->max_redzone = max_redzone; + cf->allocator_may_return_null = may_return_null; + f->alloc_dealloc_mismatch = alloc_dealloc_mismatch; + cf->allocator_release_to_os_interval_ms = release_to_os_interval_ms; +} + +struct HeapInfoBlock { + u32 alloc_count; + u64 total_access_count, min_access_count, max_access_count; + u64 total_size; + u32 min_size, max_size; + u32 alloc_timestamp, dealloc_timestamp; + u64 total_lifetime; + u32 min_lifetime, max_lifetime; + u64 total_percent_utilized; + u32 min_percent_utilized, max_percent_utilized; + u32 alloc_cpu_id, dealloc_cpu_id; + u32 num_migrated_cpu; + + // Only compared to prior deallocated object unless multiple prev tracked. + u32 num_lifetime_overlaps; + u32 num_same_alloc_cpu; + u32 num_same_dealloc_cpu; + + u64 data_type_id; // hash of type name - is this needed? 
+ + HeapInfoBlock() : alloc_count(0) {} + + HeapInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, u32 dealloc_timestamp, u32 percent_utilized, u32 alloc_cpu, u32 dealloc_cpu) + : alloc_count(1), total_access_count(access_count), min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(alloc_timestamp?(dealloc_timestamp - alloc_timestamp):0), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + total_percent_utilized(percent_utilized), + min_percent_utilized(percent_utilized), max_percent_utilized(percent_utilized), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), num_same_dealloc_cpu(0) { + //fprintf(stderr, "Construct HIB\n"); +#if 0 + if (alloc_timestamp == 0) + Printf("Bad alloc timestamp 0 (allocated before heapprof init)\n"); +#endif + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Print(u64 id) { + char buffer[100]; +#define PRINTVERBOSE 0 +#if PRINTVERBOSE + sprintf(buffer, "%5.2f", ((float)total_size)/alloc_count); + Printf("HeapInfoBlock:\n"); + Printf("\tallocation stack id = %llu\n", id); + Printf("\talloc_count %u, size (ave/min/max) %s / %u / %u\n", alloc_count, buffer, min_size, max_size); + sprintf(buffer, "%5.2f", ((float)total_access_count)/alloc_count); + Printf("\taccess_count (ave/min/max): %s / %u / %u\n", buffer, min_access_count, max_access_count); + sprintf(buffer, "%5.2f", ((float)total_lifetime)/alloc_count); + Printf("\tlifetime (ave/min/max): %s / %u / %u\n", buffer, min_lifetime, max_lifetime); + sprintf(buffer, "%5.2f", ((float)total_percent_utilized)/alloc_count); + Printf("\tpercent_utilized (ave/min/max): %s / %u / %u\n", buffer, min_percent_utilized, max_percent_utilized); + Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc cpu: %u, num same dealloc_cpu: %u\n", + num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, num_same_dealloc_cpu); +#else + sprintf(buffer, "%5.2f", ((float)total_size)/alloc_count); + Printf("HIB:%llu/%u/%s/%u/%u/", id, alloc_count, buffer, min_size, max_size); + sprintf(buffer, "%5.2f", ((float)total_access_count)/alloc_count); + Printf("%s/%u/%u/", buffer, min_access_count, max_access_count); + sprintf(buffer, "%5.2f", ((float)total_lifetime)/alloc_count); + Printf("%s/%u/%u/", buffer, min_lifetime, max_lifetime); + sprintf(buffer, "%5.2f", ((float)total_percent_utilized)/alloc_count); + Printf("%s/%u/%u/", buffer, min_percent_utilized, max_percent_utilized); + Printf("%u/%u/%u/%u\n", + num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, num_same_dealloc_cpu); +#endif + } + + // This is not thread safe!! 
+ void Merge(HeapInfoBlock &newHIB) { + //fprintf(stderr, "Merge HIB\n"); + alloc_count += newHIB.alloc_count; // Presumably adds 1 + + total_access_count += newHIB.total_access_count; + min_access_count = Min(min_access_count, newHIB.min_access_count); + max_access_count = Max(max_access_count, newHIB.max_access_count); + + total_size += newHIB.total_size; + min_size = Min(min_size, newHIB.min_size); + max_size = Max(max_size, newHIB.max_size); + + total_lifetime += newHIB.total_lifetime; + min_lifetime = Min(min_lifetime, newHIB.min_lifetime); + max_lifetime = Max(max_lifetime, newHIB.max_lifetime); + + total_percent_utilized += newHIB.total_percent_utilized; + min_percent_utilized = Min(min_percent_utilized, newHIB.min_percent_utilized); + max_percent_utilized = Max(max_percent_utilized, newHIB.max_percent_utilized); + + // We know newHIB was deallocated later, so just need to check if it was + // allocated before last one deallocated. + num_lifetime_overlaps += newHIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newHIB.alloc_timestamp; + dealloc_timestamp = newHIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newHIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newHIB.dealloc_cpu_id; + alloc_cpu_id = newHIB.alloc_cpu_id; + dealloc_cpu_id = newHIB.dealloc_cpu_id; + } +}; + +static u32 AccessCount = 0; +static u32 MissCount = 0; + +struct SetEntry { + SetEntry() : id(0), HIB() {} + bool Empty() { return id == 0; } + void Print(bool PrintStack = false) { + CHECK(!Empty()); + HIB.Print(id); + if (PrintStack) { + Printf("Stack:\n"); + StackDepotGet(id).Print(); + } + } + u64 id; + HeapInfoBlock HIB; +}; + +struct CacheSet { + enum { kSetSize = 4 }; + + void PrintAll() { + for (int i = 0; i < kSetSize; i++) { + if (Entries[i].Empty()) + continue; + Entries[i].Print(); + } + } + void insertOrMerge(u64 new_id, HeapInfoBlock &newHIB) { + int i = 0; + SetEntry* matching_entry = nullptr; + for (i = 0; i < kSetSize; i++) { + auto id = Entries[i].id; + if (id == new_id || !id) { + if (id == 0) { + Entries[i].id = new_id; + Entries[i].HIB = newHIB; + } else { + Entries[i].HIB.Merge(newHIB); + } + matching_entry = &Entries[i]; + break; + } + } + + AccessCount++; + SetAccessCount++; + + // Hit + if (i < kSetSize) { + // Assuming some id locality, we try to swap the hit entry + // with the first one. Not sure if such locality is prevalent. + if (i != 0) { + auto tmp = Entries[0]; + Entries[0] = *matching_entry; + *matching_entry = tmp; + } + return; + } + + // Miss + MissCount++; + SetMissCount++; + + // We try to find the entries with the lowest alloc count to be evicted: + int min_idx = 0; + u64 min_count = Entries[0].HIB.alloc_count; + for (i = 1; i < kSetSize; i++) { + if (Entries[i].Empty()) { + break; + } + if (Entries[i].HIB.alloc_count < min_count) { + min_idx = i; + min_count = Entries[i].HIB.alloc_count; + } + } + +#define DOPRINT 1 +#if DOPRINT +#define PRINTEVICTED 1 +#if PRINTEVICTED +#if PRINTVERBOSE + Printf("Evicted:\n"); +#endif + Entries[min_idx].Print(false); +#endif +#endif + + + // Similar to the hit case, put new HIB in first set position. 
+ if (min_idx != 0) { + Entries[min_idx] = Entries[0]; + } + Entries[0].id = new_id; + Entries[0].HIB = newHIB; + } + + void PrintMissRate(int i) { + char buffer[100]; + sprintf(buffer, "%5.2f%%", SetAccessCount?SetMissCount*100.0/SetAccessCount:0.0); + Printf("Set %d miss rate: %d / %d = %s\n", i, SetMissCount, SetAccessCount, buffer); + } + + SetEntry Entries[kSetSize]; + u32 SetAccessCount = 0; + u32 SetMissCount = 0; +}; + +struct HeapInfoBlockCache { +#if 0 + enum { kCacheSize = 1024*16 }; + enum { kCacheShift = 14 }; +#endif + //enum { kCacheSize = 1024 }; + // Closest prime number to 2048 + enum { kCacheSize = 2053 }; + // Closest prime number to 4096 (4093/4099 are closest) + //enum { kCacheSize = 4093 }; + // Closest prime number to 8192 + //enum { kCacheSize = 8191 }; + // Closest prime number to 16384 is 16381 + //enum { kCacheSize = 16381 }; + // Closest prime number to 32768 is 32771 + //enum { kCacheSize = 32771 }; + // Largest prime number below 1024 + //enum { kCacheSize = 1021 }; + // Largest prime number above 1024 + //enum { kCacheSize = 1031 }; + //enum { kCacheShift = 10 }; + + HeapInfoBlockCache() { + //Printf("Construct HIB Cache %p\n", this); + OpenProfileIfNeeded(); + } + + ~HeapInfoBlockCache() { + //Printf("Destruct HIB Cache\n"); + CloseFile(ProfileFile); + } + + void insertOrMerge(u64 new_id, HeapInfoBlock &newHIB) { + //Printf("insertOrMerge %p:\n", this); + OpenProfileIfNeeded(); + u64 hv = new_id; +#if 0 + // stack ids are small sequential numbers in the lower bits, + // or'ed with a 2^8 bit "part" shifted left 23 bits. + // So doing a simple mod doesn't give a good hash distribution. + // Instead move the 2^8 bit part into the upper bits of our + // hash and or with mod. + // Need to not hard code these shifts, they may vary by platform. + u64 part = new_id >> 23; + u64 shifted_part = part << (kCacheShift > 8 ? (kCacheShift -/*part is 2^8*/8) : 0); + hv |= shifted_part; + // Do the mod last in case any of the above logic is wrong shifted_part is + // too large for cache + hv %= kCacheSize; + //fprintf(stderr, "insertOrMerge %x %x %x %x\n", new_id, part, shifted_part, hv); +#endif + +#if 0 + // Use multiplication method: + // h(k) = floor(m * (kA - floor(kA))) + // A = (sqrt(5)-1)/2 = 0.6180339887 + // m = table size, power of 2 + double kA = new_id*0.6180339887; + hv = kCacheSize * (kA - (u32)kA); + //fprintf(stderr, "insertOrMerge %x %5.2f %5.2f %x\n", new_id, kA, (kA - (u32)kA), hv); +#endif + + // Use mod method where kCacheSize is a prime close to power of 2. 
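A compile-time illustration of the comment above (not part of the patch): with stack ids of the form (part << 23) | seq, a power-of-two table size keeps only the low bits, so every part lands in the same bucket, while a nearby prime also mixes in the high bits.

// Illustration only; 2048 stands in for a power-of-two size, 2053 is the
// prime kCacheSize chosen above.
static_assert((((0u << 23) | 5u) % 2048u) == 5u, "");
static_assert((((1u << 23) | 5u) % 2048u) == 5u, "");   // same bucket: collision
static_assert((((0u << 23) | 5u) % 2053u) == 5u, "");
static_assert((((1u << 23) | 5u) % 2053u) == 55u, "");  // spread out by the prime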
+ hv %= kCacheSize; + + return Sets[hv].insertOrMerge(new_id, newHIB); + } + + void PrintAll() { + for (int i = 0; i < kCacheSize; i++) { + Sets[i].PrintAll(); + } + } + + void PrintMissRate() { + char buffer[100]; + sprintf(buffer, "%5.2f%%", AccessCount?MissCount*100.0/AccessCount:0.0); + Printf("Overall miss rate: %d / %d = %s\n", MissCount, AccessCount, buffer); +#define PRINTMISSRATEDETAILS 0 +#if PRINTMISSRATEDETAILS + for (int i = 0; i < kCacheSize; i++) { + Sets[i].PrintMissRate(i); + } +#endif + } + + CacheSet Sets[kCacheSize]; +}; + +u64 GetShadowCount(uptr p, u32 size, u32 &percent_utilized) { + u64 *shadow = (u64 *)MEM_TO_SHADOW(p); + u64 count = 0; + // MEM_GRANULARITY = 64 + u32 blocks = (size + MEM_GRANULARITY-1)/MEM_GRANULARITY; + u64 *shadow_end = shadow + blocks; + u32 utilized = 0; + for ( ; shadow < shadow_end; shadow++) { + u64 new_count = *shadow; + count += new_count; + utilized += (new_count != 0); + } + percent_utilized = (utilized + blocks-1)/blocks * 100; + return count; +} + +struct Allocator { + static const uptr kMaxAllowedMallocSize = + FIRST_32_SECOND_64(3UL << 30, 1ULL << 40); + + AsanAllocator allocator; + AsanQuarantine quarantine; + StaticSpinMutex fallback_mutex; + AllocatorCache fallback_allocator_cache; + QuarantineCache fallback_quarantine_cache; + + uptr max_user_defined_malloc_size; + atomic_uint8_t rss_limit_exceeded; + + // ------------------- Options -------------------------- + atomic_uint16_t min_redzone; + atomic_uint16_t max_redzone; + atomic_uint8_t alloc_dealloc_mismatch; + +//#define USEHIB 0 // DEBUGGING +#define USEHIB 1 +#if USEHIB +#if USEHIBOLD + std::map HeapInfoBlockTable; +#else + HeapInfoBlockCache HeapInfoBlockTable; +#endif +#endif + bool destructing; + + // ------------------- Initialization ------------------------ + explicit Allocator(LinkerInitialized) + : quarantine(LINKER_INITIALIZED), + fallback_quarantine_cache(LINKER_INITIALIZED), + destructing(false) { + //Printf("Construct Allocator\n"); +#if 0 + error_t err; + ProfileFile = OpenFile(ProfileFilenameWithPath, WrOnly, &err); + if (ProfileFile == kInvalidFd) + Report("Allocator: failed to open %s for writing (reason: %d)\n", + ProfileFilenameWithPath, err); + __sanitizer_set_report_fd(reinterpret_cast(ProfileFile)); +#endif + } + + ~Allocator() { + //Printf("Destruct Allocator\n"); + FinishAndPrint(); +#if 0 + CloseFile(ProfileFile); +#endif + } + + void FinishAndPrint() { +#define NONFREED 1 +#if NONFREED + Printf("Live on exit:\n"); + allocator.ForceLock(); + allocator.ForEachChunk( + [](uptr chunk, void *alloc) { +#if USEHIB + AsanChunk *m = ((Allocator *)alloc)->GetAsanChunk((void *)chunk); + //fprintf(stderr, "Chunk at %llx state is %u size %u\n", m, m->chunk_state, m->user_requested_size); + //if (m->chunk_state != CHUNK_ALLOCATED) + // The size is reset to 0 on deallocation (and a min of 1 on + // allocation). 
+ if (!m->user_requested_size) + return; + uptr user_beg = ((uptr)m) + kChunkHeaderSize; +#if 0 + if (m->user_requested_size == 80) { + fprintf(stderr, "Chunk alloc id %llu size %u mask %llx mask2 %llx\n", m->alloc_context_id, m->user_requested_size, SHADOW_MASK, SHADOW_MASK2); + u64 *shadow = (u64 *)MEM_TO_SHADOW(user_beg); + // MEM_GRANULARITY = 64 + u64 *shadow_end = shadow + (m->user_requested_size + MEM_GRANULARITY-1)/MEM_GRANULARITY; + for ( ; shadow < shadow_end; shadow++) + fprintf(stderr, "Shadow for %llx is %llu\n", shadow, *shadow); + } +#endif + //u64 c = *(u64 *)MEM_TO_SHADOW(user_beg); + u32 percent_utilized; + u64 c = GetShadowCount(user_beg, m->user_requested_size, percent_utilized); + long curtime = GetTimestamp(); + HeapInfoBlock newHIB(m->user_requested_size, c, m->timestamp_ms, curtime, percent_utilized, m->cpu_id, GetCpuId()); +#if USEHIBOLD + auto I = ((Allocator *)alloc)->HeapInfoBlockTable.find(m->alloc_context_id); + if (I != ((Allocator *)alloc)->HeapInfoBlockTable.end()) + I->second.Merge(newHIB); + else { + fprintf(stderr, "Stack:\n"); + StackDepotGet(m->alloc_context_id).Print(); + newHIB.Print(); + } +#else + ((Allocator *)alloc)->HeapInfoBlockTable.insertOrMerge(m->alloc_context_id, newHIB); +#endif + +#if 0 + // this may do a malloc which will wait on the lock we're holding here + auto I = ((Allocator *)alloc)->HeapInfoBlockTable.insert({m->alloc_context_id, newHIB}); + if (!I.second) + I.first->second.Merge(newHIB); +#endif +#endif + }, + this); + allocator.ForceUnlock(); +#endif + + destructing = true; +#if USEHIB + //Printf("Dump HIBTable:\n"); +#if USEHIBOLD + for (auto I : HeapInfoBlockTable) { + fprintf(stderr, "Stack:\n"); + StackDepotGet(I.first).Print(); + I.second.Print(); + } +#else + HeapInfoBlockTable.PrintMissRate(); +#if DOPRINT + HeapInfoBlockTable.PrintAll(); + StackDepotPrintAll(); +#endif +#endif +#endif + } + + void CheckOptions(const AllocatorOptions &options) const { + CHECK_GE(options.min_redzone, 16); + CHECK_GE(options.max_redzone, options.min_redzone); + CHECK_LE(options.max_redzone, 2048); + CHECK(IsPowerOfTwo(options.min_redzone)); + CHECK(IsPowerOfTwo(options.max_redzone)); + } + + void SharedInitCode(const AllocatorOptions &options) { + CheckOptions(options); + quarantine.Init((uptr)options.quarantine_size_mb << 20, + (uptr)options.thread_local_quarantine_size_kb << 10); + atomic_store(&alloc_dealloc_mismatch, options.alloc_dealloc_mismatch, + memory_order_release); + atomic_store(&min_redzone, options.min_redzone, memory_order_release); + atomic_store(&max_redzone, options.max_redzone, memory_order_release); + } + + void InitLinkerInitialized(const AllocatorOptions &options) { + SetAllocatorMayReturnNull(options.may_return_null); + allocator.InitLinkerInitialized(options.release_to_os_interval_ms); + SharedInitCode(options); + max_user_defined_malloc_size = common_flags()->max_allocation_size_mb + ? common_flags()->max_allocation_size_mb + << 20 + : kMaxAllowedMallocSize; + } + + bool RssLimitExceeded() { + return atomic_load(&rss_limit_exceeded, memory_order_relaxed); + } + + void SetRssLimitExceeded(bool limit_exceeded) { + atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed); + } + + void RePoisonChunk(uptr chunk) { + // This could be a user-facing chunk (with redzones), or some internal + // housekeeping chunk, like TransferBatch. Start by assuming the former. 
+ AsanChunk *ac = GetAsanChunk((void *)chunk); + uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end && + ac->chunk_state == CHUNK_ALLOCATED) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + } else { + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); + } + } + + void ReInitialize(const AllocatorOptions &options) { + SetAllocatorMayReturnNull(options.may_return_null); + allocator.SetReleaseToOSIntervalMs(options.release_to_os_interval_ms); + SharedInitCode(options); + + // Poison all existing allocation's redzones. + if (CanPoisonMemory()) { + allocator.ForceLock(); + allocator.ForEachChunk( + [](uptr chunk, void *alloc) { + ((Allocator *)alloc)->RePoisonChunk(chunk); + }, + this); + allocator.ForceUnlock(); + } + } + + void GetOptions(AllocatorOptions *options) const { + options->quarantine_size_mb = quarantine.GetSize() >> 20; + options->thread_local_quarantine_size_kb = quarantine.GetCacheSize() >> 10; + options->min_redzone = atomic_load(&min_redzone, memory_order_acquire); + options->max_redzone = atomic_load(&max_redzone, memory_order_acquire); + options->may_return_null = AllocatorMayReturnNull(); + options->alloc_dealloc_mismatch = + atomic_load(&alloc_dealloc_mismatch, memory_order_acquire); + options->release_to_os_interval_ms = allocator.ReleaseToOSIntervalMs(); + } + + // -------------------- Helper methods. ------------------------- + uptr ComputeRZLog(uptr user_requested_size) { + return 0; +#if 0 + u32 rz_log = + user_requested_size <= 64 - 16 ? 0 : + user_requested_size <= 128 - 32 ? 1 : + user_requested_size <= 512 - 64 ? 2 : + user_requested_size <= 4096 - 128 ? 3 : + user_requested_size <= (1 << 14) - 256 ? 4 : + user_requested_size <= (1 << 15) - 512 ? 5 : + user_requested_size <= (1 << 16) - 1024 ? 6 : 7; + u32 min_rz = atomic_load(&min_redzone, memory_order_acquire); + u32 max_rz = atomic_load(&max_redzone, memory_order_acquire); + return Min(Max(rz_log, RZSize2Log(min_rz)), RZSize2Log(max_rz)); +#endif + } + + // We have an address between two chunks, and we want to report just one. + AsanChunk *ChooseChunk(uptr addr, AsanChunk *left_chunk, + AsanChunk *right_chunk) { + // Prefer an allocated chunk over freed chunk and freed chunk + // over available chunk. + if (left_chunk->chunk_state != right_chunk->chunk_state) { + if (left_chunk->chunk_state == CHUNK_ALLOCATED) + return left_chunk; + if (right_chunk->chunk_state == CHUNK_ALLOCATED) + return right_chunk; + if (left_chunk->chunk_state == CHUNK_QUARANTINE) + return left_chunk; + if (right_chunk->chunk_state == CHUNK_QUARANTINE) + return right_chunk; + } + // Same chunk_state: choose based on offset. 
+ sptr l_offset = 0, r_offset = 0; + CHECK(AsanChunkView(left_chunk).AddrIsAtRight(addr, 1, &l_offset)); + CHECK(AsanChunkView(right_chunk).AddrIsAtLeft(addr, 1, &r_offset)); + if (l_offset < r_offset) + return left_chunk; + return right_chunk; + } + + bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { + AsanChunk *m = GetAsanChunkByAddr(addr); + if (!m) return false; + if (m->chunk_state != CHUNK_ALLOCATED) return false; + if (m->Beg() != addr) return false; + atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), + memory_order_relaxed); + return true; + } + + // -------------------- Allocation/Deallocation routines --------------- + void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack, + AllocType alloc_type, bool can_fill) { + if (UNLIKELY(!asan_inited)) + AsanInitFromRtl(); + if (RssLimitExceeded()) { + if (AllocatorMayReturnNull()) + return nullptr; + ReportRssLimitExceeded(stack); + } + Flags &fl = *flags(); + CHECK(stack); + const uptr min_alignment = ASAN_ALIGNMENT; + if (alignment < min_alignment) + alignment = min_alignment; + if (size == 0) { + // We'd be happy to avoid allocating memory for zero-size requests, but + // some programs/tests depend on this behavior and assume that malloc + // would not return NULL even for zero-size allocations. Moreover, it + // looks like operator new should never return NULL, and results of + // consecutive "new" calls must be different even if the allocated size + // is zero. + size = 1; + } + CHECK(IsPowerOfTwo(alignment)); + uptr rz_log = ComputeRZLog(size); + //uptr rz_size = Max((uptr)RZLog2Size(rz_log), alignment); + uptr rz_size = (uptr)RZLog2Size(rz_log); + uptr rounded_size = RoundUpTo(Max(size, kChunkHeader2Size), alignment); + uptr needed_size = rounded_size + rz_size; + if (alignment > min_alignment) + needed_size += alignment; + bool using_primary_allocator = true; + // If we are allocating from the secondary allocator, there will be no + // automatic right redzone, so add the right redzone manually. + if (!PrimaryAllocator::CanAllocate(needed_size, alignment)) { + needed_size += rz_size; + using_primary_allocator = false; + } + CHECK(IsAligned(needed_size, min_alignment)); + if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize || + size > max_user_defined_malloc_size) { + if (AllocatorMayReturnNull()) { + Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n", + (void*)size); + return nullptr; + } + uptr malloc_limit = + Min(kMaxAllowedMallocSize, max_user_defined_malloc_size); + ReportAllocationSizeTooBig(size, needed_size, malloc_limit, stack); + } + + AsanThread *t = GetCurrentThread(); + void *allocated; + if (t) { + AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); + allocated = allocator.Allocate(cache, needed_size, 8); + } else { + SpinMutexLock l(&fallback_mutex); + AllocatorCache *cache = &fallback_allocator_cache; + allocated = allocator.Allocate(cache, needed_size, 8); + } + if (UNLIKELY(!allocated)) { + SetAllocatorOutOfMemory(); + if (AllocatorMayReturnNull()) + return nullptr; + ReportOutOfMemory(size, stack); + } + + if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) { + // Heap poisoning is enabled, but the allocator provides an unpoisoned + // chunk. This is possible if CanPoisonMemory() was false for some + // time, for example, due to flags()->start_disabled. + // Anyway, poison the block before using it for anything else. 
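To make the size and layout bookkeeping in Allocate() concrete, a worked example for malloc(100) at the default 8-byte alignment, derived from the constants above (RZLog2Size() is pinned to 32 and kChunkHeaderSize is 32 in this prototype); this is a sketch, not part of the patch.

//   rz_size      = 32                                  // RZLog2Size(rz_log)
//   rounded_size = RoundUpTo(Max(100, 16), 8) = 104
//   needed_size  = 104 + 32 = 136
//   user_beg     = alloc_beg + 32                      // already 8-byte aligned
//   chunk_beg    = user_beg - kChunkHeaderSize = alloc_beg
// So the 32-byte ChunkHeader exactly fills the left redzone, and the
// kAllocBegMagic spill-over path is only needed for over-aligned requests.
static_assert(((100u + 7u) / 8u) * 8u == 104u, "rounded_size for malloc(100)");
static_assert(104u + 32u == 136u, "needed_size for malloc(100)");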
+ uptr allocated_size = allocator.GetActuallyAllocatedSize(allocated); + PoisonShadow((uptr)allocated, allocated_size, kAsanHeapLeftRedzoneMagic); + } + + uptr alloc_beg = reinterpret_cast(allocated); + uptr alloc_end = alloc_beg + needed_size; + uptr beg_plus_redzone = alloc_beg + rz_size; + uptr user_beg = beg_plus_redzone; + if (!IsAligned(user_beg, alignment)) + user_beg = RoundUpTo(user_beg, alignment); + uptr user_end = user_beg + size; + CHECK_LE(user_end, alloc_end); + uptr chunk_beg = user_beg - kChunkHeaderSize; + AsanChunk *m = reinterpret_cast(chunk_beg); + //fprintf(stderr, "Allocate %lx %lx %lu %lu\n", m, chunk_beg, size, alignment); + m->from_memalign = user_beg != beg_plus_redzone; + if (alloc_beg != chunk_beg) { + CHECK_LE(alloc_beg+ 2 * sizeof(uptr), chunk_beg); + reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; + reinterpret_cast(alloc_beg)[1] = chunk_beg; + } + if (using_primary_allocator) { + CHECK(size); + m->user_requested_size = size; + CHECK(allocator.FromPrimary(allocated)); + } else { + CHECK(!allocator.FromPrimary(allocated)); + m->user_requested_size = SizeClassMap::kMaxSize; + uptr *meta = reinterpret_cast(allocator.GetMetaData(allocated)); + meta[0] = size; + meta[1] = chunk_beg; + } + +#if USEHIB + m->cpu_id = GetCpuId(); + + m->timestamp_ms = GetTimestamp(); +#endif + + m->alloc_context_id = StackDepotPut(*stack); + +#if 0 + if (asan_inited && asan_init_done && !destructing) { + fprintf(stderr, "Allocate %llx: alignment = %u, size = %u, rz_log = %u, rz_size = %u, rounded_size = %u, needed_size = %u, cpu_id = %d, timestamp = %d\n", m, alignment, size, rz_log, rz_size, rounded_size, needed_size, m->cpu_id, m->timestamp_ms); + fprintf(stderr, "Stack:\n"); + StackDepotGet(m->alloc_context_id).Print(); + } +#endif + + uptr size_rounded_down_to_granularity = + RoundDownTo(size, SHADOW_GRANULARITY); + // Unpoison the bulk of the memory region. + if (size_rounded_down_to_granularity) + PoisonShadow(user_beg, size_rounded_down_to_granularity, 0); + // Deal with the end of the region if size is not aligned to granularity. + if (size != size_rounded_down_to_granularity && CanPoisonMemory()) { + u8 *shadow = + (u8 *)MemToShadow(user_beg + size_rounded_down_to_granularity); + *shadow = fl.poison_partial ? (size & (SHADOW_GRANULARITY - 1)) : 0; + } + + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.mallocs++; + thread_stats.malloced += size; + thread_stats.malloced_redzones += needed_size - size; + if (needed_size > SizeClassMap::kMaxSize) + thread_stats.malloc_large++; + else + thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++; + + void *res = reinterpret_cast(user_beg); + if (can_fill && fl.max_malloc_fill_size) { + uptr fill_size = Min(size, (uptr)fl.max_malloc_fill_size); + REAL(memset)(res, fl.malloc_fill_byte, fill_size); + } + // Must be the last mutation of metadata in this function. + atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); + ASAN_MALLOC_HOOK(res, size); + return res; + } + + void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment, + BufferedStackTrace *stack, AllocType alloc_type) { + uptr p = reinterpret_cast(ptr); + if (p == 0) return; + + uptr chunk_beg = p - kChunkHeaderSize; + AsanChunk *m = reinterpret_cast(chunk_beg); + + if (asan_inited && asan_init_done && !destructing) { +#if USEHIB + // Need to add all counts in chunk (i.e. if size >64b)! 
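For reference, the arithmetic behind the GetShadowCount() call that follows (a sketch restating what the helper above does): each 64-byte user granule (MEM_GRANULARITY) is covered by one u64 access counter in shadow, so a chunk contributes ceil(size / 64) counters that are summed, and the utilized fraction counts how many of those are non-zero.

// Illustration only: a 200-byte chunk spans ceil(200 / 64) = 4 granules,
// so 4 consecutive u64 shadow counters are summed for it.
static_assert((200u + 64u - 1u) / 64u == 4u, "granules for a 200-byte chunk");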
+ //u64 c = *(u64 *)MEM_TO_SHADOW(p); + u32 percent_utilized; + u64 c = GetShadowCount(p, m->user_requested_size, percent_utilized); + long curtime = GetTimestamp(); +#if 0 + long lifetime = curtime - m->timestamp_ms; + fprintf(stderr, "Free chunk %llx with access count %llu, size %u lifetime (ms) %d dealloc cpu %d alloc cpu %d\n", m, c, m->user_requested_size, + lifetime, GetCpuId(), m->cpu_id); + fprintf(stderr, "Stack:\n"); + StackDepotGet(m->alloc_context_id).Print(); +#endif + + HeapInfoBlock newHIB(m->user_requested_size, c, m->timestamp_ms, curtime, percent_utilized, m->cpu_id, GetCpuId()); + // Can we make this per-thread? + { + SpinMutexLock l(&fallback_mutex); +#if USEHIBOLD + auto I = HeapInfoBlockTable.insert({m->alloc_context_id, newHIB}); + if (!I.second) + I.first->second.Merge(newHIB); +#else + HeapInfoBlockTable.insertOrMerge(m->alloc_context_id, newHIB); +#endif + } +#endif + } + + // On Windows, uninstrumented DLLs may allocate memory before ASan hooks + // malloc. Don't report an invalid free in this case. + if (SANITIZER_WINDOWS && + !get_allocator().PointerIsMine(ptr)) { + if (!IsSystemHeapAddress(p)) + ReportFreeNotMalloced(p, stack); + return; + } + + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.frees++; + thread_stats.freed += m->UsedSize(); + + AsanThread *t = GetCurrentThread(); + if (t) { + AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); + allocator.Deallocate(cache, m->AllocBeg()); + } else { + SpinMutexLock l(&fallback_mutex); + AllocatorCache *cache = &fallback_allocator_cache; + allocator.Deallocate(cache, m->AllocBeg()); + } + + m->user_requested_size = 0; + atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); + ASAN_FREE_HOOK(ptr); + } + + void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) { + CHECK(old_ptr && new_size); + uptr p = reinterpret_cast(old_ptr); + uptr chunk_beg = p - kChunkHeaderSize; + AsanChunk *m = reinterpret_cast(chunk_beg); + + AsanStats &thread_stats = GetCurrentThreadStats(); + thread_stats.reallocs++; + thread_stats.realloced += new_size; + + void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); + if (new_ptr) { + u8 chunk_state = m->chunk_state; + if (chunk_state != CHUNK_ALLOCATED) + ReportInvalidFree(old_ptr, chunk_state, stack); + CHECK_NE(REAL(memcpy), nullptr); + uptr memcpy_size = Min(new_size, m->UsedSize()); + // If realloc() races with free(), we may start copying freed memory. + // However, we will report racy double-free later anyway. + REAL(memcpy)(new_ptr, old_ptr, memcpy_size); + Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC); + } + return new_ptr; + } + + void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { + if (AllocatorMayReturnNull()) + return nullptr; + ReportCallocOverflow(nmemb, size, stack); + } + void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false); + // If the memory comes from the secondary allocator no need to clear it + // as it comes directly from mmap. 
+ if (ptr && allocator.FromPrimary(ptr)) + REAL(memset)(ptr, 0, nmemb * size); + return ptr; + } + + void ReportInvalidFree(void *ptr, u8 chunk_state, BufferedStackTrace *stack) { + if (chunk_state == CHUNK_QUARANTINE) + ReportDoubleFree((uptr)ptr, stack); + else + ReportFreeNotMalloced((uptr)ptr, stack); + } + + void CommitBack(AsanThreadLocalMallocStorage *ms, BufferedStackTrace *stack) { + AllocatorCache *ac = GetAllocatorCache(ms); + quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac, stack)); + allocator.SwallowCache(ac); + } + + // -------------------------- Chunk lookup ---------------------- + + // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). + AsanChunk *GetAsanChunk(void *alloc_beg) { + if (!alloc_beg) return nullptr; + if (!allocator.FromPrimary(alloc_beg)) { + uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); + AsanChunk *m = reinterpret_cast(meta[1]); + return m; + } + uptr *alloc_magic = reinterpret_cast(alloc_beg); + if (alloc_magic[0] == kAllocBegMagic) + return reinterpret_cast(alloc_magic[1]); + return reinterpret_cast(alloc_beg); + } + + AsanChunk *GetAsanChunkByAddr(uptr p) { + void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); + return GetAsanChunk(alloc_beg); + } + + // Allocator must be locked when this function is called. + AsanChunk *GetAsanChunkByAddrFastLocked(uptr p) { + void *alloc_beg = + allocator.GetBlockBeginFastLocked(reinterpret_cast(p)); + return GetAsanChunk(alloc_beg); + } + + uptr AllocationSize(uptr p) { + AsanChunk *m = GetAsanChunkByAddr(p); + if (!m) return 0; + if (m->chunk_state != CHUNK_ALLOCATED) return 0; + if (m->Beg() != p) return 0; + return m->UsedSize(); + } + + AsanChunkView FindHeapChunkByAddress(uptr addr) { + AsanChunk *m1 = GetAsanChunkByAddr(addr); + if (!m1) return AsanChunkView(m1); + sptr offset = 0; + if (AsanChunkView(m1).AddrIsAtLeft(addr, 1, &offset)) { + // The address is in the chunk's left redzone, so maybe it is actually + // a right buffer overflow from the other chunk to the left. + // Search a bit to the left to see if there is another chunk. + AsanChunk *m2 = nullptr; + for (uptr l = 1; l < GetPageSizeCached(); l++) { + m2 = GetAsanChunkByAddr(addr - l); + if (m2 == m1) continue; // Still the same chunk. 
+ break; + } + if (m2 && AsanChunkView(m2).AddrIsAtRight(addr, 1, &offset)) + m1 = ChooseChunk(addr, m2, m1); + } + return AsanChunkView(m1); + } + + void Purge(BufferedStackTrace *stack) { + AsanThread *t = GetCurrentThread(); + if (t) { + AsanThreadLocalMallocStorage *ms = &t->malloc_storage(); + quarantine.DrainAndRecycle(GetQuarantineCache(ms), + QuarantineCallback(GetAllocatorCache(ms), + stack)); + } + { + SpinMutexLock l(&fallback_mutex); + quarantine.DrainAndRecycle(&fallback_quarantine_cache, + QuarantineCallback(&fallback_allocator_cache, + stack)); + } + + allocator.ForceReleaseToOS(); + } + + void PrintStats() { + allocator.PrintStats(); + quarantine.PrintStats(); + } + + void ForceLock() { + allocator.ForceLock(); + fallback_mutex.Lock(); + } + + void ForceUnlock() { + fallback_mutex.Unlock(); + allocator.ForceUnlock(); + } +}; + +static Allocator instance(LINKER_INITIALIZED); + +extern "C" int __llvm_profile_dump() { + instance.FinishAndPrint(); + return 0; +} + +static AsanAllocator &get_allocator() { + return instance.allocator; +} + +bool AsanChunkView::IsValid() const { + return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE; +} +bool AsanChunkView::IsAllocated() const { + return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; +} +bool AsanChunkView::IsQuarantined() const { + return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; +} +uptr AsanChunkView::Beg() const { return chunk_->Beg(); } +uptr AsanChunkView::End() const { return Beg() + UsedSize(); } +uptr AsanChunkView::UsedSize() const { return chunk_->UsedSize(); } +u32 AsanChunkView::UserRequestedAlignment() const { + return 0; +} +uptr AsanChunkView::AllocTid() const { return 0; } +uptr AsanChunkView::FreeTid() const { return 0; } +AllocType AsanChunkView::GetAllocType() const { + return FROM_MALLOC; +} + +static StackTrace GetStackTraceFromId(u32 id) { + CHECK(id); + StackTrace res = StackDepotGet(id); + CHECK(res.trace); + return res; +} + +u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } +u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; } + +StackTrace AsanChunkView::GetAllocStack() const { + return GetStackTraceFromId(GetAllocStackId()); +} + +StackTrace AsanChunkView::GetFreeStack() const { + return GetStackTraceFromId(GetFreeStackId()); +} + +void InitializeAllocator(const AllocatorOptions &options) { + instance.InitLinkerInitialized(options); +} + +void ReInitializeAllocator(const AllocatorOptions &options) { + instance.ReInitialize(options); +} + +void GetAllocatorOptions(AllocatorOptions *options) { + instance.GetOptions(options); +} + +AsanChunkView FindHeapChunkByAddress(uptr addr) { + return instance.FindHeapChunkByAddress(addr); +} +AsanChunkView FindHeapChunkByAllocBeg(uptr addr) { + return AsanChunkView(instance.GetAsanChunk(reinterpret_cast(addr))); +} + +void AsanThreadLocalMallocStorage::CommitBack() { + GET_STACK_TRACE_MALLOC; + instance.CommitBack(this, &stack); +} + +void PrintInternalAllocatorStats() { + instance.PrintStats(); +} + +void asan_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) { + instance.Deallocate(ptr, 0, 0, stack, alloc_type); +} + +void asan_delete(void *ptr, uptr size, uptr alignment, + BufferedStackTrace *stack, AllocType alloc_type) { + instance.Deallocate(ptr, size, alignment, stack, alloc_type); +} + +void *asan_malloc(uptr size, BufferedStackTrace *stack) { + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true)); +} + +void *asan_calloc(uptr nmemb, uptr size, 
BufferedStackTrace *stack) { + return SetErrnoOnNull(instance.Calloc(nmemb, size, stack)); +} + +void *asan_reallocarray(void *p, uptr nmemb, uptr size, + BufferedStackTrace *stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { + errno = errno_ENOMEM; + if (AllocatorMayReturnNull()) + return nullptr; + ReportReallocArrayOverflow(nmemb, size, stack); + } + return asan_realloc(p, nmemb * size, stack); +} + +void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) { + if (!p) + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true)); + if (size == 0) { + if (flags()->allocator_frees_and_returns_null_on_realloc_zero) { + instance.Deallocate(p, 0, 0, stack, FROM_MALLOC); + return nullptr; + } + // Allocate a size of 1 if we shouldn't free() on Realloc to 0 + size = 1; + } + return SetErrnoOnNull(instance.Reallocate(p, size, stack)); +} + +void *asan_valloc(uptr size, BufferedStackTrace *stack) { + return SetErrnoOnNull( + instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true)); +} + +void *asan_pvalloc(uptr size, BufferedStackTrace *stack) { + uptr PageSize = GetPageSizeCached(); + if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { + errno = errno_ENOMEM; + if (AllocatorMayReturnNull()) + return nullptr; + ReportPvallocOverflow(size, stack); + } + // pvalloc(0) should allocate one page. + size = size ? RoundUpTo(size, PageSize) : PageSize; + return SetErrnoOnNull( + instance.Allocate(size, PageSize, stack, FROM_MALLOC, true)); +} + +void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, + AllocType alloc_type) { + if (UNLIKELY(!IsPowerOfTwo(alignment))) { + errno = errno_EINVAL; + if (AllocatorMayReturnNull()) + return nullptr; + ReportInvalidAllocationAlignment(alignment, stack); + } + return SetErrnoOnNull( + instance.Allocate(size, alignment, stack, alloc_type, true)); +} + +void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack) { + if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { + errno = errno_EINVAL; + if (AllocatorMayReturnNull()) + return nullptr; + ReportInvalidAlignedAllocAlignment(size, alignment, stack); + } + return SetErrnoOnNull( + instance.Allocate(size, alignment, stack, FROM_MALLOC, true)); +} + +int asan_posix_memalign(void **memptr, uptr alignment, uptr size, + BufferedStackTrace *stack) { + if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { + if (AllocatorMayReturnNull()) + return errno_EINVAL; + ReportInvalidPosixMemalignAlignment(alignment, stack); + } + void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true); + if (UNLIKELY(!ptr)) + // OOM error is already taken care of by Allocate. 
+ return errno_ENOMEM; + CHECK(IsAligned((uptr)ptr, alignment)); + *memptr = ptr; + return 0; +} + +uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { + if (!ptr) return 0; + uptr usable_size = instance.AllocationSize(reinterpret_cast(ptr)); + if (flags()->check_malloc_usable_size && (usable_size == 0)) { + GET_STACK_TRACE_FATAL(pc, bp); + ReportMallocUsableSizeNotOwned((uptr)ptr, &stack); + } + return usable_size; +} + +uptr asan_mz_size(const void *ptr) { + return instance.AllocationSize(reinterpret_cast(ptr)); +} + +void asan_mz_force_lock() { + instance.ForceLock(); +} + +void asan_mz_force_unlock() { + instance.ForceUnlock(); +} + +void AsanSoftRssLimitExceededCallback(bool limit_exceeded) { + instance.SetRssLimitExceeded(limit_exceeded); +} + +} // namespace __asan + +// --- Implementation of LSan-specific functions --- {{{1 +namespace __lsan { +void LockAllocator() { + __asan::get_allocator().ForceLock(); +} + +void UnlockAllocator() { + __asan::get_allocator().ForceUnlock(); +} + +void GetAllocatorGlobalRange(uptr *begin, uptr *end) { + *begin = (uptr)&__asan::get_allocator(); + *end = *begin + sizeof(__asan::get_allocator()); +} + +uptr PointsIntoChunk(void* p) { + uptr addr = reinterpret_cast(p); + __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); + if (!m) return 0; + uptr chunk = m->Beg(); + if (m->chunk_state != __asan::CHUNK_ALLOCATED) + return 0; + if (m->AddrIsInside(addr, /*locked_version=*/true)) + return chunk; + if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), + addr)) + return chunk; + return 0; +} + +// Debug code. Delete once issue #1193 is chased down. +extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; + +uptr GetUserBegin(uptr chunk) { + __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk); + if (!m) + Printf( + "ASAN is about to crash with a CHECK failure.\n" + "The ASAN developers are trying to chase down this bug,\n" + "so if you've encountered this bug please let us know.\n" + "See also: https://github.com/google/sanitizers/issues/1193\n" + "chunk: %p caller %p __lsan_current_stage %s\n", + chunk, GET_CALLER_PC(), __lsan_current_stage); + CHECK(m); + return m->Beg(); +} + +LsanMetadata::LsanMetadata(uptr chunk) { + metadata_ = reinterpret_cast(chunk - __asan::kChunkHeaderSize); +} + +bool LsanMetadata::allocated() const { + __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); + return m->chunk_state == __asan::CHUNK_ALLOCATED; +} + +ChunkTag LsanMetadata::tag() const { + return ChunkTag::kIgnored; +} + +void LsanMetadata::set_tag(ChunkTag value) { +} + +uptr LsanMetadata::requested_size() const { + __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); + return m->UsedSize(/*locked_version=*/true); +} + +u32 LsanMetadata::stack_trace_id() const { + __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); + return m->alloc_context_id; +} + +void ForEachChunk(ForEachChunkCallback callback, void *arg) { + __asan::get_allocator().ForEachChunk(callback, arg); +} + +IgnoreObjectResult IgnoreObjectLocked(const void *p) { + uptr addr = reinterpret_cast(p); + __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); + if (!m) return kIgnoreObjectInvalid; + if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { + return kIgnoreObjectSuccess; + } else { + return kIgnoreObjectInvalid; + } +} +} // namespace __lsan + +// ---------------------- Interface ---------------- {{{1 +using 
namespace __asan; + +// ASan allocator doesn't reserve extra bytes, so normally we would +// just return "size". We don't want to expose our redzone sizes, etc here. +uptr __sanitizer_get_estimated_allocated_size(uptr size) { + return size; +} + +int __sanitizer_get_ownership(const void *p) { + uptr ptr = reinterpret_cast(p); + return instance.AllocationSize(ptr) > 0; +} + +uptr __sanitizer_get_allocated_size(const void *p) { + if (!p) return 0; + uptr ptr = reinterpret_cast(p); + uptr allocated_size = instance.AllocationSize(ptr); + // Die if p is not malloced or if it is already freed. + if (allocated_size == 0) { + GET_STACK_TRACE_FATAL_HERE; + ReportSanitizerGetAllocatedSizeNotOwned(ptr, &stack); + } + return allocated_size; +} + +void __sanitizer_purge_allocator() { + GET_STACK_TRACE_MALLOC; + instance.Purge(&stack); +} + +int __asan_update_allocation_context(void* addr) { + GET_STACK_TRACE_MALLOC; + return instance.UpdateAllocationStack((uptr)addr, &stack); +} + +#if !SANITIZER_SUPPORTS_WEAK_HOOKS +// Provide default (no-op) implementation of malloc hooks. +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, + void *ptr, uptr size) { + (void)ptr; + (void)size; +} + +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) { + (void)ptr; +} +#endif diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp.sv b/compiler-rt/lib/hwasan/hwasan_linux.cpp.sv new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp.sv @@ -0,0 +1,441 @@ +//===-- hwasan_linux.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file is a part of HWAddressSanitizer and contains Linux-, NetBSD- and +/// FreeBSD-specific code. +/// +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD + +#include "hwasan.h" +#include "hwasan_dynamic_shadow.h" +#include "hwasan_interface_internal.h" +#include "hwasan_mapping.h" +#include "hwasan_report.h" +#include "hwasan_thread.h" +#include "hwasan_thread_list.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_procmaps.h" + +// Configurations of HWASAN_WITH_INTERCEPTORS and SANITIZER_ANDROID. +// +// HWASAN_WITH_INTERCEPTORS=OFF, SANITIZER_ANDROID=OFF +// Not currently tested. +// HWASAN_WITH_INTERCEPTORS=OFF, SANITIZER_ANDROID=ON +// Integration tests downstream exist. +// HWASAN_WITH_INTERCEPTORS=ON, SANITIZER_ANDROID=OFF +// Tested with check-hwasan on x86_64-linux. +// HWASAN_WITH_INTERCEPTORS=ON, SANITIZER_ANDROID=ON +// Tested with check-hwasan on aarch64-linux-android. 
+#if !SANITIZER_ANDROID +SANITIZER_INTERFACE_ATTRIBUTE +THREADLOCAL uptr __hwasan_tls; +#endif + +namespace __hwasan { + +static void ProtectGap(uptr addr, uptr size, const char *name) { + __sanitizer::ProtectGap(addr, size, kZeroBaseShadowStart, + kZeroBaseMaxShadowStart, name); +} + +static uptr kLowMemStart; +static uptr kLowMemEnd; +static uptr kLowShadowEnd; +static uptr kLowShadowStart; +static uptr kHighShadowStart; +static uptr kHighShadowEnd; +static uptr kHighMemStart; +static uptr kHighMemEnd; + +static void PrintRange(uptr start, uptr end, const char *name) { + Printf("|| [%p, %p] || %.*s ||\n", (void *)start, (void *)end, 10, name); +} + +static void PrintAddressSpaceLayout() { + PrintRange(kHighMemStart, kHighMemEnd, "HighMem"); + if (kHighShadowEnd + 1 < kHighMemStart) + PrintRange(kHighShadowEnd + 1, kHighMemStart - 1, "ShadowGap"); + else + CHECK_EQ(kHighShadowEnd + 1, kHighMemStart); + PrintRange(kHighShadowStart, kHighShadowEnd, "HighShadow"); + if (kLowShadowEnd + 1 < kHighShadowStart) + PrintRange(kLowShadowEnd + 1, kHighShadowStart - 1, "ShadowGap"); + else + CHECK_EQ(kLowMemEnd + 1, kHighShadowStart); + PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow"); + if (kLowMemEnd + 1 < kLowShadowStart) + PrintRange(kLowMemEnd + 1, kLowShadowStart - 1, "ShadowGap"); + else + CHECK_EQ(kLowMemEnd + 1, kLowShadowStart); + PrintRange(kLowMemStart, kLowMemEnd, "LowMem"); + CHECK_EQ(0, kLowMemStart); +} + +static void InitializeShadowBaseAddress(uptr shadow_size_bytes) { + __hwasan_shadow_memory_dynamic_address = + FindDynamicShadowStart(shadow_size_bytes); +} + +void InitPrctl() { +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +#define PR_TAGGED_ADDR_ENABLE (1UL << 0) + // Check we're running on a kernel that can use the tagged address ABI. + if (internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) == (uptr)-1 && + errno == EINVAL) { +#if SANITIZER_ANDROID + // Some older Android kernels have the tagged pointer ABI on + // unconditionally, and hence don't have the tagged-addr prctl while still + // allow the ABI. + // If targeting Android and the prctl is not around we assume this is the + // case. + return; +#else + Printf( + "FATAL: " + "HWAddressSanitizer requires a kernel with tagged address ABI.\n"); + Die(); +#endif + } + + // Turn on the tagged address ABI. + if (internal_prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == + (uptr)-1 || + !internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) { + Printf( + "FATAL: HWAddressSanitizer failed to enable tagged address syscall " + "ABI.\nSuggest check `sysctl abi.tagged_addr_disabled` " + "configuration.\n"); + Die(); + } +#undef PR_SET_TAGGED_ADDR_CTRL +#undef PR_GET_TAGGED_ADDR_CTRL +#undef PR_TAGGED_ADDR_ENABLE +} + +bool InitShadow() { + // Define the entire memory range. + kHighMemEnd = GetHighMemEnd(kShadowScale); + + // Determine shadow memory base offset. + InitializeShadowBaseAddress(MemToShadowSize(kHighMemEnd)); + + // Place the low memory first. + kLowMemEnd = __hwasan_shadow_memory_dynamic_address - 1; + kLowMemStart = 0; + + // Define the low shadow based on the already placed low memory. + kLowShadowEnd = MemToShadow(kLowMemEnd); + kLowShadowStart = __hwasan_shadow_memory_dynamic_address; + + // High shadow takes whatever memory is left up there (making sure it is not + // interfering with low memory in the fixed case). 
+ kHighShadowEnd = MemToShadow(kHighMemEnd); + kHighShadowStart = Max(kLowMemEnd, MemToShadow(kHighShadowEnd)) + 1; + + // High memory starts where allocated shadow allows. + kHighMemStart = ShadowToMem(kHighShadowStart); + + // Check the sanity of the defined memory ranges (there might be gaps). + CHECK_EQ(kHighMemStart % GetMmapGranularity(), 0); + CHECK_GT(kHighMemStart, kHighShadowEnd); + CHECK_GT(kHighShadowEnd, kHighShadowStart); + CHECK_GT(kHighShadowStart, kLowMemEnd); + CHECK_GT(kLowMemEnd, kLowMemStart); + CHECK_GT(kLowShadowEnd, kLowShadowStart); + CHECK_GT(kLowShadowStart, kLowMemEnd); + + if (Verbosity()) + PrintAddressSpaceLayout(); + + // Reserve shadow memory. + ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd, "low shadow"); + ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd, "high shadow"); + + // Protect all the gaps. + ProtectGap(0, Min(kLowMemStart, kLowShadowStart), "below low shadow/mem"); + if (kLowMemEnd + 1 < kLowShadowStart) + ProtectGap(kLowMemEnd + 1, kLowShadowStart - kLowMemEnd - 1, "low mem to shadow"); + if (kLowShadowEnd + 1 < kHighShadowStart) + ProtectGap(kLowShadowEnd + 1, kHighShadowStart - kLowShadowEnd - 1, "low shadow to high shadow"); + if (kHighShadowEnd + 1 < kHighMemStart) + ProtectGap(kHighShadowEnd + 1, kHighMemStart - kHighShadowEnd - 1, "above high shadow"); + + return true; +} + +void InitThreads() { + CHECK(__hwasan_shadow_memory_dynamic_address); + uptr guard_page_size = GetMmapGranularity(); + uptr thread_space_start = + __hwasan_shadow_memory_dynamic_address - (1ULL << kShadowBaseAlignment); + uptr thread_space_end = + __hwasan_shadow_memory_dynamic_address - guard_page_size; + ReserveShadowMemoryRange(thread_space_start, thread_space_end - 1, + "hwasan threads", /*madvise_shadow*/ false); + ProtectGap(thread_space_end, + __hwasan_shadow_memory_dynamic_address - thread_space_end, "hwasan threads"); + InitThreadList(thread_space_start, thread_space_end - thread_space_start); +} + +bool MemIsApp(uptr p) { + CHECK(GetTagFromPointer(p) == 0); + return p >= kHighMemStart || (p >= kLowMemStart && p <= kLowMemEnd); +} + +static void HwasanAtExit(void) { + if (common_flags()->print_module_map) + DumpProcessMap(); + if (flags()->print_stats && (flags()->atexit || hwasan_report_count > 0)) + ReportStats(); + if (hwasan_report_count > 0) { + // ReportAtExitStatistics(); + if (common_flags()->exitcode) + internal__exit(common_flags()->exitcode); + } +} + +void InstallAtExitHandler() { + atexit(HwasanAtExit); +} + +// ---------------------- TSD ---------------- {{{1 + +extern "C" void __hwasan_thread_enter() { + hwasanThreadList().CreateCurrentThread()->InitRandomState(); +} + +extern "C" void __hwasan_thread_exit() { + Thread *t = GetCurrentThread(); + // Make sure that signal handler can not see a stale current thread pointer. 
+ atomic_signal_fence(memory_order_seq_cst); + if (t) + hwasanThreadList().ReleaseThread(t); +} + +#if HWASAN_WITH_INTERCEPTORS +static pthread_key_t tsd_key; +static bool tsd_key_inited = false; + +void HwasanTSDThreadInit() { + if (tsd_key_inited) + CHECK_EQ(0, pthread_setspecific(tsd_key, + (void *)GetPthreadDestructorIterations())); +} + +void HwasanTSDDtor(void *tsd) { + uptr iterations = (uptr)tsd; + if (iterations > 1) { + CHECK_EQ(0, pthread_setspecific(tsd_key, (void *)(iterations - 1))); + return; + } + __hwasan_thread_exit(); +} + +void HwasanTSDInit() { + CHECK(!tsd_key_inited); + tsd_key_inited = true; + CHECK_EQ(0, pthread_key_create(&tsd_key, HwasanTSDDtor)); +} +#else +void HwasanTSDInit() {} +void HwasanTSDThreadInit() {} +#endif + +#if SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return (uptr *)get_android_tls_ptr(); +} +#else +uptr *GetCurrentThreadLongPtr() { + return &__hwasan_tls; +} +#endif + +#if SANITIZER_ANDROID +void AndroidTestTlsSlot() { + uptr kMagicValue = 0x010203040A0B0C0D; + uptr *tls_ptr = GetCurrentThreadLongPtr(); + uptr old_value = *tls_ptr; + *tls_ptr = kMagicValue; + dlerror(); + if (*(uptr *)get_android_tls_ptr() != kMagicValue) { + Printf( + "ERROR: Incompatible version of Android: TLS_SLOT_SANITIZER(6) is used " + "for dlerror().\n"); + Die(); + } + *tls_ptr = old_value; +} +#else +void AndroidTestTlsSlot() {} +#endif + +Thread *GetCurrentThread() { + uptr *ThreadLongPtr = GetCurrentThreadLongPtr(); + if (UNLIKELY(*ThreadLongPtr == 0)) + return nullptr; + auto *R = (StackAllocationsRingBuffer *)ThreadLongPtr; + return hwasanThreadList().GetThreadByBufferAddress((uptr)R->Next()); +} + +struct AccessInfo { + uptr addr; + uptr size; + bool is_store; + bool is_load; + bool recover; +}; + +static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) { + // Access type is passed in a platform dependent way (see below) and encoded + // as 0xXY, where X&1 is 1 for store, 0 for load, and X&2 is 1 if the error is + // recoverable. Valid values of Y are 0 to 4, which are interpreted as + // log2(access_size), and 0xF, which means that access size is passed via + // platform dependent register (see below). +#if defined(__aarch64__) + // Access type is encoded in BRK immediate as 0x900 + 0xXY. For Y == 0xF, + // access size is stored in X1 register. Access address is always in X0 + // register. + uptr pc = (uptr)info->si_addr; + const unsigned code = ((*(u32 *)pc) >> 5) & 0xffff; + if ((code & 0xff00) != 0x900) + return AccessInfo{}; // Not ours. + + const bool is_store = code & 0x10; + const bool recover = code & 0x20; + const uptr addr = uc->uc_mcontext.regs[0]; + const unsigned size_log = code & 0xf; + if (size_log > 4 && size_log != 0xf) + return AccessInfo{}; // Not ours. + const uptr size = size_log == 0xf ? uc->uc_mcontext.regs[1] : 1U << size_log; + +#elif defined(__x86_64__) + // Access type is encoded in the instruction following INT3 as + // NOP DWORD ptr [EAX + 0x40 + 0xXY]. For Y == 0xF, access size is stored in + // RSI register. Access address is always in RDI register. + uptr pc = (uptr)uc->uc_mcontext.gregs[REG_RIP]; + uint8_t *nop = (uint8_t*)pc; + if (*nop != 0x0f || *(nop + 1) != 0x1f || *(nop + 2) != 0x40 || + *(nop + 3) < 0x40) + return AccessInfo{}; // Not ours. 
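+  // Example decoding (follows directly from the bit tests below): code == 0x13
+  // is an unrecoverable 8-byte store (bit 4 set, bit 5 clear, size_log == 3);
+  // code == 0x2f is a recoverable load whose size is read from RSI. These
+  // sample values are illustrative only.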
+ const unsigned code = *(nop + 3); + + const bool is_store = code & 0x10; + const bool recover = code & 0x20; + const uptr addr = uc->uc_mcontext.gregs[REG_RDI]; + const unsigned size_log = code & 0xf; + if (size_log > 4 && size_log != 0xf) + return AccessInfo{}; // Not ours. + const uptr size = + size_log == 0xf ? uc->uc_mcontext.gregs[REG_RSI] : 1U << size_log; + +#else +# error Unsupported architecture +#endif + + return AccessInfo{addr, size, is_store, !is_store, recover}; +} + +static void HandleTagMismatch(AccessInfo ai, uptr pc, uptr frame, + ucontext_t *uc, uptr *registers_frame = nullptr) { + InternalMmapVector stack_buffer(1); + BufferedStackTrace *stack = stack_buffer.data(); + stack->Reset(); + stack->Unwind(pc, frame, uc, common_flags()->fast_unwind_on_fatal); + + // The second stack frame contains the failure __hwasan_check function, as + // we have a stack frame for the registers saved in __hwasan_tag_mismatch that + // we wish to ignore. This (currently) only occurs on AArch64, as x64 + // implementations use SIGTRAP to implement the failure, and thus do not go + // through the stack saver. + if (registers_frame && stack->trace && stack->size > 0) { + stack->trace++; + stack->size--; + } + + bool fatal = flags()->halt_on_error || !ai.recover; + ReportTagMismatch(stack, ai.addr, ai.size, ai.is_store, fatal, + registers_frame); +} + +static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) { + AccessInfo ai = GetAccessInfo(info, uc); + if (!ai.is_store && !ai.is_load) + return false; + + SignalContext sig{info, uc}; + HandleTagMismatch(ai, StackTrace::GetNextInstructionPc(sig.pc), sig.bp, uc); + +#if defined(__aarch64__) + uc->uc_mcontext.pc += 4; +#elif defined(__x86_64__) +#else +# error Unsupported architecture +#endif + return true; +} + +static void OnStackUnwind(const SignalContext &sig, const void *, + BufferedStackTrace *stack) { + stack->Unwind(StackTrace::GetNextInstructionPc(sig.pc), sig.bp, sig.context, + common_flags()->fast_unwind_on_fatal); +} + +void HwasanOnDeadlySignal(int signo, void *info, void *context) { + // Probably a tag mismatch. + if (signo == SIGTRAP) + if (HwasanOnSIGTRAP(signo, (siginfo_t *)info, (ucontext_t*)context)) + return; + + HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr); +} + + +} // namespace __hwasan + +// Entry point for interoperability between __hwasan_tag_mismatch (ASM) and the +// rest of the mismatch handling code (C++). +void __hwasan_tag_mismatch4(uptr addr, uptr access_info, uptr *registers_frame, + size_t outsize) { + __hwasan::AccessInfo ai; + ai.is_store = access_info & 0x10; + ai.is_load = !ai.is_store; + ai.recover = access_info & 0x20; + ai.addr = addr; + if ((access_info & 0xf) == 0xf) + ai.size = outsize; + else + ai.size = 1 << (access_info & 0xf); + + __hwasan::HandleTagMismatch(ai, (uptr)__builtin_return_address(0), + (uptr)__builtin_frame_address(0), nullptr, + registers_frame); + __builtin_unreachable(); +} + +#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -0,0 +1,199 @@ +# Build for the Memory Profiler runtime support library. 
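+#
+# In outline (a summary of this file, not additional build logic): the sources
+# below are compiled into object libraries and then linked into the static
+# clang_rt.memprof / clang_rt.memprof_cxx archives and a per-arch shared
+# clang_rt.memprof runtime (see the add_compiler_rt_runtime calls further
+# down); the tests can be run with `make check-memprof`, as described in
+# README.txt in this directory.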
+ +set(MEMPROF_SOURCES + memprof_allocator.cpp + memprof_descriptions.cpp + memprof_errors.cpp + memprof_flags.cpp + memprof_interceptors.cpp + memprof_interceptors_memintrinsics.cpp + memprof_linux.cpp + memprof_malloc_linux.cpp + memprof_posix.cpp + memprof_report.cpp + memprof_rtl.cpp + memprof_shadow_setup.cpp + memprof_stack.cpp + memprof_stats.cpp + memprof_thread.cpp + ) + +set(MEMPROF_CXX_SOURCES + memprof_new_delete.cpp + ) + +set(MEMPROF_PREINIT_SOURCES + memprof_preinit.cpp + ) + +SET(MEMPROF_HEADERS + memprof_allocator.h + memprof_descriptions.h + memprof_errors.h + memprof_flags.h + memprof_flags.inc + memprof_init_version.h + memprof_interceptors.h + memprof_interceptors_memintrinsics.h + memprof_interface_internal.h + memprof_internal.h + memprof_mapping.h + memprof_report.h + memprof_stack.h + memprof_stats.h + memprof_thread.h + ) + +include_directories(..) + +set(MEMPROF_CFLAGS ${SANITIZER_COMMON_CFLAGS}) +set(MEMPROF_COMMON_DEFINITIONS ${COMPILER_RT_MEMPROF_SHADOW_SCALE_DEFINITION}) + +append_rtti_flag(OFF MEMPROF_CFLAGS) + +set(MEMPROF_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}) + +set(MEMPROF_DYNAMIC_DEFINITIONS + ${MEMPROF_COMMON_DEFINITIONS} MEMPROF_DYNAMIC=1) + +set(MEMPROF_DYNAMIC_CFLAGS ${MEMPROF_CFLAGS}) +append_list_if(COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC + -ftls-model=initial-exec MEMPROF_DYNAMIC_CFLAGS) + +set(MEMPROF_DYNAMIC_LIBS ${SANITIZER_CXX_ABI_LIBRARIES} ${SANITIZER_COMMON_LINK_LIBS}) + +append_list_if(COMPILER_RT_HAS_LIBDL dl MEMPROF_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBRT rt MEMPROF_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBM m MEMPROF_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread MEMPROF_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBLOG log MEMPROF_DYNAMIC_LIBS) + +if (TARGET cxx-headers OR HAVE_LIBCXX) + set(MEMPROF_DEPS cxx-headers) +endif() + +# Compile MemProf sources into an object library. + +add_compiler_rt_object_libraries(RTMemprof_dynamic + OS ${SANITIZER_COMMON_SUPPORTED_OS} + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_SOURCES} ${MEMPROF_CXX_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + + add_compiler_rt_object_libraries(RTMemprof + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + add_compiler_rt_object_libraries(RTMemprof_cxx + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_CXX_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + add_compiler_rt_object_libraries(RTMemprof_preinit + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_PREINIT_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + +# Build MemProf runtimes shipped with Clang. +add_compiler_rt_component(memprof) + + # Build separate libraries for each target. 
+ + set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer) + + add_compiler_rt_runtime(clang_rt.memprof + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + RTMemprof + ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + add_compiler_rt_runtime(clang_rt.memprof_cxx + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_cxx + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + add_compiler_rt_runtime(clang_rt.memprof-preinit + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + foreach(arch ${MEMPROF_SUPPORTED_ARCH}) + if (UNIX) + add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch} + LIBS clang_rt.memprof-${arch} clang_rt.memprof_cxx-${arch} + EXTRA memprof.syms.extra) + set(VERSION_SCRIPT_FLAG + -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + set_property(SOURCE + ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + APPEND PROPERTY + OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + else() + set(VERSION_SCRIPT_FLAG) + endif() + + set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION) + + add_compiler_rt_runtime(clang_rt.memprof + SHARED + ARCHS ${arch} + OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + RTMemprof_dynamic + # The only purpose of RTMemprof_dynamic_version_script_dummy is to + # carry a dependency of the shared runtime on the version script. + # Replacing it with a straightforward + # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list) + # generates an order-only dependency in ninja. + RTMemprof_dynamic_version_script_dummy + ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION} + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS} + ${VERSION_SCRIPT_FLAG} + LINK_LIBS ${MEMPROF_DYNAMIC_LIBS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + PARENT_TARGET memprof) + + if (SANITIZER_USE_SYMBOLS) + add_sanitizer_rt_symbols(clang_rt.memprof_cxx + ARCHS ${arch}) + add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols) + add_sanitizer_rt_symbols(clang_rt.memprof + ARCHS ${arch} + EXTRA memprof.syms.extra) + add_dependencies(memprof clang_rt.memprof-${arch}-symbols) + endif() + endforeach() diff --git a/compiler-rt/lib/memprof/README.txt b/compiler-rt/lib/memprof/README.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/README.txt @@ -0,0 +1,17 @@ +MemProfiling RT +================================ +This directory contains sources of the MemProfiling (MemProf) runtime library. + +Directory structure: +README.txt : This file. +CMakeLists.txt : File for cmake-based build. +memprof_*.{cc,h} : Sources of the memprof runtime library. + +Also MemProf runtime needs the following libraries: +lib/interception/ : Machinery used to intercept function calls. +lib/sanitizer_common/ : Code shared between various sanitizers. + +MemProf runtime can only be built by CMake. 
You can run MemProf tests
+from the root of your CMake build tree:
+
+make check-memprof
diff --git a/compiler-rt/lib/memprof/memprof.syms.extra b/compiler-rt/lib/memprof/memprof.syms.extra
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof.syms.extra
@@ -0,0 +1 @@
+__memprof_*
diff --git a/compiler-rt/lib/memprof/memprof_allocator.h b/compiler-rt/lib/memprof/memprof_allocator.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_allocator.h
@@ -0,0 +1,109 @@
+//===-- memprof_allocator.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_allocator.cpp.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_ALLOCATOR_H
+#define MEMPROF_ALLOCATOR_H
+
+#include "memprof_flags.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_list.h"
+
+#if !defined(__x86_64__)
+#error Unsupported platform
+#endif
+#if !SANITIZER_CAN_USE_ALLOCATOR64
+#error Only 64-bit allocator supported
+#endif
+
+namespace __memprof {
+
+enum AllocType {
+  FROM_MALLOC = 1, // Memory block came from malloc, calloc, realloc, etc.
+  FROM_NEW = 2,    // Memory block came from operator new.
+  FROM_NEW_BR = 3  // Memory block came from operator new [ ]
+};
+
+void InitializeAllocator();
+
+struct MemprofMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const;
+  void OnUnmap(uptr p, uptr size) const;
+};
+
+constexpr uptr kAllocatorSpace = 0x600000000000ULL;
+constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+template <typename AddressSpaceViewTy>
+struct AP64 { // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __memprof::SizeClassMap SizeClassMap;
+  typedef MemprofMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = AddressSpaceViewTy;
+};
+
+template <typename AddressSpaceView>
+using PrimaryAllocatorASVT = SizeClassAllocator64<AP64<AddressSpaceView>>;
+using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>;
+
+static const uptr kNumberOfSizeClasses = SizeClassMap::kNumClasses;
+
+template <typename AddressSpaceView>
+using MemprofAllocatorASVT =
+    CombinedAllocator<PrimaryAllocatorASVT<AddressSpaceView>>;
+using MemprofAllocator = MemprofAllocatorASVT<LocalAddressSpaceView>;
+using AllocatorCache = MemprofAllocator::AllocatorCache;
+
+struct MemprofThreadLocalMallocStorage {
+  uptr quarantine_cache[16];
+  AllocatorCache allocator_cache;
+  void CommitBack();
+
+private:
+  // These objects are allocated via mmap() and are zero-initialized.
+ MemprofThreadLocalMallocStorage() {} +}; + +void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, + AllocType alloc_type); +void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type); +void memprof_delete(void *ptr, uptr size, uptr alignment, + BufferedStackTrace *stack, AllocType alloc_type); + +void *memprof_malloc(uptr size, BufferedStackTrace *stack); +void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack); +void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack); +void *memprof_reallocarray(void *p, uptr nmemb, uptr size, + BufferedStackTrace *stack); +void *memprof_valloc(uptr size, BufferedStackTrace *stack); +void *memprof_pvalloc(uptr size, BufferedStackTrace *stack); + +void *memprof_aligned_alloc(uptr alignment, uptr size, + BufferedStackTrace *stack); +int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, + BufferedStackTrace *stack); +uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp); + +uptr memprof_mz_size(const void *ptr); +void memprof_mz_force_lock(); +void memprof_mz_force_unlock(); + +void PrintInternalAllocatorStats(); +void MemprofSoftRssLimitExceededCallback(bool exceeded); + +} // namespace __memprof +#endif // MEMPROF_ALLOCATOR_H diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -0,0 +1,851 @@ +//===-- memprof_allocator.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Implementation of MemProf's memory allocator, which uses the allocator +// from sanitizer_common. +// +//===----------------------------------------------------------------------===// + +#include "memprof_allocator.h" +#include "memprof_mapping.h" +#include "memprof_report.h" +#include "memprof_stack.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include +#include +#include +#include +#include +#undef errno +#include "sanitizer_common/sanitizer_errno.h" +#include "sanitizer_common/sanitizer_file.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_list.h" +#include "sanitizer_common/sanitizer_stackdepot.h" + +namespace __memprof { + +static int GetCpuId(void) { + // _memprof_preinit is called via the preinit_array, which subsequently calls + // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu + // will seg fault as the address of __vdso_getcpu will be null. + if (!memprof_init_done) + return -1; + return sched_getcpu(); +} + +// Compute the timestamp in ms. +static int GetTimestamp(void) { + // timespec_get will segfault if called from dl_init + if (!memprof_timestamp_inited) { + // By returning 0, this will be effectively treated as being + // timestamped at memprof init time (when memprof_init_timestamp_s + // is initialized). 
+ return 0; + } + timespec ts; + timespec_get(&ts, TIME_UTC); + return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000; +} + +static MemprofAllocator &get_allocator(); + +// The memory chunk allocated from the underlying allocator looks like this: +// H H U U U U U U +// H -- ChunkHeader (32 bytes) +// U -- user memory. + +// If there is left padding before the ChunkHeader (due to use of memalign), +// we store a magic value in the first uptr word of the memory block and +// store the address of ChunkHeader in the next uptr. +// M B L L L L L L L L L H H U U U U U U +// | ^ +// ---------------------| +// M -- magic value kAllocBegMagic +// B -- address of ChunkHeader pointing to the first 'H' +constexpr uptr kAllocBegMagic = 0xCC6E96B9; + +constexpr uptr kMaxAllowedMallocBits = 40; + +// Should be no more than 32-bytes +struct ChunkHeader { + // 1-st 4 bytes. + u32 alloc_context_id; + // 2-nd 4 bytes + u32 cpu_id; + // 3-rd 4 bytes + u32 timestamp_ms; + // 4-th 4 bytes available + u32 dummy; + // 5-th and 6-th 4 bytes + // The max size of an allocation is 2^40 (kMaxAllowedMallocSize). + u64 user_requested_size : kMaxAllowedMallocBits; + u64 from_memalign : 1; + // 23 bits available + // 7-th and 8-th 4 bytes + u64 data_type_id; // TODO: hash of type name +}; + +static const uptr kChunkHeaderSize = sizeof(ChunkHeader); +COMPILER_CHECK(kChunkHeaderSize == 32); + +struct MemprofChunk : ChunkHeader { + uptr Beg() { return reinterpret_cast(this) + kChunkHeaderSize; } + uptr UsedSize() { return user_requested_size; } + void *AllocBeg() { + if (from_memalign) + return get_allocator().GetBlockBegin(reinterpret_cast(this)); + return reinterpret_cast(this); + } +}; + +void FlushUnneededMemProfShadowMemory(uptr p, uptr size) { + // Since memprof's mapping is compacting, the shadow chunk may be + // not page-aligned, so we only flush the page-aligned portion. + ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size)); +} + +void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const { + // Statistics. + MemprofStats &thread_stats = GetCurrentThreadStats(); + thread_stats.mmaps++; + thread_stats.mmaped += size; +} +void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const { + // We are about to unmap a chunk of user memory. + // Mark the corresponding shadow memory as not needed. + FlushUnneededMemProfShadowMemory(p, size); + // Statistics. + MemprofStats &thread_stats = GetCurrentThreadStats(); + thread_stats.munmaps++; + thread_stats.munmaped += size; +} + +AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) { + CHECK(ms); + return &ms->allocator_cache; +} + +struct MemInfoBlock { + u32 alloc_count; + u64 total_access_count, min_access_count, max_access_count; + u64 total_size; + u32 min_size, max_size; + u32 alloc_timestamp, dealloc_timestamp; + u64 total_lifetime; + u32 min_lifetime, max_lifetime; + u32 alloc_cpu_id, dealloc_cpu_id; + u32 num_migrated_cpu; + + // Only compared to prior deallocated object currently. 
+ u32 num_lifetime_overlaps; + u32 num_same_alloc_cpu; + u32 num_same_dealloc_cpu; + + u64 data_type_id; // TODO: hash of type name + + MemInfoBlock() : alloc_count(0) {} + + MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, + u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) + : alloc_count(1), total_access_count(access_count), + min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(alloc_timestamp ? (dealloc_timestamp - alloc_timestamp) + : 0), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), + num_same_dealloc_cpu(0) { + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Print(u64 id) { + char buffer[100]; + if (flags()->print_terse) { + sprintf(buffer, "%.2f", ((float)total_size) / alloc_count); + Printf("MIB:%llu/%u/%s/%u/%u/", id, alloc_count, buffer, min_size, + max_size); + sprintf(buffer, "%.2f", ((float)total_access_count) / alloc_count); + Printf("%s/%u/%u/", buffer, min_access_count, max_access_count); + sprintf(buffer, "%.2f", ((float)total_lifetime) / alloc_count); + Printf("%s/%u/%u/", buffer, min_lifetime, max_lifetime); + Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, + num_same_alloc_cpu, num_same_dealloc_cpu); + } else { + sprintf(buffer, "%.2f", ((float)total_size) / alloc_count); + Printf("Memory allocation stack id = %llu\n", id); + Printf("\talloc_count %u, size (ave/min/max) %s / %u / %u\n", alloc_count, + buffer, min_size, max_size); + sprintf(buffer, "%.2f", ((float)total_access_count) / alloc_count); + Printf("\taccess_count (ave/min/max): %s / %u / %u\n", buffer, + min_access_count, max_access_count); + sprintf(buffer, "%.2f", ((float)total_lifetime) / alloc_count); + Printf("\tlifetime (ave/min/max): %s / %u / %u\n", buffer, min_lifetime, + max_lifetime); + Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " + "cpu: %u, num same dealloc_cpu: %u\n", + num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, + num_same_dealloc_cpu); + } + } + + static void printHeader() { + CHECK(flags()->print_terse); + Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" + "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" + "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" + "NumSameDeallocCpu\n"); + } + + void Merge(MemInfoBlock &newMIB) { + alloc_count += newMIB.alloc_count; + + total_access_count += newMIB.total_access_count; + min_access_count = Min(min_access_count, newMIB.min_access_count); + max_access_count = Max(max_access_count, newMIB.max_access_count); + + total_size += newMIB.total_size; + min_size = Min(min_size, newMIB.min_size); + max_size = Max(max_size, newMIB.max_size); + + total_lifetime += newMIB.total_lifetime; + min_lifetime = Min(min_lifetime, newMIB.min_lifetime); + max_lifetime = Max(max_lifetime, newMIB.max_lifetime); + + // We know newMIB was deallocated later, so just need to check if it was + // allocated before last one deallocated. 
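+    // For example (hypothetical timestamps in ms): if the previously merged
+    // block was deallocated at t == 150 and newMIB was allocated at t == 120,
+    // newMIB's lifetime overlapped it and the counter below is incremented;
+    // the timestamps are then advanced to newMIB's.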
+ num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newMIB.alloc_timestamp; + dealloc_timestamp = newMIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; + alloc_cpu_id = newMIB.alloc_cpu_id; + dealloc_cpu_id = newMIB.dealloc_cpu_id; + } +}; + +static u32 AccessCount = 0; +static u32 MissCount = 0; + +struct SetEntry { + SetEntry() : id(0), MIB() {} + bool Empty() { return id == 0; } + void Print() { + CHECK(!Empty()); + MIB.Print(id); + } + // The stack id + u64 id; + MemInfoBlock MIB; +}; + +struct CacheSet { + enum { kSetSize = 4 }; + + void PrintAll() { + for (int i = 0; i < kSetSize; i++) { + if (Entries[i].Empty()) + continue; + Entries[i].Print(); + } + } + void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { + AccessCount++; + SetAccessCount++; + + for (int i = 0; i < kSetSize; i++) { + auto id = Entries[i].id; + // Check if this is a hit or an empty entry. Since we always move any + // filled locations to the front of the array (see below), we don't need + // to look after finding the first empty entry. + if (id == new_id || !id) { + if (id == 0) { + Entries[i].id = new_id; + Entries[i].MIB = newMIB; + } else { + Entries[i].MIB.Merge(newMIB); + } + // Assuming some id locality, we try to swap the matching entry + // into the first set position. + if (i != 0) { + auto tmp = Entries[0]; + Entries[0] = Entries[i]; + Entries[i] = tmp; + } + return; + } + } + + // Miss + MissCount++; + SetMissCount++; + + // We try to find the entries with the lowest alloc count to be evicted: + int min_idx = 0; + u64 min_count = Entries[0].MIB.alloc_count; + for (int i = 1; i < kSetSize; i++) { + CHECK(!Entries[i].Empty()); + if (Entries[i].MIB.alloc_count < min_count) { + min_idx = i; + min_count = Entries[i].MIB.alloc_count; + } + } + + // Print the evicted entry profile information + if (!flags()->print_terse) + Printf("Evicted:\n"); + Entries[min_idx].Print(); + + // Similar to the hit case, put new MIB in first set position. + if (min_idx != 0) + Entries[min_idx] = Entries[0]; + Entries[0].id = new_id; + Entries[0].MIB = newMIB; + } + + void PrintMissRate(int i) { + char buffer[100]; + sprintf(buffer, "%5.2f%%", + SetAccessCount ? SetMissCount * 100.0 / SetAccessCount : 0.0); + Printf("Set %d miss rate: %d / %d = %s\n", i, SetMissCount, SetAccessCount, + buffer); + } + + SetEntry Entries[kSetSize]; + u32 SetAccessCount = 0; + u32 SetMissCount = 0; +}; + +struct MemInfoBlockCache { + MemInfoBlockCache() { + if (flags()->dump_process_map) + DumpProcessMap(); + if (flags()->print_terse) + MemInfoBlock::printHeader(); + Sets = + (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); + } + + ~MemInfoBlockCache() { free(Sets); } + + void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { + u64 hv = new_id; + + // Use mod method where number of entries should be a prime close to power + // of 2. + hv %= flags()->mem_info_cache_entries; + + return Sets[hv].insertOrMerge(new_id, newMIB); + } + + void PrintAll() { + for (int i = 0; i < flags()->mem_info_cache_entries; i++) { + Sets[i].PrintAll(); + } + } + + void PrintMissRate() { + if (!flags()->print_mem_info_cache_miss_rate) + return; + char buffer[100]; + sprintf(buffer, "%5.2f%%", + AccessCount ? 
MissCount * 100.0 / AccessCount : 0.0); + Printf("Overall miss rate: %d / %d = %s\n", MissCount, AccessCount, buffer); + if (flags()->print_mem_info_cache_miss_rate_details) + for (int i = 0; i < flags()->mem_info_cache_entries; i++) + Sets[i].PrintMissRate(i); + } + + CacheSet *Sets; +}; + +// Accumulates the access count from the shadow for the given pointer and size. +u64 GetShadowCount(uptr p, u32 size) { + u64 *shadow = (u64 *)MEM_TO_SHADOW(p); + u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size); + u64 count = 0; + for (; shadow <= shadow_end; shadow++) + count += *shadow; + return count; +} + +// Clears the shadow counters (when memory is allocated). +void ClearShadow(uptr addr, uptr size) { + CHECK(AddrIsAlignedByGranularity(addr)); + CHECK(AddrIsInMem(addr)); + CHECK(AddrIsAlignedByGranularity(addr + size)); + CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY)); + CHECK(REAL(memset)); + uptr shadow_beg = MEM_TO_SHADOW(addr); + uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1; + if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) { + REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); + } else { + uptr page_size = GetPageSizeCached(); + uptr page_beg = RoundUpTo(shadow_beg, page_size); + uptr page_end = RoundDownTo(shadow_end, page_size); + + if (page_beg >= page_end) { + REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); + } else { + if (page_beg != shadow_beg) { + REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg); + } + if (page_end != shadow_end) { + REAL(memset)((void *)page_end, 0, shadow_end - page_end); + } + ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr); + } + } +} + +struct Allocator { + static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits; + + MemprofAllocator allocator; + StaticSpinMutex fallback_mutex; + AllocatorCache fallback_allocator_cache; + + uptr max_user_defined_malloc_size; + atomic_uint8_t rss_limit_exceeded; + + MemInfoBlockCache MemInfoBlockTable; + bool destructing; + + // ------------------- Initialization ------------------------ + explicit Allocator(LinkerInitialized) : destructing(false) {} + + ~Allocator() { FinishAndPrint(); } + + void FinishAndPrint() { + if (!flags()->print_terse) + Printf("Live on exit:\n"); + allocator.ForceLock(); + allocator.ForEachChunk( + [](uptr chunk, void *alloc) { + MemprofChunk *m = + ((Allocator *)alloc)->GetMemprofChunk((void *)chunk); + // The size is reset to 0 on deallocation (and a min of 1 on + // allocation). + if (!m->user_requested_size) + return; + uptr user_beg = ((uptr)m) + kChunkHeaderSize; + u64 c = GetShadowCount(user_beg, m->user_requested_size); + long curtime = GetTimestamp(); + MemInfoBlock newMIB(m->user_requested_size, c, m->timestamp_ms, + curtime, m->cpu_id, GetCpuId()); + ((Allocator *)alloc) + ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + }, + this); + allocator.ForceUnlock(); + + destructing = true; + MemInfoBlockTable.PrintMissRate(); + MemInfoBlockTable.PrintAll(); + StackDepotPrintAll(); + } + + void InitLinkerInitialized() { + SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); + allocator.InitLinkerInitialized( + common_flags()->allocator_release_to_os_interval_ms); + max_user_defined_malloc_size = common_flags()->max_allocation_size_mb + ? 
common_flags()->max_allocation_size_mb + << 20 + : kMaxAllowedMallocSize; + } + + bool RssLimitExceeded() { + return atomic_load(&rss_limit_exceeded, memory_order_relaxed); + } + + void SetRssLimitExceeded(bool limit_exceeded) { + atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed); + } + + // -------------------- Allocation/Deallocation routines --------------- + void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack, + AllocType alloc_type) { + if (UNLIKELY(!memprof_inited)) + MemprofInitFromRtl(); + if (RssLimitExceeded()) { + if (AllocatorMayReturnNull()) + return nullptr; + ReportRssLimitExceeded(stack); + } + CHECK(stack); + const uptr min_alignment = MEMPROF_ALIGNMENT; + if (alignment < min_alignment) + alignment = min_alignment; + if (size == 0) { + // We'd be happy to avoid allocating memory for zero-size requests, but + // some programs/tests depend on this behavior and assume that malloc + // would not return NULL even for zero-size allocations. Moreover, it + // looks like operator new should never return NULL, and results of + // consecutive "new" calls must be different even if the allocated size + // is zero. + size = 1; + } + CHECK(IsPowerOfTwo(alignment)); + uptr rounded_size = RoundUpTo(size, alignment); + uptr needed_size = rounded_size + kChunkHeaderSize; + if (alignment > min_alignment) + needed_size += alignment; + bool using_primary_allocator = true; + if (!PrimaryAllocator::CanAllocate(needed_size, alignment)) + using_primary_allocator = false; + CHECK(IsAligned(needed_size, min_alignment)); + if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize || + size > max_user_defined_malloc_size) { + if (AllocatorMayReturnNull()) { + Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", + (void *)size); + return nullptr; + } + uptr malloc_limit = + Min(kMaxAllowedMallocSize, max_user_defined_malloc_size); + ReportAllocationSizeTooBig(size, needed_size, malloc_limit, stack); + } + + MemprofThread *t = GetCurrentThread(); + void *allocated; + if (t) { + AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); + allocated = allocator.Allocate(cache, needed_size, 8); + } else { + SpinMutexLock l(&fallback_mutex); + AllocatorCache *cache = &fallback_allocator_cache; + allocated = allocator.Allocate(cache, needed_size, 8); + } + if (UNLIKELY(!allocated)) { + SetAllocatorOutOfMemory(); + if (AllocatorMayReturnNull()) + return nullptr; + ReportOutOfMemory(size, stack); + } + + uptr alloc_beg = reinterpret_cast(allocated); + uptr alloc_end = alloc_beg + needed_size; + uptr beg_plus_header = alloc_beg + kChunkHeaderSize; + uptr user_beg = beg_plus_header; + if (!IsAligned(user_beg, alignment)) + user_beg = RoundUpTo(user_beg, alignment); + uptr user_end = user_beg + size; + CHECK_LE(user_end, alloc_end); + uptr chunk_beg = user_beg - kChunkHeaderSize; + MemprofChunk *m = reinterpret_cast(chunk_beg); + m->from_memalign = alloc_beg != chunk_beg; + if (alloc_beg != chunk_beg) { + CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); + reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; + reinterpret_cast(alloc_beg)[1] = chunk_beg; + } + CHECK(size); + m->user_requested_size = size; + + m->cpu_id = GetCpuId(); + m->timestamp_ms = GetTimestamp(); + m->alloc_context_id = StackDepotPut(*stack); + + uptr size_rounded_down_to_granularity = + RoundDownTo(size, SHADOW_GRANULARITY); + if (size_rounded_down_to_granularity) + ClearShadow(user_beg, size_rounded_down_to_granularity); + + MemprofStats &thread_stats = 
GetCurrentThreadStats(); + thread_stats.mallocs++; + thread_stats.malloced += size; + thread_stats.malloced_overhead += needed_size - size; + if (needed_size > SizeClassMap::kMaxSize) + thread_stats.malloc_large++; + else + thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++; + + void *res = reinterpret_cast(user_beg); + MEMPROF_MALLOC_HOOK(res, size); + return res; + } + + void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment, + BufferedStackTrace *stack, AllocType alloc_type) { + uptr p = reinterpret_cast(ptr); + if (p == 0) + return; + + uptr chunk_beg = p - kChunkHeaderSize; + MemprofChunk *m = reinterpret_cast(chunk_beg); + + if (memprof_inited && memprof_init_done && !destructing) { + u64 c = GetShadowCount(p, m->user_requested_size); + long curtime = GetTimestamp(); + + MemInfoBlock newMIB(m->user_requested_size, c, m->timestamp_ms, curtime, + m->cpu_id, GetCpuId()); + { + SpinMutexLock l(&fallback_mutex); + MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); + } + } + + MemprofStats &thread_stats = GetCurrentThreadStats(); + thread_stats.frees++; + thread_stats.freed += m->UsedSize(); + m->user_requested_size = 0; + + MemprofThread *t = GetCurrentThread(); + if (t) { + AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); + allocator.Deallocate(cache, m->AllocBeg()); + } else { + SpinMutexLock l(&fallback_mutex); + AllocatorCache *cache = &fallback_allocator_cache; + allocator.Deallocate(cache, m->AllocBeg()); + } + + MEMPROF_FREE_HOOK(ptr); + } + + void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) { + CHECK(old_ptr && new_size); + uptr p = reinterpret_cast(old_ptr); + uptr chunk_beg = p - kChunkHeaderSize; + MemprofChunk *m = reinterpret_cast(chunk_beg); + + MemprofStats &thread_stats = GetCurrentThreadStats(); + thread_stats.reallocs++; + thread_stats.realloced += new_size; + + void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC); + if (new_ptr) { + CHECK_NE(REAL(memcpy), nullptr); + uptr memcpy_size = Min(new_size, m->UsedSize()); + REAL(memcpy)(new_ptr, old_ptr, memcpy_size); + Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC); + } + return new_ptr; + } + + void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { + if (AllocatorMayReturnNull()) + return nullptr; + ReportCallocOverflow(nmemb, size, stack); + } + void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC); + // If the memory comes from the secondary allocator no need to clear it + // as it comes directly from mmap. + if (ptr && allocator.FromPrimary(ptr)) + REAL(memset)(ptr, 0, nmemb * size); + return ptr; + } + + void CommitBack(MemprofThreadLocalMallocStorage *ms, + BufferedStackTrace *stack) { + AllocatorCache *ac = GetAllocatorCache(ms); + allocator.SwallowCache(ac); + } + + // -------------------------- Chunk lookup ---------------------- + + // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). 
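+  // If the block came from memalign, the header is not at the block start; in
+  // that case the first uptr word of the block holds kAllocBegMagic and the
+  // second holds the ChunkHeader address (see the layout comment above), which
+  // is what the lookup below checks for.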
+ MemprofChunk *GetMemprofChunk(void *alloc_beg) { + if (!alloc_beg) + return nullptr; + uptr *alloc_magic = reinterpret_cast(alloc_beg); + if (alloc_magic[0] == kAllocBegMagic) + return reinterpret_cast(alloc_magic[1]); + return reinterpret_cast(alloc_beg); + } + + MemprofChunk *GetMemprofChunkByAddr(uptr p) { + void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); + return GetMemprofChunk(alloc_beg); + } + + uptr AllocationSize(uptr p) { + MemprofChunk *m = GetMemprofChunkByAddr(p); + if (!m) + return 0; + if (!m->user_requested_size) + return 0; + if (m->Beg() != p) + return 0; + return m->UsedSize(); + } + + void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); } + + void PrintStats() { allocator.PrintStats(); } + + void ForceLock() { + allocator.ForceLock(); + fallback_mutex.Lock(); + } + + void ForceUnlock() { + fallback_mutex.Unlock(); + allocator.ForceUnlock(); + } +}; + +static Allocator instance(LINKER_INITIALIZED); + +static MemprofAllocator &get_allocator() { return instance.allocator; } + +void InitializeAllocator() { instance.InitLinkerInitialized(); } + +void MemprofThreadLocalMallocStorage::CommitBack() { + GET_STACK_TRACE_MALLOC; + instance.CommitBack(this, &stack); +} + +void PrintInternalAllocatorStats() { instance.PrintStats(); } + +void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) { + instance.Deallocate(ptr, 0, 0, stack, alloc_type); +} + +void memprof_delete(void *ptr, uptr size, uptr alignment, + BufferedStackTrace *stack, AllocType alloc_type) { + instance.Deallocate(ptr, size, alignment, stack, alloc_type); +} + +void *memprof_malloc(uptr size, BufferedStackTrace *stack) { + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); +} + +void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { + return SetErrnoOnNull(instance.Calloc(nmemb, size, stack)); +} + +void *memprof_reallocarray(void *p, uptr nmemb, uptr size, + BufferedStackTrace *stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { + errno = errno_ENOMEM; + if (AllocatorMayReturnNull()) + return nullptr; + ReportReallocArrayOverflow(nmemb, size, stack); + } + return memprof_realloc(p, nmemb * size, stack); +} + +void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) { + if (!p) + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); + if (size == 0) { + if (flags()->allocator_frees_and_returns_null_on_realloc_zero) { + instance.Deallocate(p, 0, 0, stack, FROM_MALLOC); + return nullptr; + } + // Allocate a size of 1 if we shouldn't free() on Realloc to 0 + size = 1; + } + return SetErrnoOnNull(instance.Reallocate(p, size, stack)); +} + +void *memprof_valloc(uptr size, BufferedStackTrace *stack) { + return SetErrnoOnNull( + instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC)); +} + +void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) { + uptr PageSize = GetPageSizeCached(); + if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { + errno = errno_ENOMEM; + if (AllocatorMayReturnNull()) + return nullptr; + ReportPvallocOverflow(size, stack); + } + // pvalloc(0) should allocate one page. + size = size ? 
RoundUpTo(size, PageSize) : PageSize; + return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC)); +} + +void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, + AllocType alloc_type) { + if (UNLIKELY(!IsPowerOfTwo(alignment))) { + errno = errno_EINVAL; + if (AllocatorMayReturnNull()) + return nullptr; + ReportInvalidAllocationAlignment(alignment, stack); + } + return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type)); +} + +void *memprof_aligned_alloc(uptr alignment, uptr size, + BufferedStackTrace *stack) { + if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { + errno = errno_EINVAL; + if (AllocatorMayReturnNull()) + return nullptr; + ReportInvalidAlignedAllocAlignment(size, alignment, stack); + } + return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC)); +} + +int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, + BufferedStackTrace *stack) { + if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { + if (AllocatorMayReturnNull()) + return errno_EINVAL; + ReportInvalidPosixMemalignAlignment(alignment, stack); + } + void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC); + if (UNLIKELY(!ptr)) + // OOM error is already taken care of by Allocate. + return errno_ENOMEM; + CHECK(IsAligned((uptr)ptr, alignment)); + *memptr = ptr; + return 0; +} + +uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { + if (!ptr) + return 0; + uptr usable_size = instance.AllocationSize(reinterpret_cast(ptr)); + return usable_size; +} + +uptr memprof_mz_size(const void *ptr) { + return instance.AllocationSize(reinterpret_cast(ptr)); +} + +void memprof_mz_force_lock() { instance.ForceLock(); } + +void memprof_mz_force_unlock() { instance.ForceUnlock(); } + +void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) { + instance.SetRssLimitExceeded(limit_exceeded); +} + +} // namespace __memprof + +// ---------------------- Interface ---------------- {{{1 +using namespace __memprof; + +#if !SANITIZER_SUPPORTS_WEAK_HOOKS +// Provide default (no-op) implementation of malloc hooks. +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr, + uptr size) { + (void)ptr; + (void)size; +} + +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) { + (void)ptr; +} +#endif diff --git a/compiler-rt/lib/memprof/memprof_descriptions.h b/compiler-rt/lib/memprof/memprof_descriptions.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_descriptions.h @@ -0,0 +1,45 @@ +//===-- memprof_descriptions.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for memprof_descriptions.cpp. 
+//===----------------------------------------------------------------------===// +#ifndef MEMPROF_DESCRIPTIONS_H +#define MEMPROF_DESCRIPTIONS_H + +#include "memprof_allocator.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_report_decorator.h" + +namespace __memprof { + +void DescribeThread(MemprofThreadContext *context); +static inline void DescribeThread(MemprofThread *t) { + if (t) + DescribeThread(t->context()); +} + +class MemprofThreadIdAndName { +public: + explicit MemprofThreadIdAndName(MemprofThreadContext *t); + explicit MemprofThreadIdAndName(u32 tid); + + // Contains "T%tid (%name)" or "T%tid" if the name is empty. + const char *c_str() const { return &name[0]; } + +private: + void Init(u32 tid, const char *tname); + + char name[128]; +}; + +} // namespace __memprof + +#endif // MEMPROF_DESCRIPTIONS_H diff --git a/compiler-rt/lib/memprof/memprof_descriptions.cpp b/compiler-rt/lib/memprof/memprof_descriptions.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_descriptions.cpp @@ -0,0 +1,71 @@ +//===-- memprof_descriptions.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf functions for getting information about an address and/or printing +// it. +//===----------------------------------------------------------------------===// + +#include "memprof_descriptions.h" +#include "memprof_mapping.h" +#include "memprof_report.h" +#include "memprof_stack.h" +#include "sanitizer_common/sanitizer_stackdepot.h" + +namespace __memprof { + +MemprofThreadIdAndName::MemprofThreadIdAndName(MemprofThreadContext *t) { + Init(t->tid, t->name); +} + +MemprofThreadIdAndName::MemprofThreadIdAndName(u32 tid) { + if (tid == kInvalidTid) { + Init(tid, ""); + } else { + memprofThreadRegistry().CheckLocked(); + MemprofThreadContext *t = GetThreadContextByTidLocked(tid); + Init(tid, t->name); + } +} + +void MemprofThreadIdAndName::Init(u32 tid, const char *tname) { + int len = internal_snprintf(name, sizeof(name), "T%d", tid); + CHECK(((unsigned int)len) < sizeof(name)); + if (tname[0] != '\0') + internal_snprintf(&name[len], sizeof(name) - len, " (%s)", tname); +} + +void DescribeThread(MemprofThreadContext *context) { + CHECK(context); + memprofThreadRegistry().CheckLocked(); + // No need to announce the main thread. + if (context->tid == 0 || context->announced) { + return; + } + context->announced = true; + InternalScopedString str(1024); + str.append("Thread %s", MemprofThreadIdAndName(context).c_str()); + if (context->parent_tid == kInvalidTid) { + str.append(" created by unknown thread\n"); + Printf("%s", str.data()); + return; + } + str.append(" created by %s here:\n", + MemprofThreadIdAndName(context->parent_tid).c_str()); + Printf("%s", str.data()); + StackDepotGet(context->stack_id).Print(); + // Recursively described parent thread if needed. 
+ if (flags()->print_full_thread_history) { + MemprofThreadContext *parent_context = + GetThreadContextByTidLocked(context->parent_tid); + DescribeThread(parent_context); + } +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_errors.h b/compiler-rt/lib/memprof/memprof_errors.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_errors.h @@ -0,0 +1,209 @@ +//===-- memprof_errors.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for error structures. +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_ERRORS_H +#define MEMPROF_ERRORS_H + +#include "memprof_descriptions.h" +#include "sanitizer_common/sanitizer_common.h" + +namespace __memprof { + +// (*) VS2013 does not implement unrestricted unions, so we need a trivial +// default constructor explicitly defined for each particular error. + +// None of the error classes own the stack traces mentioned in them. + +struct ErrorBase { + char descr[1024]; + u32 tid; + + ErrorBase() = default; // (*) + explicit ErrorBase(u32 tid_) : tid(tid_) {} + ErrorBase(u32 tid_, int initial_score, const char *reason) : tid(tid_) { + descr[0] = 0; + internal_strlcat(descr, reason, sizeof(descr)); + Printf("descr: %s %s\n", descr, reason); + } +}; + +struct ErrorCallocOverflow : ErrorBase { + const BufferedStackTrace *stack; + uptr count; + uptr size; + + ErrorCallocOverflow() = default; // (*) + ErrorCallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr count_, + uptr size_) + : ErrorBase(tid, 10, "calloc-overflow"), stack(stack_), count(count_), + size(size_) {} + void Print(); +}; + +struct ErrorReallocArrayOverflow : ErrorBase { + const BufferedStackTrace *stack; + uptr count; + uptr size; + + ErrorReallocArrayOverflow() = default; // (*) + ErrorReallocArrayOverflow(u32 tid, BufferedStackTrace *stack_, uptr count_, + uptr size_) + : ErrorBase(tid, 10, "reallocarray-overflow"), stack(stack_), + count(count_), size(size_) {} + void Print(); +}; + +struct ErrorPvallocOverflow : ErrorBase { + const BufferedStackTrace *stack; + uptr size; + + ErrorPvallocOverflow() = default; // (*) + ErrorPvallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr size_) + : ErrorBase(tid, 10, "pvalloc-overflow"), stack(stack_), size(size_) {} + void Print(); +}; + +struct ErrorInvalidAllocationAlignment : ErrorBase { + const BufferedStackTrace *stack; + uptr alignment; + + ErrorInvalidAllocationAlignment() = default; // (*) + ErrorInvalidAllocationAlignment(u32 tid, BufferedStackTrace *stack_, + uptr alignment_) + : ErrorBase(tid, 10, "invalid-allocation-alignment"), stack(stack_), + alignment(alignment_) {} + void Print(); +}; + +struct ErrorInvalidAlignedAllocAlignment : ErrorBase { + const BufferedStackTrace *stack; + uptr size; + uptr alignment; + + ErrorInvalidAlignedAllocAlignment() = default; // (*) + ErrorInvalidAlignedAllocAlignment(u32 tid, BufferedStackTrace *stack_, + uptr size_, uptr alignment_) + : ErrorBase(tid, 10, "invalid-aligned-alloc-alignment"), stack(stack_), + size(size_), alignment(alignment_) {} + void Print(); +}; + +struct ErrorInvalidPosixMemalignAlignment : ErrorBase { 
+ const BufferedStackTrace *stack; + uptr alignment; + + ErrorInvalidPosixMemalignAlignment() = default; // (*) + ErrorInvalidPosixMemalignAlignment(u32 tid, BufferedStackTrace *stack_, + uptr alignment_) + : ErrorBase(tid, 10, "invalid-posix-memalign-alignment"), stack(stack_), + alignment(alignment_) {} + void Print(); +}; + +struct ErrorAllocationSizeTooBig : ErrorBase { + const BufferedStackTrace *stack; + uptr user_size; + uptr total_size; + uptr max_size; + + ErrorAllocationSizeTooBig() = default; // (*) + ErrorAllocationSizeTooBig(u32 tid, BufferedStackTrace *stack_, + uptr user_size_, uptr total_size_, uptr max_size_) + : ErrorBase(tid, 10, "allocation-size-too-big"), stack(stack_), + user_size(user_size_), total_size(total_size_), max_size(max_size_) {} + void Print(); +}; + +struct ErrorRssLimitExceeded : ErrorBase { + const BufferedStackTrace *stack; + + ErrorRssLimitExceeded() = default; // (*) + ErrorRssLimitExceeded(u32 tid, BufferedStackTrace *stack_) + : ErrorBase(tid, 10, "rss-limit-exceeded"), stack(stack_) {} + void Print(); +}; + +struct ErrorOutOfMemory : ErrorBase { + const BufferedStackTrace *stack; + uptr requested_size; + + ErrorOutOfMemory() = default; // (*) + ErrorOutOfMemory(u32 tid, BufferedStackTrace *stack_, uptr requested_size_) + : ErrorBase(tid, 10, "out-of-memory"), stack(stack_), + requested_size(requested_size_) {} + void Print(); +}; + +// clang-format off +#define MEMPROF_FOR_EACH_ERROR_KIND(macro) \ + macro(CallocOverflow) \ + macro(ReallocArrayOverflow) \ + macro(PvallocOverflow) \ + macro(InvalidAllocationAlignment) \ + macro(InvalidAlignedAllocAlignment) \ + macro(InvalidPosixMemalignAlignment) \ + macro(AllocationSizeTooBig) \ + macro(RssLimitExceeded) \ + macro(OutOfMemory) \ +// clang-format on + +#define MEMPROF_DEFINE_ERROR_KIND(name) kErrorKind##name, +#define MEMPROF_ERROR_DESCRIPTION_MEMBER(name) Error##name name; +#define MEMPROF_ERROR_DESCRIPTION_CONSTRUCTOR(name) \ + ErrorDescription(Error##name const &e) : kind(kErrorKind##name) { \ + internal_memcpy(&name, &e, sizeof(name)); \ + } +#define MEMPROF_ERROR_DESCRIPTION_PRINT(name) \ + case kErrorKind##name: \ + return name.Print(); + +enum ErrorKind { + kErrorKindInvalid = 0, + MEMPROF_FOR_EACH_ERROR_KIND(MEMPROF_DEFINE_ERROR_KIND) +}; + +struct ErrorDescription { + ErrorKind kind; + // We're using a tagged union because it allows us to have a trivially + // copiable type and use the same structures as the public interface. + // + // We can add a wrapper around it to make it "more c++-like", but that would + // add a lot of code and the benefit wouldn't be that big. 
+ union { + ErrorBase Base; + MEMPROF_FOR_EACH_ERROR_KIND(MEMPROF_ERROR_DESCRIPTION_MEMBER) + }; + + ErrorDescription() { internal_memset(this, 0, sizeof(*this)); } + explicit ErrorDescription(LinkerInitialized) {} + MEMPROF_FOR_EACH_ERROR_KIND(MEMPROF_ERROR_DESCRIPTION_CONSTRUCTOR) + + bool IsValid() { return kind != kErrorKindInvalid; } + void Print() { + switch (kind) { + MEMPROF_FOR_EACH_ERROR_KIND(MEMPROF_ERROR_DESCRIPTION_PRINT) + case kErrorKindInvalid: + CHECK(0); + } + CHECK(0); + } +}; + +#undef MEMPROF_FOR_EACH_ERROR_KIND +#undef MEMPROF_DEFINE_ERROR_KIND +#undef MEMPROF_ERROR_DESCRIPTION_MEMBER +#undef MEMPROF_ERROR_DESCRIPTION_CONSTRUCTOR +#undef MEMPROF_ERROR_DESCRIPTION_PRINT + +} // namespace __memprof + +#endif // MEMPROF_ERRORS_H diff --git a/compiler-rt/lib/memprof/memprof_errors.cpp b/compiler-rt/lib/memprof/memprof_errors.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_errors.cpp @@ -0,0 +1,138 @@ +//===-- memprof_errors.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf implementation for error structures. +//===----------------------------------------------------------------------===// + +#include "memprof_errors.h" +#include "memprof_descriptions.h" +#include "memprof_mapping.h" +#include "memprof_report.h" +#include "memprof_stack.h" +#include "sanitizer_common/sanitizer_report_decorator.h" +#include "sanitizer_common/sanitizer_stackdepot.h" + +namespace __memprof { + +void ErrorCallocOverflow::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: calloc parameters overflow: count * size " + "(%zd * %zd) cannot be represented in type size_t (thread %s)\n", + count, size, MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorReallocArrayOverflow::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: reallocarray parameters overflow: count * size " + "(%zd * %zd) cannot be represented in type size_t (thread %s)\n", + count, size, MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorPvallocOverflow::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: pvalloc parameters overflow: size 0x%zx " + "rounded up to system page size 0x%zx cannot be represented in type " + "size_t (thread %s)\n", + size, GetPageSizeCached(), MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorInvalidAllocationAlignment::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: invalid allocation alignment: %zd, " + "alignment must be a power of two (thread %s)\n", + alignment, MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void 
ErrorInvalidAlignedAllocAlignment::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: invalid alignment requested in " + "aligned_alloc: %zd, alignment must be a power of two and the " + "requested size 0x%zx must be a multiple of alignment " + "(thread %s)\n", + alignment, size, MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorInvalidPosixMemalignAlignment::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report( + "ERROR: MemProfiler: invalid alignment requested in posix_memalign: " + "%zd, alignment must be a power of two and a multiple of sizeof(void*) " + "== %zd (thread %s)\n", + alignment, sizeof(void *), MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorAllocationSizeTooBig::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: requested allocation size 0x%zx (0x%zx after " + "adjustments for alignment, headers etc.) exceeds maximum supported " + "size of 0x%zx (thread %s)\n", + user_size, total_size, max_size, MemprofThreadIdAndName(tid).c_str()); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorRssLimitExceeded::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: specified RSS limit exceeded, currently set to " + "soft_rss_limit_mb=%zd\n", + common_flags()->soft_rss_limit_mb); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +void ErrorOutOfMemory::Print() { + SanitizerCommonDecorator d; + Printf("%s", d.Error()); + Report("ERROR: MemProfiler: allocator is out of memory trying to allocate " + "0x%zx bytes\n", + requested_size); + Printf("%s", d.Default()); + stack->Print(); + PrintHintAllocatorCannotReturnNull(); + ReportErrorSummary(descr, stack); +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_flags.h b/compiler-rt/lib/memprof/memprof_flags.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_flags.h @@ -0,0 +1,45 @@ +//===-- memprof_flags.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef MEMPROF_FLAGS_H +#define MEMPROF_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +// MemProf flag values can be defined in four ways: +// 1) initialized with default values at startup. +// 2) overriden during compilation of MemProf runtime by providing +// compile definition MEMPROF_DEFAULT_OPTIONS. +// 3) overriden from string returned by user-specified function +// __memprof_default_options(). +// 4) overriden from env variable MEMPROF_OPTIONS. 
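As a concrete illustration of that ordering, an instrumented program can bake in its own defaults through the weak __memprof_default_options() hook (its weak definition appears at the end of memprof_flags.cpp below) while still letting the environment win. A hedged sketch:

// Parsed after the MEMPROF_DEFAULT_OPTIONS compile definition but before the
// MEMPROF_OPTIONS environment variable, so e.g. MEMPROF_OPTIONS="print_stats=0"
// still overrides the value returned here at run time.
extern "C" const char *__memprof_default_options(void) {
  return "print_stats=1:verbosity=1"; // ':'-separated name=value pairs
}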
+ +namespace __memprof { + +struct Flags { +#define MEMPROF_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "memprof_flags.inc" +#undef MEMPROF_FLAG + + void SetDefaults(); +}; + +extern Flags memprof_flags_dont_use_directly; +inline Flags *flags() { return &memprof_flags_dont_use_directly; } + +void InitializeFlags(); + +} // namespace __memprof + +#endif // MEMPROF_FLAGS_H diff --git a/compiler-rt/lib/memprof/memprof_flags.cpp b/compiler-rt/lib/memprof/memprof_flags.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_flags.cpp @@ -0,0 +1,106 @@ +//===-- memprof_flags.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "memprof_flags.h" +#include "memprof_interface_internal.h" +#include "memprof_stack.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_flags.h" + +namespace __memprof { + +Flags memprof_flags_dont_use_directly; // use via flags(). + +static const char *MaybeUseMemprofDefaultOptionsCompileDefinition() { +#ifdef MEMPROF_DEFAULT_OPTIONS + return SANITIZER_STRINGIFY(MEMPROF_DEFAULT_OPTIONS); +#else + return ""; +#endif +} + +void Flags::SetDefaults() { +#define MEMPROF_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "memprof_flags.inc" +#undef MEMPROF_FLAG +} + +static void RegisterMemprofFlags(FlagParser *parser, Flags *f) { +#define MEMPROF_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(parser, #Name, Description, &f->Name); +#include "memprof_flags.inc" +#undef MEMPROF_FLAG +} + +void InitializeFlags() { + // Set the default values and prepare for parsing MemProf and common flags. + SetCommonFlagsDefaults(); + { + CommonFlags cf; + cf.CopyFrom(*common_flags()); + cf.external_symbolizer_path = GetEnv("MEMPROF_SYMBOLIZER_PATH"); + cf.malloc_context_size = kDefaultMallocContextSize; + cf.intercept_tls_get_addr = true; + cf.exitcode = 1; + OverrideCommonFlags(cf); + } + Flags *f = flags(); + f->SetDefaults(); + + FlagParser memprof_parser; + RegisterMemprofFlags(&memprof_parser, f); + RegisterCommonFlags(&memprof_parser); + + // Override from MemProf compile definition. + const char *memprof_compile_def = + MaybeUseMemprofDefaultOptionsCompileDefinition(); + memprof_parser.ParseString(memprof_compile_def); + + // Override from user-specified string. + const char *memprof_default_options = __memprof_default_options(); + memprof_parser.ParseString(memprof_default_options); + + // Override from command line. + memprof_parser.ParseStringFromEnv("MEMPROF_OPTIONS"); + + InitializeCommonFlags(); + + if (Verbosity()) + ReportUnrecognizedFlags(); + + if (common_flags()->help) { + memprof_parser.PrintFlagDescriptions(); + } + + CHECK_LE((uptr)common_flags()->malloc_context_size, kStackTraceMax); + + if (!f->replace_str && common_flags()->intercept_strlen) { + Report("WARNING: strlen interceptor is enabled even though replace_str=0. 
" + "Use intercept_strlen=0 to disable it."); + } + if (!f->replace_str && common_flags()->intercept_strchr) { + Report("WARNING: strchr* interceptors are enabled even though " + "replace_str=0. Use intercept_strchr=0 to disable them."); + } + if (!f->replace_str && common_flags()->intercept_strndup) { + Report("WARNING: strndup* interceptors are enabled even though " + "replace_str=0. Use intercept_strndup=0 to disable them."); + } +} + +} // namespace __memprof + +SANITIZER_INTERFACE_WEAK_DEF(const char *, __memprof_default_options, void) { + return ""; +} diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_flags.inc @@ -0,0 +1,76 @@ +//===-- memprof_flags.inc --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MemProf runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_FLAG +#error "Define MEMPROF_FLAG prior to including this file!" +#endif + +// MEMPROF_FLAG(Type, Name, DefaultValue, Description) +// See COMMON_FLAG in sanitizer_flags.inc for more details. + +MEMPROF_FLAG( + bool, replace_str, true, + "If set, uses custom wrappers and replacements for libc string functions " + "to find more errors.") +MEMPROF_FLAG( + bool, replace_intrin, true, + "If set, uses custom wrappers for memset/memcpy/memmove intrinsics.") +MEMPROF_FLAG( + int, sleep_before_dying, 0, + "Number of seconds to sleep between printing an error report and " + "terminating the program. Useful for debugging purposes (e.g. when one " + "needs to attach gdb).") +MEMPROF_FLAG( + int, sleep_after_init, 0, + "Number of seconds to sleep after MemProfiler is initialized. " + "Useful for debugging purposes (e.g. when one needs to attach gdb).") +MEMPROF_FLAG(bool, unmap_shadow_on_exit, false, + "If set, explicitly unmaps the (huge) shadow at exit.") +MEMPROF_FLAG(bool, protect_shadow_gap, true, "If set, mprotect the shadow gap") +MEMPROF_FLAG(bool, print_stats, false, + "Print various statistics after printing an error message or if " + "atexit=1.") +MEMPROF_FLAG(bool, print_legend, true, + "Print the legend for the shadow bytes.") +MEMPROF_FLAG( + bool, atexit, false, + "If set, prints MemProf exit stats even after program terminates " + "successfully.") +MEMPROF_FLAG( + bool, print_full_thread_history, true, + "If set, prints thread creation stacks for the threads involved in the " + "report and their ancestors up to the main thread.") + +MEMPROF_FLAG(bool, halt_on_error, true, + "Crash the program after printing the first error report " + "(WARNING: USE AT YOUR OWN RISK!)") +MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true, + "realloc(p, 0) is equivalent to free(p) by default (Same as the " + "POSIX standard). 
If set to false, realloc(p, 0) will return a " + "pointer to an allocated space which can not be used.") +MEMPROF_FLAG( + bool, verify_memprof_link_order, true, + "Check position of MemProf runtime in library list (needs to be disabled" + " when other library has to be preloaded system-wide)") +MEMPROF_FLAG(bool, print_terse, false, + "If set, prints memory profile in a terse format.") +MEMPROF_FLAG(bool, dump_process_map, false, + "If set, prints the process memory map.") + +MEMPROF_FLAG( + int, mem_info_cache_entries, 16381, + "Size in entries of the mem info block cache, should be closest prime" + " number to a power of two for best hashing.") +MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false, + "If set, prints the miss rate of the mem info block cache.") +MEMPROF_FLAG( + bool, print_mem_info_cache_miss_rate_details, false, + "If set, prints detailed miss rates of the mem info block cache sets.") diff --git a/compiler-rt/lib/memprof/memprof_init_version.h b/compiler-rt/lib/memprof/memprof_init_version.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_init_version.h @@ -0,0 +1,26 @@ +//===-- memprof_init_version.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// This header defines a versioned __memprof_init function to be called at the +// startup of the instrumented program. +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_INIT_VERSION_H +#define MEMPROF_INIT_VERSION_H + +#include "sanitizer_common/sanitizer_platform.h" + +extern "C" { +// Every time the Memprof ABI changes we also change the version number in the +// __memprof_init function name. Objects built with incompatible Memprof ABI +// versions will not link with run-time. +#define __memprof_version_mismatch_check __memprof_version_mismatch_check_v1 +} + +#endif // MEMPROF_INIT_VERSION_H diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_interceptors.h @@ -0,0 +1,54 @@ +//===-- memprof_interceptors.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. 
+// +// MemProf-private header for memprof_interceptors.cpp +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_INTERCEPTORS_H +#define MEMPROF_INTERCEPTORS_H + +#include "interception/interception.h" +#include "memprof_interceptors_memintrinsics.h" +#include "memprof_internal.h" +#include "sanitizer_common/sanitizer_platform_interceptors.h" + +namespace __memprof { + +void InitializeMemprofInterceptors(); +void InitializePlatformInterceptors(); + +#define ENSURE_MEMPROF_INITED() \ + do { \ + CHECK(!memprof_init_is_running); \ + if (UNLIKELY(!memprof_inited)) { \ + MemprofInitFromRtl(); \ + } \ + } while (0) + +} // namespace __memprof + +DECLARE_REAL(int, memcmp, const void *a1, const void *a2, uptr size) +DECLARE_REAL(char *, strchr, const char *str, int c) +DECLARE_REAL(SIZE_T, strlen, const char *s) +DECLARE_REAL(char *, strncpy, char *to, const char *from, uptr size) +DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen) +DECLARE_REAL(char *, strstr, const char *s1, const char *s2) + +#define MEMPROF_INTERCEPT_FUNC(name) \ + do { \ + if (!INTERCEPT_FUNCTION(name)) \ + VReport(1, "MemProfiler: failed to intercept '%s'\n'", #name); \ + } while (0) +#define MEMPROF_INTERCEPT_FUNC_VER(name, ver) \ + do { \ + if (!INTERCEPT_FUNCTION_VER(name, ver)) \ + VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver); \ + } while (0) + +#endif // MEMPROF_INTERCEPTORS_H diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -0,0 +1,396 @@ +//===-- memprof_interceptors.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Intercept various libc functions. +//===----------------------------------------------------------------------===// + +#include "memprof_interceptors.h" +#include "memprof_allocator.h" +#include "memprof_internal.h" +#include "memprof_mapping.h" +#include "memprof_report.h" +#include "memprof_stack.h" +#include "memprof_stats.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_posix.h" + +namespace __memprof { + +#define MEMPROF_READ_STRING(s, n) MEMPROF_READ_RANGE((s), (n)) + +static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) { +#if SANITIZER_INTERCEPT_STRNLEN + if (REAL(strnlen)) { + return REAL(strnlen)(s, maxlen); + } +#endif + return internal_strnlen(s, maxlen); +} + +void SetThreadName(const char *name) { + MemprofThread *t = GetCurrentThread(); + if (t) + memprofThreadRegistry().SetThreadName(t->tid(), name); +} + +int OnExit() { + // FIXME: ask frontend whether we need to return failure. 
+ return 0; +} + +} // namespace __memprof + +// ---------------------- Wrappers ---------------- {{{1 +using namespace __memprof; + +DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr) +DECLARE_REAL_AND_INTERCEPTOR(void, free, void *) + +#define MEMPROF_INTERCEPTOR_ENTER(ctx, func) \ + ctx = 0; \ + (void)ctx; + +#define COMMON_INTERCEPT_FUNCTION(name) MEMPROF_INTERCEPT_FUNC(name) +#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ + MEMPROF_INTERCEPT_FUNC_VER(name, ver) +#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ + MEMPROF_WRITE_RANGE(ptr, size) +#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \ + MEMPROF_READ_RANGE(ptr, size) +#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \ + MEMPROF_INTERCEPTOR_ENTER(ctx, func); \ + do { \ + if (memprof_init_is_running) \ + return REAL(func)(__VA_ARGS__); \ + ENSURE_MEMPROF_INITED(); \ + } while (false) +#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \ + do { \ + } while (false) +#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) \ + do { \ + } while (false) +#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) \ + do { \ + } while (false) +#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd) \ + do { \ + } while (false) +#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) SetThreadName(name) +// Should be memprofThreadRegistry().SetThreadNameByUserId(thread, name) +// But memprof does not remember UserId's for threads (pthread_t); +// and remembers all ever existed threads, so the linear search by UserId +// can be slow. +#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \ + do { \ + } while (false) +#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name) +#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \ + do { \ + CheckNoDeepBind(filename, flag); \ + } while (false) +#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit() +#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) +#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() +#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!memprof_inited) +#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \ + if (MemprofThread *t = GetCurrentThread()) { \ + *begin = t->tls_begin(); \ + *end = t->tls_end(); \ + } else { \ + *begin = *end = 0; \ + } + +#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \ + do { \ + MEMPROF_INTERCEPTOR_ENTER(ctx, memmove); \ + MEMPROF_MEMMOVE_IMPL(to, from, size); \ + } while (false) + +#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \ + do { \ + MEMPROF_INTERCEPTOR_ENTER(ctx, memcpy); \ + MEMPROF_MEMCPY_IMPL(to, from, size); \ + } while (false) + +#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \ + do { \ + MEMPROF_INTERCEPTOR_ENTER(ctx, memset); \ + MEMPROF_MEMSET_IMPL(block, c, size); \ + } while (false) + +#include "sanitizer_common/sanitizer_common_interceptors.inc" +#include "sanitizer_common/sanitizer_signal_interceptors.inc" + +#define COMMON_SYSCALL_PRE_READ_RANGE(p, s) MEMPROF_READ_RANGE(p, s) +#define COMMON_SYSCALL_PRE_WRITE_RANGE(p, s) MEMPROF_WRITE_RANGE(p, s) +#define COMMON_SYSCALL_POST_READ_RANGE(p, s) \ + do { \ + (void)(p); \ + (void)(s); \ + } while (false) +#define COMMON_SYSCALL_POST_WRITE_RANGE(p, s) \ + do { \ + (void)(p); \ + (void)(s); \ + } while (false) +#include "sanitizer_common/sanitizer_common_syscalls.inc" + +struct ThreadStartParam { + atomic_uintptr_t t; + atomic_uintptr_t is_registered; +}; + +static thread_return_t THREAD_CALLING_CONV memprof_thread_start(void *arg) { + ThreadStartParam *param = reinterpret_cast(arg); + MemprofThread *t = nullptr; + while ((t = 
reinterpret_cast<MemprofThread *>( + atomic_load(&param->t, memory_order_acquire))) == nullptr) + internal_sched_yield(); + SetCurrentThread(t); + return t->ThreadStart(GetTid(), &param->is_registered); +} + +INTERCEPTOR(int, pthread_create, void *thread, void *attr, + void *(*start_routine)(void *), void *arg) { + EnsureMainThreadIDIsCorrect(); + GET_STACK_TRACE_THREAD; + int detached = 0; + if (attr) + REAL(pthread_attr_getdetachstate)(attr, &detached); + ThreadStartParam param; + atomic_store(&param.t, 0, memory_order_relaxed); + atomic_store(&param.is_registered, 0, memory_order_relaxed); + int result; + { + // Ignore all allocations made by pthread_create: thread stack/TLS may be + // stored by pthread for future reuse even after thread destruction, and + // the linked list it's stored in doesn't even hold valid pointers to the + // objects, the latter are calculated by obscure pointer arithmetic. + result = REAL(pthread_create)(thread, attr, memprof_thread_start, &param); + } + if (result == 0) { + u32 current_tid = GetCurrentTidOrInvalid(); + MemprofThread *t = MemprofThread::Create(start_routine, arg, current_tid, + &stack, detached); + atomic_store(&param.t, reinterpret_cast<uptr>(t), memory_order_release); + // Wait until the MemprofThread object is initialized and the + // ThreadRegistry entry is in "started" state. + while (atomic_load(&param.is_registered, memory_order_acquire) == 0) + internal_sched_yield(); + } + return result; +} + +INTERCEPTOR(int, pthread_join, void *t, void **arg) { + return real_pthread_join(t, arg); +} + +DEFINE_REAL_PTHREAD_FUNCTIONS + +INTERCEPTOR(char *, index, const char *string, int c) +ALIAS(WRAPPER_NAME(strchr)); + +// For both strcat() and strncat() we need to check the validity of |to| +// argument irrespective of the |from| length. +INTERCEPTOR(char *, strcat, char *to, const char *from) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strcat); + ENSURE_MEMPROF_INITED(); + if (flags()->replace_str) { + uptr from_length = REAL(strlen)(from); + MEMPROF_READ_RANGE(from, from_length + 1); + uptr to_length = REAL(strlen)(to); + MEMPROF_READ_STRING(to, to_length); + MEMPROF_WRITE_RANGE(to + to_length, from_length + 1); + } + return REAL(strcat)(to, from); +} + +INTERCEPTOR(char *, strncat, char *to, const char *from, uptr size) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strncat); + ENSURE_MEMPROF_INITED(); + if (flags()->replace_str) { + uptr from_length = MaybeRealStrnlen(from, size); + uptr copy_length = Min(size, from_length + 1); + MEMPROF_READ_RANGE(from, copy_length); + uptr to_length = REAL(strlen)(to); + MEMPROF_READ_STRING(to, to_length); + MEMPROF_WRITE_RANGE(to + to_length, from_length + 1); + } + return REAL(strncat)(to, from, size); +} + +INTERCEPTOR(char *, strcpy, char *to, const char *from) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strcpy); + if (memprof_init_is_running) { + return REAL(strcpy)(to, from); + } + ENSURE_MEMPROF_INITED(); + if (flags()->replace_str) { + uptr from_size = REAL(strlen)(from) + 1; + MEMPROF_READ_RANGE(from, from_size); + MEMPROF_WRITE_RANGE(to, from_size); + } + return REAL(strcpy)(to, from); +} + +INTERCEPTOR(char *, strdup, const char *s) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strdup); + if (UNLIKELY(!memprof_inited)) + return internal_strdup(s); + ENSURE_MEMPROF_INITED(); + uptr length = REAL(strlen)(s); + if (flags()->replace_str) { + MEMPROF_READ_RANGE(s, length + 1); + } + GET_STACK_TRACE_MALLOC; + void *new_mem = memprof_malloc(length + 1, &stack); + REAL(memcpy)(new_mem, s, length + 1); + return reinterpret_cast<char *>(new_mem); +} +
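For intuition, this is what the strcpy wrapper above records for a typical call when replace_str is enabled; a purely illustrative sketch (the ranges end up in __memprof_record_access_range via the MEMPROF_READ_RANGE / MEMPROF_WRITE_RANGE macros):

#include <cstring>

int main() {
  char dst[16];
  const char *src = "hello";
  // Goes through the interceptor above: the source is recorded as read and
  // the destination as written, both for strlen(src) + 1 == 6 bytes.
  strcpy(dst, src);
  return 0;
}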
+INTERCEPTOR(char *, __strdup, const char *s) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strdup); + if (UNLIKELY(!memprof_inited)) + return internal_strdup(s); + ENSURE_MEMPROF_INITED(); + uptr length = REAL(strlen)(s); + if (flags()->replace_str) { + MEMPROF_READ_RANGE(s, length + 1); + } + GET_STACK_TRACE_MALLOC; + void *new_mem = memprof_malloc(length + 1, &stack); + REAL(memcpy)(new_mem, s, length + 1); + return reinterpret_cast(new_mem); +} + +INTERCEPTOR(char *, strncpy, char *to, const char *from, uptr size) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strncpy); + ENSURE_MEMPROF_INITED(); + if (flags()->replace_str) { + uptr from_size = Min(size, MaybeRealStrnlen(from, size) + 1); + MEMPROF_READ_RANGE(from, from_size); + MEMPROF_WRITE_RANGE(to, size); + } + return REAL(strncpy)(to, from, size); +} + +INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strtol); + ENSURE_MEMPROF_INITED(); + if (!flags()->replace_str) { + return REAL(strtol)(nptr, endptr, base); + } + char *real_endptr; + long result = REAL(strtol)(nptr, &real_endptr, base); + StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); + return result; +} + +INTERCEPTOR(int, atoi, const char *nptr) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, atoi); + ENSURE_MEMPROF_INITED(); + if (!flags()->replace_str) { + return REAL(atoi)(nptr); + } + char *real_endptr; + // "man atoi" tells that behavior of atoi(nptr) is the same as + // strtol(nptr, 0, 10), i.e. it sets errno to ERANGE if the + // parsed integer can't be stored in *long* type (even if it's + // different from int). So, we just imitate this behavior. + int result = REAL(strtol)(nptr, &real_endptr, 10); + FixRealStrtolEndptr(nptr, &real_endptr); + MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1); + return result; +} + +INTERCEPTOR(long, atol, const char *nptr) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, atol); + ENSURE_MEMPROF_INITED(); + if (!flags()->replace_str) { + return REAL(atol)(nptr); + } + char *real_endptr; + long result = REAL(strtol)(nptr, &real_endptr, 10); + FixRealStrtolEndptr(nptr, &real_endptr); + MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1); + return result; +} + +INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, strtoll); + ENSURE_MEMPROF_INITED(); + if (!flags()->replace_str) { + return REAL(strtoll)(nptr, endptr, base); + } + char *real_endptr; + long long result = REAL(strtoll)(nptr, &real_endptr, base); + StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); + return result; +} + +INTERCEPTOR(long long, atoll, const char *nptr) { + void *ctx; + MEMPROF_INTERCEPTOR_ENTER(ctx, atoll); + ENSURE_MEMPROF_INITED(); + if (!flags()->replace_str) { + return REAL(atoll)(nptr); + } + char *real_endptr; + long long result = REAL(strtoll)(nptr, &real_endptr, 10); + FixRealStrtolEndptr(nptr, &real_endptr); + MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1); + return result; +} + +// ---------------------- InitializeMemprofInterceptors ---------------- {{{1 +namespace __memprof { +void InitializeMemprofInterceptors() { + static bool was_called_once; + CHECK(!was_called_once); + was_called_once = true; + InitializeCommonInterceptors(); + InitializeSignalInterceptors(); + + // Intercept str* functions. 
+ MEMPROF_INTERCEPT_FUNC(strcat); + MEMPROF_INTERCEPT_FUNC(strcpy); + MEMPROF_INTERCEPT_FUNC(strncat); + MEMPROF_INTERCEPT_FUNC(strncpy); + MEMPROF_INTERCEPT_FUNC(strdup); + MEMPROF_INTERCEPT_FUNC(__strdup); + MEMPROF_INTERCEPT_FUNC(index); + + MEMPROF_INTERCEPT_FUNC(atoi); + MEMPROF_INTERCEPT_FUNC(atol); + MEMPROF_INTERCEPT_FUNC(strtol); + MEMPROF_INTERCEPT_FUNC(atoll); + MEMPROF_INTERCEPT_FUNC(strtoll); + + // Intercept threading-related functions + MEMPROF_INTERCEPT_FUNC(pthread_create); + MEMPROF_INTERCEPT_FUNC(pthread_join); + + InitializePlatformInterceptors(); + + VReport(1, "MemProfiler: libc interceptors initialized\n"); +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h @@ -0,0 +1,85 @@ +//===-- memprof_interceptors_memintrinsics.h -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for memprof_interceptors_memintrinsics.cpp +//===---------------------------------------------------------------------===// +#ifndef MEMPROF_MEMINTRIN_H +#define MEMPROF_MEMINTRIN_H + +#include "interception/interception.h" +#include "memprof_interface_internal.h" +#include "memprof_internal.h" +#include "memprof_mapping.h" + +DECLARE_REAL(void *, memcpy, void *to, const void *from, uptr size) +DECLARE_REAL(void *, memset, void *block, int c, uptr size) + +namespace __memprof { + +// We implement ACCESS_MEMORY_RANGE, MEMPROF_READ_RANGE, +// and MEMPROF_WRITE_RANGE as macro instead of function so +// that no extra frames are created, and stack trace contains +// relevant information only. +#define ACCESS_MEMORY_RANGE(offset, size) \ + do { \ + __memprof_record_access_range(offset, size); \ + } while (0) + +// memcpy is called during __memprof_init() from the internals of printf(...). +// We do not treat memcpy with to==from as a bug. +// See http://llvm.org/bugs/show_bug.cgi?id=11763. +#define MEMPROF_MEMCPY_IMPL(to, from, size) \ + do { \ + if (UNLIKELY(!memprof_inited)) \ + return internal_memcpy(to, from, size); \ + if (memprof_init_is_running) { \ + return REAL(memcpy)(to, from, size); \ + } \ + ENSURE_MEMPROF_INITED(); \ + if (flags()->replace_intrin) { \ + MEMPROF_READ_RANGE(from, size); \ + MEMPROF_WRITE_RANGE(to, size); \ + } \ + return REAL(memcpy)(to, from, size); \ + } while (0) + +// memset is called inside Printf. 
+#define MEMPROF_MEMSET_IMPL(block, c, size) \ + do { \ + if (UNLIKELY(!memprof_inited)) \ + return internal_memset(block, c, size); \ + if (memprof_init_is_running) { \ + return REAL(memset)(block, c, size); \ + } \ + ENSURE_MEMPROF_INITED(); \ + if (flags()->replace_intrin) { \ + MEMPROF_WRITE_RANGE(block, size); \ + } \ + return REAL(memset)(block, c, size); \ + } while (0) + +#define MEMPROF_MEMMOVE_IMPL(to, from, size) \ + do { \ + if (UNLIKELY(!memprof_inited)) \ + return internal_memmove(to, from, size); \ + ENSURE_MEMPROF_INITED(); \ + if (flags()->replace_intrin) { \ + MEMPROF_READ_RANGE(from, size); \ + MEMPROF_WRITE_RANGE(to, size); \ + } \ + return internal_memmove(to, from, size); \ + } while (0) + +#define MEMPROF_READ_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size) +#define MEMPROF_WRITE_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size) + +} // namespace __memprof + +#endif // MEMPROF_MEMINTRIN_H diff --git a/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp @@ -0,0 +1,30 @@ +//===-- memprof_interceptors_memintrinsics.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf versions of memcpy, memmove, and memset. +//===---------------------------------------------------------------------===// + +#include "memprof_interceptors_memintrinsics.h" +#include "memprof_report.h" +#include "memprof_stack.h" + +using namespace __memprof; + +void *__memprof_memcpy(void *to, const void *from, uptr size) { + MEMPROF_MEMCPY_IMPL(to, from, size); +} + +void *__memprof_memset(void *block, int c, uptr size) { + MEMPROF_MEMSET_IMPL(block, c, size); +} + +void *__memprof_memmove(void *to, const void *from, uptr size) { + MEMPROF_MEMMOVE_IMPL(to, from, size); +} diff --git a/compiler-rt/lib/memprof/memprof_interface_internal.h b/compiler-rt/lib/memprof/memprof_interface_internal.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_interface_internal.h @@ -0,0 +1,63 @@ +//===-- memprof_interface_internal.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// This header declares the MemProfiler runtime interface functions. +// The runtime library has to define these functions so the instrumented program +// could call them. 
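Besides the calls inserted by the instrumentation, nothing prevents code from invoking these entry points directly, for example to account for memory touched on the program's behalf by uninstrumented code. A hedged sketch using the public header (the buffer size and scenario are made up):

#include <sanitizer/memprof_interface.h>
#include <cstdlib>

int main() {
  char *buf = static_cast<char *>(malloc(4096));
  // ... suppose an uninstrumented library fills buf here ...
  __memprof_record_access_range(buf, 4096); // account for the whole region
  free(buf);
  return 0;
}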
+// +// See also include/sanitizer/memprof_interface.h +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_INTERFACE_INTERNAL_H +#define MEMPROF_INTERFACE_INTERNAL_H + +#include "sanitizer_common/sanitizer_internal_defs.h" + +#include "memprof_init_version.h" + +using __sanitizer::u32; +using __sanitizer::u64; +using __sanitizer::uptr; + +extern "C" { +// This function should be called at the very beginning of the process, +// before any instrumented code is executed and before any call to malloc. +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_init(); +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_preinit(); +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_version_mismatch_check_v1(); + +SANITIZER_INTERFACE_ATTRIBUTE +void __memprof_record_access(void const volatile *addr); + +SANITIZER_INTERFACE_ATTRIBUTE +void __memprof_record_access_range(void const volatile *addr, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__memprof_on_error(); + +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_print_accumulated_stats(); + +SANITIZER_INTERFACE_ATTRIBUTE +const char *__memprof_default_options(); + +SANITIZER_INTERFACE_ATTRIBUTE +extern uptr __memprof_shadow_memory_dynamic_address; + +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_load(uptr p); +SANITIZER_INTERFACE_ATTRIBUTE void __memprof_store(uptr p); + +SANITIZER_INTERFACE_ATTRIBUTE +void *__memprof_memcpy(void *dst, const void *src, uptr size); +SANITIZER_INTERFACE_ATTRIBUTE +void *__memprof_memset(void *s, int c, uptr n); +SANITIZER_INTERFACE_ATTRIBUTE +void *__memprof_memmove(void *dest, const void *src, uptr n); +} // extern "C" + +#endif // MEMPROF_INTERFACE_INTERNAL_H diff --git a/compiler-rt/lib/memprof/memprof_internal.h b/compiler-rt/lib/memprof/memprof_internal.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_internal.h @@ -0,0 +1,116 @@ +//===-- memprof_internal.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header which defines various general utilities. +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_INTERNAL_H +#define MEMPROF_INTERNAL_H + +#include "memprof_flags.h" +#include "memprof_interface_internal.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_stacktrace.h" + +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +#error "The MemProfiler run-time should not be instrumented by MemProfiler" +#endif + +// Build-time configuration options. + +// If set, memprof will intercept C++ exception api call(s). +#ifndef MEMPROF_HAS_EXCEPTIONS +#define MEMPROF_HAS_EXCEPTIONS 1 +#endif + +#ifndef MEMPROF_DYNAMIC +#ifdef PIC +#define MEMPROF_DYNAMIC 1 +#else +#define MEMPROF_DYNAMIC 0 +#endif +#endif + +// All internal functions in memprof reside inside the __memprof namespace +// to avoid namespace collisions with the user programs. 
+// Separate namespace also makes it simpler to distinguish the memprof +// run-time functions from the instrumented user code in a profile. +namespace __memprof { + +class MemprofThread; +using __sanitizer::StackTrace; + +void MemprofInitFromRtl(); + +// memprof_rtl.cpp +void PrintAddressSpaceLayout(); + +// memprof_shadow_setup.cpp +void InitializeShadowMemory(); + +// memprof_malloc_linux.cpp +void ReplaceSystemMalloc(); + +// memprof_linux.cpp +uptr FindDynamicShadowStart(); +void *MemprofDoesNotSupportStaticLinkage(); +void MemprofCheckDynamicRTPrereqs(); +void MemprofCheckIncompatibleRT(); + +// memprof_thread.cpp +MemprofThread *CreateMainThread(); + +void ReadContextStack(void *context, uptr *stack, uptr *ssize); + +// Wrapper for TLS/TSD. +void MemprofTSDInit(void (*destructor)(void *tsd)); +void *MemprofTSDGet(); +void MemprofTSDSet(void *tsd); +void PlatformTSDDtor(void *tsd); + +void AppendToErrorMessageBuffer(const char *buffer); + +void *MemprofDlSymNext(const char *sym); + +// Returns `true` iff most of MemProf init process should be skipped due to +// the MemProf library being loaded via `dlopen()`. Platforms may perform any +// `dlopen()` specific initialization inside this function. +bool HandleDlopenInit(); + +// Add convenient macro for interface functions that may be represented as +// weak hooks. +#define MEMPROF_MALLOC_HOOK(ptr, size) \ + do { \ + if (&__sanitizer_malloc_hook) \ + __sanitizer_malloc_hook(ptr, size); \ + RunMallocHooks(ptr, size); \ + } while (false) +#define MEMPROF_FREE_HOOK(ptr) \ + do { \ + if (&__sanitizer_free_hook) \ + __sanitizer_free_hook(ptr); \ + RunFreeHooks(ptr); \ + } while (false) +#define MEMPROF_ON_ERROR() \ + if (&__memprof_on_error) \ + __memprof_on_error() + +extern int memprof_inited; +extern int memprof_timestamp_inited; +extern int memprof_init_done; +// Used to avoid infinite recursion in __memprof_init(). +extern bool memprof_init_is_running; +extern void (*death_callback)(void); +extern long memprof_init_timestamp_s; + +} // namespace __memprof + +#endif // MEMPROF_INTERNAL_H diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -0,0 +1,156 @@ +//===-- memprof_linux.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Linux-specific details. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if !SANITIZER_LINUX +#error Unsupported OS +#endif + +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_freebsd.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_procmaps.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { + MEMPROF_RT_VERSION_UNDEFINED = 0, + MEMPROF_RT_VERSION_DYNAMIC, + MEMPROF_RT_VERSION_STATIC, +} memprof_rt_version_t; + +// FIXME: perhaps also store abi version here? 
+extern "C" { +SANITIZER_INTERFACE_ATTRIBUTE +memprof_rt_version_t __memprof_rt_version; +} + +namespace __memprof { + +void InitializePlatformInterceptors() {} +void InitializePlatformExceptionHandlers() {} + +void *MemprofDoesNotSupportStaticLinkage() { + // This will fail to link with -static. + return &_DYNAMIC; // defined in link.h +} + +uptr FindDynamicShadowStart() { + uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); + return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, + /*min_shadow_base_alignment*/ 0, kHighMemEnd); +} + +static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size, + void *data) { + VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", info->dlpi_name, + info->dlpi_addr); + + // Continue until the first dynamic library is found + if (!info->dlpi_name || info->dlpi_name[0] == 0) + return 0; + + // Ignore vDSO + if (internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0) + return 0; + + *(const char **)data = info->dlpi_name; + return 1; +} + +static bool IsDynamicRTName(const char *libname) { + return internal_strstr(libname, "libclang_rt.memprof"); +} + +static void ReportIncompatibleRT() { + Report("Your application is linked against incompatible MemProf runtimes.\n"); + Die(); +} + +void MemprofCheckDynamicRTPrereqs() { + if (!MEMPROF_DYNAMIC || !flags()->verify_memprof_link_order) + return; + + // Ensure that dynamic RT is the first DSO in the list + const char *first_dso_name = nullptr; + dl_iterate_phdr(FindFirstDSOCallback, &first_dso_name); + if (first_dso_name && !IsDynamicRTName(first_dso_name)) { + Report("MemProf runtime does not come first in initial library list; " + "you should either link runtime to your application or " + "manually preload it with LD_PRELOAD.\n"); + Die(); + } +} + +void MemprofCheckIncompatibleRT() { + if (MEMPROF_DYNAMIC) { + if (__memprof_rt_version == MEMPROF_RT_VERSION_UNDEFINED) { + __memprof_rt_version = MEMPROF_RT_VERSION_DYNAMIC; + } else if (__memprof_rt_version != MEMPROF_RT_VERSION_DYNAMIC) { + ReportIncompatibleRT(); + } + } else { + if (__memprof_rt_version == MEMPROF_RT_VERSION_UNDEFINED) { + // Ensure that dynamic runtime is not present. We should detect it + // as early as possible, otherwise MemProf interceptors could bind to + // the functions in dynamic MemProf runtime instead of the functions in + // system libraries, causing crashes later in MemProf initialization. + MemoryMappingLayout proc_maps(/*cache_enabled*/ true); + char filename[PATH_MAX]; + MemoryMappedSegment segment(filename, sizeof(filename)); + while (proc_maps.Next(&segment)) { + if (IsDynamicRTName(segment.filename)) { + Report("Your application is linked against " + "incompatible MemProf runtimes.\n"); + Die(); + } + } + __memprof_rt_version = MEMPROF_RT_VERSION_STATIC; + } else if (__memprof_rt_version != MEMPROF_RT_VERSION_STATIC) { + ReportIncompatibleRT(); + } + } +} + +void ReadContextStack(void *context, uptr *stack, uptr *ssize) { + ucontext_t *ucp = (ucontext_t *)context; + *stack = (uptr)ucp->uc_stack.ss_sp; + *ssize = ucp->uc_stack.ss_size; +} + +void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } + +bool HandleDlopenInit() { + // Not supported on this platform. 
+ static_assert(!SANITIZER_SUPPORTS_INIT_FOR_DLOPEN, + "Expected SANITIZER_SUPPORTS_INIT_FOR_DLOPEN to be false"); + return false; +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp @@ -0,0 +1,226 @@ +//===-- memprof_malloc_linux.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Linux-specific malloc interception. +// We simply define functions like malloc, free, realloc, etc. +// They will replace the corresponding libc functions automagically. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if !SANITIZER_LINUX +#error Unsupported OS +#endif + +#include "memprof_allocator.h" +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_stack.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_errno.h" +#include "sanitizer_common/sanitizer_tls_get_addr.h" + +// ---------------------- Replacement functions ---------------- {{{1 +using namespace __memprof; + +static uptr allocated_for_dlsym; +static uptr last_dlsym_alloc_size_in_words; +static const uptr kDlsymAllocPoolSize = 1024; +static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize]; + +static INLINE bool IsInDlsymAllocPool(const void *ptr) { + uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym; + return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]); +} + +static void *AllocateFromLocalPool(uptr size_in_bytes) { + uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize; + void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym]; + last_dlsym_alloc_size_in_words = size_in_words; + allocated_for_dlsym += size_in_words; + CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize); + return mem; +} + +static void DeallocateFromLocalPool(const void *ptr) { + // Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store + // error messages and instead uses malloc followed by free. To avoid pool + // exhaustion due to long object filenames, handle that special case here. 
+ uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words; + void *prev_mem = (void *)&alloc_memory_for_dlsym[prev_offset]; + if (prev_mem == ptr) { + REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize); + allocated_for_dlsym = prev_offset; + last_dlsym_alloc_size_in_words = 0; + } +} + +static int PosixMemalignFromLocalPool(void **memptr, uptr alignment, + uptr size_in_bytes) { + if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) + return errno_EINVAL; + + CHECK(alignment >= kWordSize); + + uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym]; + uptr aligned_addr = RoundUpTo(addr, alignment); + uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize); + + uptr *end_mem = (uptr *)(aligned_addr + aligned_size); + uptr allocated = end_mem - alloc_memory_for_dlsym; + if (allocated >= kDlsymAllocPoolSize) + return errno_ENOMEM; + + allocated_for_dlsym = allocated; + *memptr = (void *)aligned_addr; + return 0; +} + +static INLINE bool MaybeInDlsym() { return memprof_init_is_running; } + +static INLINE bool UseLocalPool() { return MaybeInDlsym(); } + +static void *ReallocFromLocalPool(void *ptr, uptr size) { + const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym; + const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset); + void *new_ptr; + if (UNLIKELY(UseLocalPool())) { + new_ptr = AllocateFromLocalPool(size); + } else { + ENSURE_MEMPROF_INITED(); + GET_STACK_TRACE_MALLOC; + new_ptr = memprof_malloc(size, &stack); + } + internal_memcpy(new_ptr, ptr, copy_size); + return new_ptr; +} + +INTERCEPTOR(void, free, void *ptr) { + GET_STACK_TRACE_FREE; + if (UNLIKELY(IsInDlsymAllocPool(ptr))) { + DeallocateFromLocalPool(ptr); + return; + } + memprof_free(ptr, &stack, FROM_MALLOC); +} + +#if SANITIZER_INTERCEPT_CFREE +INTERCEPTOR(void, cfree, void *ptr) { + GET_STACK_TRACE_FREE; + if (UNLIKELY(IsInDlsymAllocPool(ptr))) + return; + memprof_free(ptr, &stack, FROM_MALLOC); +} +#endif // SANITIZER_INTERCEPT_CFREE + +INTERCEPTOR(void *, malloc, uptr size) { + if (UNLIKELY(UseLocalPool())) + // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym. + return AllocateFromLocalPool(size); + ENSURE_MEMPROF_INITED(); + GET_STACK_TRACE_MALLOC; + return memprof_malloc(size, &stack); +} + +INTERCEPTOR(void *, calloc, uptr nmemb, uptr size) { + if (UNLIKELY(UseLocalPool())) + // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym. 
+ return AllocateFromLocalPool(nmemb * size); + ENSURE_MEMPROF_INITED(); + GET_STACK_TRACE_MALLOC; + return memprof_calloc(nmemb, size, &stack); +} + +INTERCEPTOR(void *, realloc, void *ptr, uptr size) { + if (UNLIKELY(IsInDlsymAllocPool(ptr))) + return ReallocFromLocalPool(ptr, size); + if (UNLIKELY(UseLocalPool())) + return AllocateFromLocalPool(size); + ENSURE_MEMPROF_INITED(); + GET_STACK_TRACE_MALLOC; + return memprof_realloc(ptr, size, &stack); +} + +#if SANITIZER_INTERCEPT_REALLOCARRAY +INTERCEPTOR(void *, reallocarray, void *ptr, uptr nmemb, uptr size) { + ENSURE_MEMPROF_INITED(); + GET_STACK_TRACE_MALLOC; + return memprof_reallocarray(ptr, nmemb, size, &stack); +} +#endif // SANITIZER_INTERCEPT_REALLOCARRAY + +#if SANITIZER_INTERCEPT_MEMALIGN +INTERCEPTOR(void *, memalign, uptr boundary, uptr size) { + GET_STACK_TRACE_MALLOC; + return memprof_memalign(boundary, size, &stack, FROM_MALLOC); +} + +INTERCEPTOR(void *, __libc_memalign, uptr boundary, uptr size) { + GET_STACK_TRACE_MALLOC; + void *res = memprof_memalign(boundary, size, &stack, FROM_MALLOC); + DTLS_on_libc_memalign(res, size); + return res; +} +#endif // SANITIZER_INTERCEPT_MEMALIGN + +#if SANITIZER_INTERCEPT_ALIGNED_ALLOC +INTERCEPTOR(void *, aligned_alloc, uptr boundary, uptr size) { + GET_STACK_TRACE_MALLOC; + return memprof_aligned_alloc(boundary, size, &stack); +} +#endif // SANITIZER_INTERCEPT_ALIGNED_ALLOC + +INTERCEPTOR(uptr, malloc_usable_size, void *ptr) { + GET_CURRENT_PC_BP_SP; + (void)sp; + return memprof_malloc_usable_size(ptr, pc, bp); +} + +#if SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO +// We avoid including malloc.h for portability reasons. +// man mallinfo says the fields are "long", but the implementation uses int. +// It doesn't matter much -- we just need to make sure that the libc's mallinfo +// is not called. +struct fake_mallinfo { + int x[10]; +}; + +INTERCEPTOR(struct fake_mallinfo, mallinfo, void) { + struct fake_mallinfo res; + REAL(memset)(&res, 0, sizeof(res)); + return res; +} + +INTERCEPTOR(int, mallopt, int cmd, int value) { return 0; } +#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO + +INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) { + if (UNLIKELY(UseLocalPool())) + return PosixMemalignFromLocalPool(memptr, alignment, size); + GET_STACK_TRACE_MALLOC; + return memprof_posix_memalign(memptr, alignment, size, &stack); +} + +INTERCEPTOR(void *, valloc, uptr size) { + GET_STACK_TRACE_MALLOC; + return memprof_valloc(size, &stack); +} + +#if SANITIZER_INTERCEPT_PVALLOC +INTERCEPTOR(void *, pvalloc, uptr size) { + GET_STACK_TRACE_MALLOC; + return memprof_pvalloc(size, &stack); +} +#endif // SANITIZER_INTERCEPT_PVALLOC + +INTERCEPTOR(void, malloc_stats, void) { __memprof_print_accumulated_stats(); } + +namespace __memprof { +void ReplaceSystemMalloc() {} +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mapping.h @@ -0,0 +1,142 @@ +//===-- memprof_mapping.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Defines MemProf memory mapping. 
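+// With the default values defined below, each MEM_GRANULARITY (64-byte) chunk
+// of application memory maps to one SHADOW_ENTRY_SIZE (8-byte) counter:
+//   shadow_addr = ((addr & ~(MEM_GRANULARITY - 1)) >> SHADOW_SCALE) + SHADOW_OFFSET
+// For example, with SHADOW_SCALE == 3 every address in [0x1000, 0x103f] maps
+// to the same 8-byte slot at SHADOW_OFFSET + 0x200, which RecordAccess()
+// below simply increments.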
+//===----------------------------------------------------------------------===// +#ifndef MEMPROF_MAPPING_H +#define MEMPROF_MAPPING_H + +#include "memprof_internal.h" + +#if defined(MEMPROF_SHADOW_SCALE) +static const u64 kDefaultShadowScale = MEMPROF_SHADOW_SCALE; +#else +static const u64 kDefaultShadowScale = 3; +#endif +#define SHADOW_SCALE kDefaultShadowScale + +#define SHADOW_OFFSET __memprof_shadow_memory_dynamic_address + +#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE) +#define MEMPROF_ALIGNMENT 32 + +#define DO_MEMPROF_MAPPING_PROFILE 0 // Set to 1 to profile the functions below. + +#if DO_MEMPROF_MAPPING_PROFILE +#define PROFILE_MEMPROF_MAPPING() MemprofMappingProfile[__LINE__]++; +#else +#define PROFILE_MEMPROF_MAPPING() +#endif + +namespace __memprof { + +extern uptr MemprofMappingProfile[]; + +extern uptr kHighMemEnd; // Initialized in __memprof_init. + +} // namespace __memprof + +#define SHADOW_ENTRY_SIZE 8 + +// Size of memory block mapped to a single shadow location +#define MEM_GRANULARITY 64ULL + +#define SHADOW_MASK ~(MEM_GRANULARITY - 1) + +#define MEM_TO_SHADOW(mem) \ + (((mem & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET)) + +#define kLowMemBeg 0 +#define kLowMemEnd (SHADOW_OFFSET ? SHADOW_OFFSET - 1 : 0) + +#define kLowShadowBeg SHADOW_OFFSET +#define kLowShadowEnd (MEM_TO_SHADOW(kLowMemEnd) + SHADOW_ENTRY_SIZE - 1) + +#define kHighMemBeg (MEM_TO_SHADOW(kHighMemEnd) + 1 + SHADOW_ENTRY_SIZE - 1) + +#define kHighShadowBeg MEM_TO_SHADOW(kHighMemBeg) +#define kHighShadowEnd (MEM_TO_SHADOW(kHighMemEnd) + SHADOW_ENTRY_SIZE - 1) + +// With the zero shadow base we can not actually map pages starting from 0. +// This constant is somewhat arbitrary. +#define kZeroBaseShadowStart 0 +#define kZeroBaseMaxShadowStart (1 << 18) + +#define kShadowGapBeg (kLowShadowEnd ? kLowShadowEnd + 1 : kZeroBaseShadowStart) +#define kShadowGapEnd (kHighShadowBeg - 1) + +namespace __memprof { + +static inline uptr MemToShadowSize(uptr size) { return size >> SHADOW_SCALE; } +static inline bool AddrIsInLowMem(uptr a) { + PROFILE_MEMPROF_MAPPING(); + return a <= kLowMemEnd; +} + +static inline bool AddrIsInLowShadow(uptr a) { + PROFILE_MEMPROF_MAPPING(); + return a >= kLowShadowBeg && a <= kLowShadowEnd; +} + +static inline bool AddrIsInHighMem(uptr a) { + PROFILE_MEMPROF_MAPPING(); + return kHighMemBeg && a >= kHighMemBeg && a <= kHighMemEnd; +} + +static inline bool AddrIsInHighShadow(uptr a) { + PROFILE_MEMPROF_MAPPING(); + return kHighMemBeg && a >= kHighShadowBeg && a <= kHighShadowEnd; +} + +static inline bool AddrIsInShadowGap(uptr a) { + PROFILE_MEMPROF_MAPPING(); + // In zero-based shadow mode we treat addresses near zero as addresses + // in shadow gap as well. 
+  if (SHADOW_OFFSET == 0)
+    return a <= kShadowGapEnd;
+  return a >= kShadowGapBeg && a <= kShadowGapEnd;
+}
+
+static inline bool AddrIsInMem(uptr a) {
+  PROFILE_MEMPROF_MAPPING();
+  return AddrIsInLowMem(a) || AddrIsInHighMem(a) ||
+         (flags()->protect_shadow_gap == 0 && AddrIsInShadowGap(a));
+}
+
+static inline uptr MemToShadow(uptr p) {
+  PROFILE_MEMPROF_MAPPING();
+  CHECK(AddrIsInMem(p));
+  return MEM_TO_SHADOW(p);
+}
+
+static inline bool AddrIsInShadow(uptr a) {
+  PROFILE_MEMPROF_MAPPING();
+  return AddrIsInLowShadow(a) || AddrIsInHighShadow(a);
+}
+
+static inline bool AddrIsAlignedByGranularity(uptr a) {
+  PROFILE_MEMPROF_MAPPING();
+  return (a & (SHADOW_GRANULARITY - 1)) == 0;
+}
+
+static inline void RecordAccess(uptr a) {
+  PROFILE_MEMPROF_MAPPING();
+  // If we use a different shadow size then the type below needs adjustment.
+  CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
+  u64 *shadow_address = (u64 *)MEM_TO_SHADOW(a);
+  (*shadow_address)++;
+}
+
+// Must be after all calls to PROFILE_MEMPROF_MAPPING().
+static const uptr kMemprofMappingProfileSize = __LINE__;
+
+} // namespace __memprof
+
+#endif // MEMPROF_MAPPING_H
diff --git a/compiler-rt/lib/memprof/memprof_new_delete.cpp b/compiler-rt/lib/memprof/memprof_new_delete.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_new_delete.cpp
@@ -0,0 +1,145 @@
+//===-- memprof_new_delete.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Interceptors for operators new and delete.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+#include "memprof_report.h"
+#include "memprof_stack.h"
+
+#include "interception/interception.h"
+
+#include <stddef.h>
+
+#define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
+
+using namespace __memprof;
+
+// Fake std::nothrow_t and std::align_val_t to avoid including <new>.
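+// These stand-ins only need to give the replacement operators the same
+// mangled signatures as the standard ones; pulling in the real <new> header
+// is avoided to keep the runtime independent of the C++ standard library.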
+namespace std {
+struct nothrow_t {};
+enum class align_val_t : size_t {};
+} // namespace std
+
+#define OPERATOR_NEW_BODY(type, nothrow) \
+  GET_STACK_TRACE_MALLOC; \
+  void *res = memprof_memalign(0, size, &stack, type); \
+  if (!nothrow && UNLIKELY(!res)) \
+    ReportOutOfMemory(size, &stack); \
+  return res;
+#define OPERATOR_NEW_BODY_ALIGN(type, nothrow) \
+  GET_STACK_TRACE_MALLOC; \
+  void *res = memprof_memalign((uptr)align, size, &stack, type); \
+  if (!nothrow && UNLIKELY(!res)) \
+    ReportOutOfMemory(size, &stack); \
+  return res;
+
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::nothrow_t const &) {
+  OPERATOR_NEW_BODY(FROM_NEW, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::nothrow_t const &) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align,
+                   std::nothrow_t const &) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align,
+                     std::nothrow_t const &) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, true /*nothrow*/);
+}
+
+#define OPERATOR_DELETE_BODY(type) \
+  GET_STACK_TRACE_FREE; \
+  memprof_delete(ptr, 0, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE(type) \
+  GET_STACK_TRACE_FREE; \
+  memprof_delete(ptr, size, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_ALIGN(type) \
+  GET_STACK_TRACE_FREE; \
+  memprof_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type) \
+  GET_STACK_TRACE_FREE; \
+  memprof_delete(ptr, size, static_cast<uptr>(align), &stack, type);
+
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr)NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW); }
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr) NOEXCEPT {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size)NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size) NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align)NOEXCEPT {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align,
+                     std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align,
+                       std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size,
+                     std::align_val_t align)NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size,
+                       std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW_BR);
+}
diff --git a/compiler-rt/lib/memprof/memprof_posix.cpp b/compiler-rt/lib/memprof/memprof_posix.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_posix.cpp
@@ -0,0 +1,66 @@
+//===-- memprof_posix.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Posix-specific details.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if !SANITIZER_POSIX
+#error Only Posix supported
+#endif
+
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+#include "memprof_report.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_posix.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+namespace __memprof {
+
+// ---------------------- TSD ---------------- {{{1
+
+static pthread_key_t tsd_key;
+static bool tsd_key_inited = false;
+void MemprofTSDInit(void (*destructor)(void *tsd)) {
+  CHECK(!tsd_key_inited);
+  tsd_key_inited = true;
+  CHECK_EQ(0, pthread_key_create(&tsd_key, destructor));
+}
+
+void *MemprofTSDGet() {
+  CHECK(tsd_key_inited);
+  return pthread_getspecific(tsd_key);
+}
+
+void MemprofTSDSet(void *tsd) {
+  CHECK(tsd_key_inited);
+  pthread_setspecific(tsd_key, tsd);
+}
+
+void PlatformTSDDtor(void *tsd) {
+  MemprofThreadContext *context = (MemprofThreadContext *)tsd;
+  if (context->destructor_iterations > 1) {
+    context->destructor_iterations--;
+    CHECK_EQ(0, pthread_setspecific(tsd_key, tsd));
+    return;
+  }
+  MemprofThread::TSDDtor(tsd);
+}
+} // namespace __memprof
diff --git a/compiler-rt/lib/memprof/memprof_preinit.cpp b/compiler-rt/lib/memprof/memprof_preinit.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_preinit.cpp
@@ -0,0 +1,23 @@
+//===-- memprof_preinit.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Call __memprof_init at the very early stage of process startup.
+//===----------------------------------------------------------------------===//
+#include "memprof_internal.h"
+
+using namespace __memprof;
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+// The symbol is called __local_memprof_preinit, because it's not intended to
+// be exported. This code is linked into the main executable when -fmemory-profile
+// is in the link flags. It can only use exported interface functions.
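+// Entries in .preinit_array run before any shared-library initializers, so
+// this triggers __memprof_init even earlier than the MemprofInitializer
+// global used for the dynamic runtime in memprof_rtl.cpp. At this point only
+// the exported __memprof_* entry points may be called.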
+__attribute__((section(".preinit_array"), + used)) void (*__local_memprof_preinit)(void) = __memprof_preinit; +#endif diff --git a/compiler-rt/lib/memprof/memprof_report.h b/compiler-rt/lib/memprof/memprof_report.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_report.h @@ -0,0 +1,40 @@ +//===-- memprof_report.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for error reporting functions. +//===----------------------------------------------------------------------===// + +#ifndef MEMPROF_REPORT_H +#define MEMPROF_REPORT_H + +#include "memprof_allocator.h" +#include "memprof_internal.h" +#include "memprof_thread.h" + +namespace __memprof { + +// Different kinds of error reports. +void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack); +void ReportReallocArrayOverflow(uptr count, uptr size, + BufferedStackTrace *stack); +void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack); +void ReportInvalidAllocationAlignment(uptr alignment, + BufferedStackTrace *stack); +void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment, + BufferedStackTrace *stack); +void ReportInvalidPosixMemalignAlignment(uptr alignment, + BufferedStackTrace *stack); +void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size, + BufferedStackTrace *stack); +void ReportRssLimitExceeded(BufferedStackTrace *stack); +void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack); + +} // namespace __memprof +#endif // MEMPROF_REPORT_H diff --git a/compiler-rt/lib/memprof/memprof_report.cpp b/compiler-rt/lib/memprof/memprof_report.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_report.cpp @@ -0,0 +1,211 @@ +//===-- memprof_report.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// This file contains error reporting code. 
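+// The Report*() entry points below are called from the allocator on fatal
+// conditions; they all funnel through ScopedInErrorReport, which serializes
+// concurrent reports, prints the error description plus accumulated stats,
+// and aborts when halt_on_error is set.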
+//===----------------------------------------------------------------------===//
+
+#include "memprof_report.h"
+#include "memprof_descriptions.h"
+#include "memprof_errors.h"
+#include "memprof_flags.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+namespace __memprof {
+
+// -------------------- User-specified callbacks ----------------- {{{1
+static char *error_message_buffer = nullptr;
+static uptr error_message_buffer_pos = 0;
+static BlockingMutex error_message_buf_mutex(LINKER_INITIALIZED);
+
+void AppendToErrorMessageBuffer(const char *buffer) {
+  BlockingMutexLock l(&error_message_buf_mutex);
+  if (!error_message_buffer) {
+    error_message_buffer =
+        (char *)MmapOrDieQuietly(kErrorMessageBufferSize, __func__);
+    error_message_buffer_pos = 0;
+  }
+  uptr length = internal_strlen(buffer);
+  RAW_CHECK(kErrorMessageBufferSize >= error_message_buffer_pos);
+  uptr remaining = kErrorMessageBufferSize - error_message_buffer_pos;
+  internal_strncpy(error_message_buffer + error_message_buffer_pos, buffer,
+                   remaining);
+  error_message_buffer[kErrorMessageBufferSize - 1] = '\0';
+  // FIXME: reallocate the buffer instead of truncating the message.
+  error_message_buffer_pos += Min(remaining, length);
+}
+
+// -------------------- Different kinds of reports ----------------- {{{1
+
+// Use ScopedInErrorReport to run common actions just before and
+// immediately after printing error report.
+class ScopedInErrorReport {
+public:
+  explicit ScopedInErrorReport(bool fatal = false)
+      : halt_on_error_(fatal || flags()->halt_on_error) {
+    // Make sure the registry and sanitizer report mutexes are locked while
+    // we're printing an error report.
+    // We can lock them only here to avoid self-deadlock in case of
+    // recursive reports.
+    memprofThreadRegistry().Lock();
+    Printf(
+        "=================================================================\n");
+  }
+
+  ~ScopedInErrorReport() {
+    if (halt_on_error_ && !__sanitizer_acquire_crash_state()) {
+      memprofThreadRegistry().Unlock();
+      return;
+    }
+    MEMPROF_ON_ERROR();
+    if (current_error_.IsValid())
+      current_error_.Print();
+
+    // Make sure the current thread is announced.
+    DescribeThread(GetCurrentThread());
+    // We may want to grab this lock again when printing stats.
+    memprofThreadRegistry().Unlock();
+    // Print memory stats.
+    if (flags()->print_stats)
+      __memprof_print_accumulated_stats();
+
+    if (common_flags()->print_cmdline)
+      PrintCmdline();
+
+    if (common_flags()->print_module_map == 2)
+      PrintModuleMap();
+
+    // Copy the message buffer so that we could start logging without holding a
+    // lock that gets acquired during printing.
+    InternalMmapVector<char> buffer_copy(kErrorMessageBufferSize);
+    {
+      BlockingMutexLock l(&error_message_buf_mutex);
+      internal_memcpy(buffer_copy.data(), error_message_buffer,
+                      kErrorMessageBufferSize);
+      // Clear error_message_buffer so that if we find other errors
+      // we don't re-log this error.
+      error_message_buffer_pos = 0;
+    }
+
+    LogFullErrorReport(buffer_copy.data());
+
+    if (halt_on_error_ && common_flags()->abort_on_error) {
+      // FIXME: implement "compact" error format, possibly without, or with
+      // highly compressed stack traces?
+      // FIXME: or just use the summary line as abort message?
+      SetAbortMessage(buffer_copy.data());
+    }
+
+    // In halt_on_error = false mode, reset the current error object (before
+    // unlocking).
+    if (!halt_on_error_)
+      internal_memset(&current_error_, 0, sizeof(current_error_));
+
+    if (halt_on_error_) {
+      Report("ABORTING\n");
+      Die();
+    }
+  }
+
+  void ReportError(const ErrorDescription &description) {
+    // Can only report one error per ScopedInErrorReport.
+    CHECK_EQ(current_error_.kind, kErrorKindInvalid);
+    internal_memcpy(&current_error_, &description, sizeof(current_error_));
+  }
+
+  static ErrorDescription &CurrentError() { return current_error_; }
+
+private:
+  ScopedErrorReportLock error_report_lock_;
+  // Error currently being reported. This enables the destructor to interact
+  // with the debugger and point it to an error description.
+  static ErrorDescription current_error_;
+  bool halt_on_error_;
+};
+
+ErrorDescription ScopedInErrorReport::current_error_(LINKER_INITIALIZED);
+
+void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorCallocOverflow error(GetCurrentTidOrInvalid(), stack, count, size);
+  in_report.ReportError(error);
+}
+
+void ReportReallocArrayOverflow(uptr count, uptr size,
+                                BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorReallocArrayOverflow error(GetCurrentTidOrInvalid(), stack, count, size);
+  in_report.ReportError(error);
+}
+
+void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorPvallocOverflow error(GetCurrentTidOrInvalid(), stack, size);
+  in_report.ReportError(error);
+}
+
+void ReportInvalidAllocationAlignment(uptr alignment,
+                                      BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorInvalidAllocationAlignment error(GetCurrentTidOrInvalid(), stack,
+                                        alignment);
+  in_report.ReportError(error);
+}
+
+void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
+                                        BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorInvalidAlignedAllocAlignment error(GetCurrentTidOrInvalid(), stack, size,
+                                          alignment);
+  in_report.ReportError(error);
+}
+
+void ReportInvalidPosixMemalignAlignment(uptr alignment,
+                                         BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorInvalidPosixMemalignAlignment error(GetCurrentTidOrInvalid(), stack,
+                                           alignment);
+  in_report.ReportError(error);
+}
+
+void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
+                                BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorAllocationSizeTooBig error(GetCurrentTidOrInvalid(), stack, user_size,
+                                  total_size, max_size);
+  in_report.ReportError(error);
+}
+
+void ReportRssLimitExceeded(BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorRssLimitExceeded error(GetCurrentTidOrInvalid(), stack);
+  in_report.ReportError(error);
+}
+
+void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack) {
+  ScopedInErrorReport in_report(/*fatal=*/true);
+  ErrorOutOfMemory error(GetCurrentTidOrInvalid(), stack, requested_size);
+  in_report.ReportError(error);
+}
+
+} // namespace __memprof
+
+// --------------------------- Interface --------------------- {{{1
+
+// Provide default implementation of __memprof_on_error that does nothing
+// and may be overridden by user.
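+// A program can hook this by defining its own strong symbol, for example
+// (illustrative only):
+//   extern "C" void __memprof_on_error(void) { /* flush application logs */ }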
+SANITIZER_INTERFACE_WEAK_DEF(void, __memprof_on_error, void) {} diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_rtl.cpp @@ -0,0 +1,340 @@ +//===-- memprof_rtl.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Main file of the MemProf run-time library. +//===----------------------------------------------------------------------===// + +#include "memprof_allocator.h" +#include "memprof_interceptors.h" +#include "memprof_interface_internal.h" +#include "memprof_internal.h" +#include "memprof_mapping.h" +#include "memprof_report.h" +#include "memprof_stack.h" +#include "memprof_stats.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_symbolizer.h" +#include + +uptr __memprof_shadow_memory_dynamic_address; // Global interface symbol. + +namespace __memprof { + +uptr MemprofMappingProfile[kMemprofMappingProfileSize]; + +static void MemprofDie() { + static atomic_uint32_t num_calls; + if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) != 0) { + // Don't die twice - run a busy loop. + while (1) { + } + } + if (common_flags()->print_module_map >= 1) + PrintModuleMap(); + if (flags()->sleep_before_dying) { + Report("Sleeping for %d second(s)\n", flags()->sleep_before_dying); + SleepForSeconds(flags()->sleep_before_dying); + } + if (flags()->unmap_shadow_on_exit) { + if (kHighShadowEnd) + UnmapOrDie((void *)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg); + } +} + +static void MemprofCheckFailed(const char *file, int line, const char *cond, + u64 v1, u64 v2) { + Report("MemProfiler CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file, line, + cond, (uptr)v1, (uptr)v2); + + // Print a stack trace the first time we come here. Otherwise, we probably + // failed a CHECK during symbolization. + static atomic_uint32_t num_calls; + if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) == 0) { + PRINT_CURRENT_STACK_CHECK(); + } + + Die(); +} + +// -------------------------- Globals --------------------- {{{1 +int memprof_inited; +int memprof_init_done; +bool memprof_init_is_running; +int memprof_timestamp_inited; +long memprof_init_timestamp_s; + +uptr kHighMemEnd; + +// -------------------------- Run-time entry ------------------- {{{1 +// exported functions + +#define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() __memprof::RecordAccess(addr); + +#define MEMPROF_MEMORY_ACCESS_CALLBACK(type) \ + extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_##type(uptr addr) { \ + MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() \ + } + +MEMPROF_MEMORY_ACCESS_CALLBACK(load) +MEMPROF_MEMORY_ACCESS_CALLBACK(store) + +// Force the linker to keep the symbols for various MemProf interface +// functions. We want to keep those in the executable in order to let the +// instrumented dynamic libraries access the symbol even if it is not used by +// the executable itself. This should help if the build system is removing dead +// code at link time. 
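+// The volatile switch below is a standard trick: the compiler cannot prove
+// the calls unreachable, so the referenced interface symbols survive
+// link-time dead-code stripping (e.g. --gc-sections).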
+static NOINLINE void force_interface_symbols() { + volatile int fake_condition = 0; // prevent dead condition elimination. + // clang-format off + switch (fake_condition) { + case 1: __memprof_record_access(nullptr); break; + case 2: __memprof_record_access_range(nullptr, 0); break; + } + // clang-format on +} + +static void memprof_atexit() { + Printf("MemProfiler exit stats:\n"); + __memprof_print_accumulated_stats(); + // Print MemprofMappingProfile. + for (uptr i = 0; i < kMemprofMappingProfileSize; i++) { + if (MemprofMappingProfile[i] == 0) + continue; + Printf("memprof_mapping.h:%zd -- %zd\n", i, MemprofMappingProfile[i]); + } +} + +static void InitializeHighMemEnd() { + kHighMemEnd = GetMaxUserVirtualAddress(); + // Increase kHighMemEnd to make sure it's properly + // aligned together with kHighMemBeg: + kHighMemEnd |= (GetMmapGranularity() << SHADOW_SCALE) - 1; +} + +void PrintAddressSpaceLayout() { + if (kHighMemBeg) { + Printf("|| `[%p, %p]` || HighMem ||\n", (void *)kHighMemBeg, + (void *)kHighMemEnd); + Printf("|| `[%p, %p]` || HighShadow ||\n", (void *)kHighShadowBeg, + (void *)kHighShadowEnd); + } + Printf("|| `[%p, %p]` || ShadowGap ||\n", (void *)kShadowGapBeg, + (void *)kShadowGapEnd); + if (kLowShadowBeg) { + Printf("|| `[%p, %p]` || LowShadow ||\n", (void *)kLowShadowBeg, + (void *)kLowShadowEnd); + Printf("|| `[%p, %p]` || LowMem ||\n", (void *)kLowMemBeg, + (void *)kLowMemEnd); + } + Printf("MemToShadow(shadow): %p %p", (void *)MEM_TO_SHADOW(kLowShadowBeg), + (void *)MEM_TO_SHADOW(kLowShadowEnd)); + if (kHighMemBeg) { + Printf(" %p %p", (void *)MEM_TO_SHADOW(kHighShadowBeg), + (void *)MEM_TO_SHADOW(kHighShadowEnd)); + } + Printf("\n"); + Printf("malloc_context_size=%zu\n", + (uptr)common_flags()->malloc_context_size); + + Printf("SHADOW_SCALE: %d\n", (int)SHADOW_SCALE); + Printf("SHADOW_GRANULARITY: %d\n", (int)SHADOW_GRANULARITY); + Printf("SHADOW_OFFSET: 0x%zx\n", (uptr)SHADOW_OFFSET); + CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7); +} + +static bool UNUSED __local_memprof_dyninit = [] { + MaybeStartBackgroudThread(); + SetSoftRssLimitExceededCallback(MemprofSoftRssLimitExceededCallback); + + return false; +}(); + +static void MemprofInitInternal() { + if (LIKELY(memprof_inited)) + return; + SanitizerToolName = "MemProfiler"; + CHECK(!memprof_init_is_running && "MemProf init calls itself!"); + memprof_init_is_running = true; + + CacheBinaryName(); + + // Initialize flags. This must be done early, because most of the + // initialization steps look at flags(). + InitializeFlags(); + + // Stop performing init at this point if we are being loaded via + // dlopen() and the platform supports it. + if (SANITIZER_SUPPORTS_INIT_FOR_DLOPEN && UNLIKELY(HandleDlopenInit())) { + memprof_init_is_running = false; + VReport(1, "MemProfiler init is being performed for dlopen().\n"); + return; + } + + MemprofCheckIncompatibleRT(); + MemprofCheckDynamicRTPrereqs(); + AvoidCVE_2016_2143(); + + SetMallocContextSize(common_flags()->malloc_context_size); + + InitializeHighMemEnd(); + + // Make sure we are not statically linked. + MemprofDoesNotSupportStaticLinkage(); + + // Install tool-specific callbacks in sanitizer_common. + AddDieCallback(MemprofDie); + SetCheckFailedCallback(MemprofCheckFailed); + SetPrintfAndReportCallback(AppendToErrorMessageBuffer); + + __sanitizer_set_report_path(common_flags()->log_path); + + __sanitizer::InitializePlatformEarly(); + + // Re-exec ourselves if we need to set additional env or command line args. 
+ MaybeReexec(); + + // Setup internal allocator callback. + SetLowLevelAllocateMinAlignment(SHADOW_GRANULARITY); + + InitializeMemprofInterceptors(); + CheckASLR(); + + ReplaceSystemMalloc(); + + DisableCoreDumperIfNecessary(); + + InitializeShadowMemory(); + + MemprofTSDInit(PlatformTSDDtor); + + InitializeAllocator(); + + // On Linux MemprofThread::ThreadStart() calls malloc() that's why + // memprof_inited should be set to 1 prior to initializing the threads. + memprof_inited = 1; + memprof_init_is_running = false; + + if (flags()->atexit) + Atexit(memprof_atexit); + + InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); + + // interceptors + InitTlsSize(); + + // Create main thread. + MemprofThread *main_thread = CreateMainThread(); + CHECK_EQ(0, main_thread->tid()); + force_interface_symbols(); // no-op. + SanitizerInitializeUnwinder(); + + Symbolizer::LateInitialize(); + + VReport(1, "MemProfiler Init done\n"); + + if (flags()->sleep_after_init) { + Report("Sleeping for %d second(s)\n", flags()->sleep_after_init); + SleepForSeconds(flags()->sleep_after_init); + } + memprof_init_done = 1; +} + +void MemprofInitTime() { + if (LIKELY(memprof_timestamp_inited)) + return; + memprof_timestamp_inited = 1; + timespec ts; + timespec_get(&ts, TIME_UTC); + memprof_init_timestamp_s = ts.tv_sec; +} + +// Initialize as requested from some part of MemProf runtime library +// (interceptors, allocator, etc). +void MemprofInitFromRtl() { MemprofInitInternal(); } + +#if MEMPROF_DYNAMIC +// Initialize runtime in case it's LD_PRELOAD-ed into uninstrumented executable +// (and thus normal initializers from .preinit_array or modules haven't run). + +class MemprofInitializer { +public: + MemprofInitializer() { MemprofInitFromRtl(); } +}; + +static MemprofInitializer memprof_initializer; +#endif // MEMPROF_DYNAMIC + +} // namespace __memprof + +// ---------------------- Interface ---------------- {{{1 +using namespace __memprof; + +// Initialize as requested from instrumented application code. +void __memprof_init() { + MemprofInitTime(); + MemprofInitInternal(); +} + +void __memprof_preinit() { MemprofInitInternal(); } + +void __memprof_version_mismatch_check_v1() {} + +void __memprof_record_access(void const volatile *addr) { + __memprof::RecordAccess((uptr)addr); +} + +// We only record the access on the first location in the range, +// since we will later accumulate the access counts across the +// full allocation, and we don't want to inflate the hotness from +// a memory intrinsic on a large range of memory. +// TODO: Should we do something else so we can better track utilization? 
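+// For example, a 4096-byte memcpy() into a single heap block counts here as
+// one access to that block rather than 64 separate granule accesses.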
+void __memprof_record_access_range(void const volatile *addr, + UNUSED uptr size) { + __memprof::RecordAccess((uptr)addr); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE u16 +__sanitizer_unaligned_load16(const uu16 *p) { + __memprof_record_access(p); + return *p; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE u32 +__sanitizer_unaligned_load32(const uu32 *p) { + __memprof_record_access(p); + return *p; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64 +__sanitizer_unaligned_load64(const uu64 *p) { + __memprof_record_access(p); + return *p; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__sanitizer_unaligned_store16(uu16 *p, u16 x) { + __memprof_record_access(p); + *p = x; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__sanitizer_unaligned_store32(uu32 *p, u32 x) { + __memprof_record_access(p); + *p = x; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__sanitizer_unaligned_store64(uu64 *p, u64 x) { + __memprof_record_access(p); + *p = x; +} diff --git a/compiler-rt/lib/memprof/memprof_shadow_setup.cpp b/compiler-rt/lib/memprof/memprof_shadow_setup.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_shadow_setup.cpp @@ -0,0 +1,62 @@ +//===-- memprof_shadow_setup.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Set up the shadow memory. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" + +#include "memprof_internal.h" +#include "memprof_mapping.h" + +namespace __memprof { + +static void ProtectGap(uptr addr, uptr size) { + if (!flags()->protect_shadow_gap) { + // The shadow gap is unprotected, so there is a chance that someone + // is actually using this memory. Which means it needs a shadow... + uptr GapShadowBeg = RoundDownTo(MEM_TO_SHADOW(addr), GetPageSizeCached()); + uptr GapShadowEnd = + RoundUpTo(MEM_TO_SHADOW(addr + size), GetPageSizeCached()) - 1; + if (Verbosity()) + Printf("protect_shadow_gap=0:" + " not protecting shadow gap, allocating gap's shadow\n" + "|| `[%p, %p]` || ShadowGap's shadow ||\n", + GapShadowBeg, GapShadowEnd); + ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd, + "unprotected gap shadow"); + return; + } + __sanitizer::ProtectGap(addr, size, kZeroBaseShadowStart, + kZeroBaseMaxShadowStart); +} + +void InitializeShadowMemory() { + uptr shadow_start = FindDynamicShadowStart(); + // Update the shadow memory address (potentially) used by instrumentation. + __memprof_shadow_memory_dynamic_address = shadow_start; + + if (kLowShadowBeg) + shadow_start -= GetMmapGranularity(); + + if (Verbosity()) + PrintAddressSpaceLayout(); + + // mmap the low shadow plus at least one page at the left. + if (kLowShadowBeg) + ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow"); + // mmap the high shadow. + ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow"); + // protect the gap. 
+ ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1); + CHECK_EQ(kShadowGapEnd, kHighShadowBeg - 1); +} + +} // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_stack.h b/compiler-rt/lib/memprof/memprof_stack.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_stack.h @@ -0,0 +1,84 @@ +//===-- memprof_stack.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for memprof_stack.cpp. +//===----------------------------------------------------------------------===// + +#ifndef MEMPROF_STACK_H +#define MEMPROF_STACK_H + +#include "memprof_flags.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_stacktrace.h" + +namespace __memprof { + +static const u32 kDefaultMallocContextSize = 30; + +void SetMallocContextSize(u32 size); +u32 GetMallocContextSize(); + +} // namespace __memprof + +// NOTE: A Rule of thumb is to retrieve stack trace in the interceptors +// as early as possible (in functions exposed to the user), as we generally +// don't want stack trace to contain functions from MemProf internals. + +#define GET_STACK_TRACE(max_size, fast) \ + BufferedStackTrace stack; \ + if (max_size <= 2) { \ + stack.size = max_size; \ + if (max_size > 0) { \ + stack.top_frame_bp = GET_CURRENT_FRAME(); \ + stack.trace_buffer[0] = StackTrace::GetCurrentPc(); \ + if (max_size > 1) \ + stack.trace_buffer[1] = GET_CALLER_PC(); \ + } \ + } else { \ + stack.Unwind(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), nullptr, \ + fast, max_size); \ + } + +#define GET_STACK_TRACE_FATAL(pc, bp) \ + BufferedStackTrace stack; \ + stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal) + +#define GET_STACK_TRACE_SIGNAL(sig) \ + BufferedStackTrace stack; \ + stack.Unwind((sig).pc, (sig).bp, (sig).context, \ + common_flags()->fast_unwind_on_fatal) + +#define GET_STACK_TRACE_FATAL_HERE \ + GET_STACK_TRACE(kStackTraceMax, common_flags()->fast_unwind_on_fatal) + +#define GET_STACK_TRACE_CHECK_HERE \ + GET_STACK_TRACE(kStackTraceMax, common_flags()->fast_unwind_on_check) + +#define GET_STACK_TRACE_THREAD GET_STACK_TRACE(kStackTraceMax, true) + +#define GET_STACK_TRACE_MALLOC \ + GET_STACK_TRACE(GetMallocContextSize(), common_flags()->fast_unwind_on_malloc) + +#define GET_STACK_TRACE_FREE GET_STACK_TRACE_MALLOC + +#define PRINT_CURRENT_STACK() \ + { \ + GET_STACK_TRACE_FATAL_HERE; \ + stack.Print(); \ + } + +#define PRINT_CURRENT_STACK_CHECK() \ + { \ + GET_STACK_TRACE_CHECK_HERE; \ + stack.Print(); \ + } + +#endif // MEMPROF_STACK_H diff --git a/compiler-rt/lib/memprof/memprof_stack.cpp b/compiler-rt/lib/memprof/memprof_stack.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_stack.cpp @@ -0,0 +1,87 @@ +//===-- memprof_stack.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Code for MemProf stack trace. +//===----------------------------------------------------------------------===// +#include "memprof_stack.h" +#include "memprof_internal.h" +#include "sanitizer_common/sanitizer_atomic.h" + +namespace __memprof { + +static atomic_uint32_t malloc_context_size; + +void SetMallocContextSize(u32 size) { + atomic_store(&malloc_context_size, size, memory_order_release); +} + +u32 GetMallocContextSize() { + return atomic_load(&malloc_context_size, memory_order_acquire); +} + +namespace { + +// ScopedUnwinding is a scope for stacktracing member of a context +class ScopedUnwinding { +public: + explicit ScopedUnwinding(MemprofThread *t) : thread(t) { + if (thread) { + can_unwind = !thread->isUnwinding(); + thread->setUnwinding(true); + } + } + ~ScopedUnwinding() { + if (thread) + thread->setUnwinding(false); + } + + bool CanUnwind() const { return can_unwind; } + +private: + MemprofThread *thread = nullptr; + bool can_unwind = true; +}; + +} // namespace + +} // namespace __memprof + +void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp, + void *context, + bool request_fast, + u32 max_depth) { + using namespace __memprof; + size = 0; + if (UNLIKELY(!memprof_inited)) + return; + request_fast = StackTrace::WillUseFastUnwind(request_fast); + MemprofThread *t = GetCurrentThread(); + ScopedUnwinding unwind_scope(t); + if (!unwind_scope.CanUnwind()) + return; + if (request_fast) { + if (t) { + Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(), + true); + } + return; + } + Unwind(max_depth, pc, bp, context, 0, 0, false); +} + +// ------------------ Interface -------------- {{{1 + +extern "C" { +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_print_stack_trace() { + using namespace __memprof; + PRINT_CURRENT_STACK(); +} +} // extern "C" diff --git a/compiler-rt/lib/memprof/memprof_stats.h b/compiler-rt/lib/memprof/memprof_stats.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_stats.h @@ -0,0 +1,61 @@ +//===-- memprof_stats.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// MemProf-private header for statistics. +//===----------------------------------------------------------------------===// +#ifndef MEMPROF_STATS_H +#define MEMPROF_STATS_H + +#include "memprof_allocator.h" +#include "memprof_internal.h" + +namespace __memprof { + +// MemprofStats struct is NOT thread-safe. +// Each MemprofThread has its own MemprofStats, which are sometimes flushed +// to the accumulated MemprofStats. +struct MemprofStats { + // MemprofStats must be a struct consisting of uptr fields only. + // When merging two MemprofStats structs, we treat them as arrays of uptr. 
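+  // MergeFrom() in memprof_stats.cpp relies on this: it treats two
+  // MemprofStats objects as flat uptr arrays and adds them element-wise, so
+  // introducing a non-uptr member would silently break accumulation.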
+  uptr mallocs;
+  uptr malloced;
+  uptr malloced_overhead;
+  uptr frees;
+  uptr freed;
+  uptr real_frees;
+  uptr really_freed;
+  uptr reallocs;
+  uptr realloced;
+  uptr mmaps;
+  uptr mmaped;
+  uptr munmaps;
+  uptr munmaped;
+  uptr malloc_large;
+  uptr malloced_by_size[kNumberOfSizeClasses];
+
+  // Ctor for global MemprofStats (accumulated stats for dead threads).
+  explicit MemprofStats(LinkerInitialized) {}
+  // Creates empty stats.
+  MemprofStats();
+
+  void Print(); // Prints formatted stats to stderr.
+  void Clear();
+  void MergeFrom(const MemprofStats *stats);
+};
+
+// Returns stats for GetCurrentThread(), or stats for fake "unknown thread"
+// if GetCurrentThread() returns 0.
+MemprofStats &GetCurrentThreadStats();
+// Flushes a given stats into accumulated stats of dead threads.
+void FlushToDeadThreadStats(MemprofStats *stats);
+
+} // namespace __memprof
+
+#endif // MEMPROF_STATS_H
diff --git a/compiler-rt/lib/memprof/memprof_stats.cpp b/compiler-rt/lib/memprof/memprof_stats.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_stats.cpp
@@ -0,0 +1,127 @@
+//===-- memprof_stats.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Code related to statistics collected by MemProfiler.
+//===----------------------------------------------------------------------===//
+#include "memprof_stats.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+namespace __memprof {
+
+MemprofStats::MemprofStats() { Clear(); }
+
+void MemprofStats::Clear() {
+  CHECK(REAL(memset));
+  REAL(memset)(this, 0, sizeof(MemprofStats));
+}
+
+static void PrintMallocStatsArray(const char *prefix,
+                                  uptr (&array)[kNumberOfSizeClasses]) {
+  Printf("%s", prefix);
+  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
+    if (!array[i])
+      continue;
+    Printf("%zu:%zu; ", i, array[i]);
+  }
+  Printf("\n");
+}
+
+void MemprofStats::Print() {
+  Printf("Stats: %zuM malloced (%zuM for overhead) by %zu calls\n",
+         malloced >> 20, malloced_overhead >> 20, mallocs);
+  Printf("Stats: %zuM realloced by %zu calls\n", realloced >> 20, reallocs);
+  Printf("Stats: %zuM freed by %zu calls\n", freed >> 20, frees);
+  Printf("Stats: %zuM really freed by %zu calls\n", really_freed >> 20,
+         real_frees);
+  Printf("Stats: %zuM (%zuM-%zuM) mmaped; %zu maps, %zu unmaps\n",
+         (mmaped - munmaped) >> 20, mmaped >> 20, munmaped >> 20, mmaps,
+         munmaps);
+
+  PrintMallocStatsArray("  mallocs by size class: ", malloced_by_size);
+  Printf("Stats: malloc large: %zu\n", malloc_large);
+}
+
+void MemprofStats::MergeFrom(const MemprofStats *stats) {
+  uptr *dst_ptr = reinterpret_cast<uptr *>(this);
+  const uptr *src_ptr = reinterpret_cast<const uptr *>(stats);
+  uptr num_fields = sizeof(*this) / sizeof(uptr);
+  for (uptr i = 0; i < num_fields; i++)
+    dst_ptr[i] += src_ptr[i];
+}
+
+static BlockingMutex print_lock(LINKER_INITIALIZED);
+
+static MemprofStats unknown_thread_stats(LINKER_INITIALIZED);
+static MemprofStats dead_threads_stats(LINKER_INITIALIZED);
+static BlockingMutex dead_threads_stats_lock(LINKER_INITIALIZED);
+// Required for malloc_zone_statistics() on OS X. This can't be stored in
+// per-thread MemprofStats.
+static uptr max_malloced_memory;
+
+static void MergeThreadStats(ThreadContextBase *tctx_base, void *arg) {
+  MemprofStats *accumulated_stats = reinterpret_cast<MemprofStats *>(arg);
+  MemprofThreadContext *tctx = static_cast<MemprofThreadContext *>(tctx_base);
+  if (MemprofThread *t = tctx->thread)
+    accumulated_stats->MergeFrom(&t->stats());
+}
+
+static void GetAccumulatedStats(MemprofStats *stats) {
+  stats->Clear();
+  {
+    ThreadRegistryLock l(&memprofThreadRegistry());
+    memprofThreadRegistry().RunCallbackForEachThreadLocked(MergeThreadStats,
+                                                           stats);
+  }
+  stats->MergeFrom(&unknown_thread_stats);
+  {
+    BlockingMutexLock lock(&dead_threads_stats_lock);
+    stats->MergeFrom(&dead_threads_stats);
+  }
+  // This is not very accurate: we may miss allocation peaks that happen
+  // between two updates of accumulated_stats_. For more accurate bookkeeping
+  // the maximum should be updated on every malloc(), which is unacceptable.
+  if (max_malloced_memory < stats->malloced) {
+    max_malloced_memory = stats->malloced;
+  }
+}
+
+void FlushToDeadThreadStats(MemprofStats *stats) {
+  BlockingMutexLock lock(&dead_threads_stats_lock);
+  dead_threads_stats.MergeFrom(stats);
+  stats->Clear();
+}
+
+MemprofStats &GetCurrentThreadStats() {
+  MemprofThread *t = GetCurrentThread();
+  return (t) ? t->stats() : unknown_thread_stats;
+}
+
+static void PrintAccumulatedStats() {
+  MemprofStats stats;
+  GetAccumulatedStats(&stats);
+  // Use lock to keep reports from mixing up.
+  BlockingMutexLock lock(&print_lock);
+  stats.Print();
+  StackDepotStats *stack_depot_stats = StackDepotGetStats();
+  Printf("Stats: StackDepot: %zd ids; %zdM allocated\n",
+         stack_depot_stats->n_uniq_ids, stack_depot_stats->allocated >> 20);
+  PrintInternalAllocatorStats();
+}
+
+} // namespace __memprof
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __memprof;
+
+void __memprof_print_accumulated_stats() { PrintAccumulatedStats(); }
diff --git a/compiler-rt/lib/memprof/memprof_thread.h b/compiler-rt/lib/memprof/memprof_thread.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_thread.h
@@ -0,0 +1,147 @@
+//===-- memprof_thread.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_thread.cpp.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_THREAD_H
+#define MEMPROF_THREAD_H
+
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+#include "memprof_stats.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_thread_registry.h"
+
+namespace __sanitizer {
+struct DTLS;
+} // namespace __sanitizer
+
+namespace __memprof {
+
+const u32 kInvalidTid = 0xffffff; // Must fit into 24 bits.
+const u32 kMaxNumberOfThreads = (1 << 22); // 4M
+
+class MemprofThread;
+
+// These objects are created for every thread and are never deleted,
+// so we can find them by tid even if the thread is long dead.
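+// Contexts are small (the COMPILER_CHECK below caps them at 256 bytes) and
+// allocated lazily, so even the kMaxNumberOfThreads worst case of 4M threads
+// amounts to roughly 1 GB of context storage over a process lifetime.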
+class MemprofThreadContext : public ThreadContextBase { +public: + explicit MemprofThreadContext(int tid) + : ThreadContextBase(tid), announced(false), + destructor_iterations(GetPthreadDestructorIterations()), stack_id(0), + thread(nullptr) {} + bool announced; + u8 destructor_iterations; + u32 stack_id; + MemprofThread *thread; + + void OnCreated(void *arg) override; + void OnFinished() override; + + struct CreateThreadContextArgs { + MemprofThread *thread; + StackTrace *stack; + }; +}; + +// MemprofThreadContext objects are never freed, so we need many of them. +COMPILER_CHECK(sizeof(MemprofThreadContext) <= 256); + +// MemprofThread are stored in TSD and destroyed when the thread dies. +class MemprofThread { +public: + static MemprofThread *Create(thread_callback_t start_routine, void *arg, + u32 parent_tid, StackTrace *stack, + bool detached); + static void TSDDtor(void *tsd); + void Destroy(); + + struct InitOptions; + void Init(const InitOptions *options = nullptr); + + thread_return_t ThreadStart(tid_t os_id, + atomic_uintptr_t *signal_thread_is_registered); + + uptr stack_top(); + uptr stack_bottom(); + uptr stack_size(); + uptr tls_begin() { return tls_begin_; } + uptr tls_end() { return tls_end_; } + DTLS *dtls() { return dtls_; } + u32 tid() { return context_->tid; } + MemprofThreadContext *context() { return context_; } + void set_context(MemprofThreadContext *context) { context_ = context; } + + bool AddrIsInStack(uptr addr); + + void StartSwitchFiber(uptr bottom, uptr size); + void FinishSwitchFiber(uptr *bottom_old, uptr *size_old); + + // True is this thread is currently unwinding stack (i.e. collecting a stack + // trace). Used to prevent deadlocks on platforms where libc unwinder calls + // malloc internally. See PR17116 for more details. + bool isUnwinding() const { return unwinding_; } + void setUnwinding(bool b) { unwinding_ = b; } + + MemprofThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; } + MemprofStats &stats() { return stats_; } + +private: + // NOTE: There is no MemprofThread constructor. It is allocated + // via mmap() and *must* be valid in zero-initialized state. + + void SetThreadStackAndTls(const InitOptions *options); + + struct StackBounds { + uptr bottom; + uptr top; + }; + StackBounds GetStackBounds() const; + + MemprofThreadContext *context_; + thread_callback_t start_routine_; + void *arg_; + + uptr stack_top_; + uptr stack_bottom_; + // these variables are used when the thread is about to switch stack + uptr next_stack_top_; + uptr next_stack_bottom_; + // true if switching is in progress + atomic_uint8_t stack_switching_; + + uptr tls_begin_; + uptr tls_end_; + DTLS *dtls_; + + MemprofThreadLocalMallocStorage malloc_storage_; + MemprofStats stats_; + bool unwinding_; +}; + +// Returns a single instance of registry. +ThreadRegistry &memprofThreadRegistry(); + +// Must be called under ThreadRegistryLock. +MemprofThreadContext *GetThreadContextByTidLocked(u32 tid); + +// Get the current thread. May return 0. +MemprofThread *GetCurrentThread(); +void SetCurrentThread(MemprofThread *t); +u32 GetCurrentTidOrInvalid(); + +// Used to handle fork(). 
+void EnsureMainThreadIDIsCorrect();
+} // namespace __memprof
+
+#endif // MEMPROF_THREAD_H
diff --git a/compiler-rt/lib/memprof/memprof_thread.cpp b/compiler-rt/lib/memprof/memprof_thread.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_thread.cpp
@@ -0,0 +1,293 @@
+//===-- memprof_thread.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Thread-related code.
+//===----------------------------------------------------------------------===//
+#include "memprof_thread.h"
+#include "memprof_allocator.h"
+#include "memprof_interceptors.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
+
+namespace __memprof {
+
+// MemprofThreadContext implementation.
+
+void MemprofThreadContext::OnCreated(void *arg) {
+  CreateThreadContextArgs *args = static_cast<CreateThreadContextArgs *>(arg);
+  if (args->stack)
+    stack_id = StackDepotPut(*args->stack);
+  thread = args->thread;
+  thread->set_context(this);
+}
+
+void MemprofThreadContext::OnFinished() {
+  // Drop the link to the MemprofThread object.
+  thread = nullptr;
+}
+
+static ALIGNED(16) char thread_registry_placeholder[sizeof(ThreadRegistry)];
+static ThreadRegistry *memprof_thread_registry;
+
+static BlockingMutex mu_for_thread_context(LINKER_INITIALIZED);
+static LowLevelAllocator allocator_for_thread_context;
+
+static ThreadContextBase *GetMemprofThreadContext(u32 tid) {
+  BlockingMutexLock lock(&mu_for_thread_context);
+  return new (allocator_for_thread_context) MemprofThreadContext(tid);
+}
+
+ThreadRegistry &memprofThreadRegistry() {
+  static bool initialized;
+  // Don't worry about thread_safety - this should be called when there is
+  // a single thread.
+  if (!initialized) {
+    // Never reuse MemProf threads: we store pointer to MemprofThreadContext
+    // in TSD and can't reliably tell when no more TSD destructors will
+    // be called. It would be wrong to reuse MemprofThreadContext for another
+    // thread before all TSD destructors will be called for it.
+    memprof_thread_registry = new (thread_registry_placeholder) ThreadRegistry(
+        GetMemprofThreadContext, kMaxNumberOfThreads, kMaxNumberOfThreads);
+    initialized = true;
+  }
+  return *memprof_thread_registry;
+}
+
+MemprofThreadContext *GetThreadContextByTidLocked(u32 tid) {
+  return static_cast<MemprofThreadContext *>(
+      memprofThreadRegistry().GetThreadLocked(tid));
+}
+
+// MemprofThread implementation.
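+// Lifetime: Create() mmaps the object and registers it with the thread
+// registry; SetCurrentThread() stashes its context in TSD, and
+// TSDDtor()/Destroy() flush the per-thread stats and unmap it on exit.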
+ +MemprofThread *MemprofThread::Create(thread_callback_t start_routine, void *arg, + u32 parent_tid, StackTrace *stack, + bool detached) { + uptr PageSize = GetPageSizeCached(); + uptr size = RoundUpTo(sizeof(MemprofThread), PageSize); + MemprofThread *thread = (MemprofThread *)MmapOrDie(size, __func__); + thread->start_routine_ = start_routine; + thread->arg_ = arg; + MemprofThreadContext::CreateThreadContextArgs args = {thread, stack}; + memprofThreadRegistry().CreateThread(*reinterpret_cast(thread), + detached, parent_tid, &args); + + return thread; +} + +void MemprofThread::TSDDtor(void *tsd) { + MemprofThreadContext *context = (MemprofThreadContext *)tsd; + VReport(1, "T%d TSDDtor\n", context->tid); + if (context->thread) + context->thread->Destroy(); +} + +void MemprofThread::Destroy() { + int tid = this->tid(); + VReport(1, "T%d exited\n", tid); + + malloc_storage().CommitBack(); + if (common_flags()->use_sigaltstack) + UnsetAlternateSignalStack(); + memprofThreadRegistry().FinishThread(tid); + FlushToDeadThreadStats(&stats_); + uptr size = RoundUpTo(sizeof(MemprofThread), GetPageSizeCached()); + UnmapOrDie(this, size); + DTLS_Destroy(); +} + +void MemprofThread::StartSwitchFiber(uptr bottom, uptr size) { + if (atomic_load(&stack_switching_, memory_order_relaxed)) { + Report("ERROR: starting fiber switch while in fiber switch\n"); + Die(); + } + + next_stack_bottom_ = bottom; + next_stack_top_ = bottom + size; + atomic_store(&stack_switching_, 1, memory_order_release); +} + +void MemprofThread::FinishSwitchFiber(uptr *bottom_old, uptr *size_old) { + if (!atomic_load(&stack_switching_, memory_order_relaxed)) { + Report("ERROR: finishing a fiber switch that has not started\n"); + Die(); + } + + if (bottom_old) + *bottom_old = stack_bottom_; + if (size_old) + *size_old = stack_top_ - stack_bottom_; + stack_bottom_ = next_stack_bottom_; + stack_top_ = next_stack_top_; + atomic_store(&stack_switching_, 0, memory_order_release); + next_stack_top_ = 0; + next_stack_bottom_ = 0; +} + +inline MemprofThread::StackBounds MemprofThread::GetStackBounds() const { + if (!atomic_load(&stack_switching_, memory_order_acquire)) { + // Make sure the stack bounds are fully initialized. + if (stack_bottom_ >= stack_top_) + return {0, 0}; + return {stack_bottom_, stack_top_}; + } + char local; + const uptr cur_stack = (uptr)&local; + // Note: need to check next stack first, because FinishSwitchFiber + // may be in process of overwriting stack_top_/bottom_. But in such case + // we are already on the next stack. 
+ if (cur_stack >= next_stack_bottom_ && cur_stack < next_stack_top_) + return {next_stack_bottom_, next_stack_top_}; + return {stack_bottom_, stack_top_}; +} + +uptr MemprofThread::stack_top() { return GetStackBounds().top; } + +uptr MemprofThread::stack_bottom() { return GetStackBounds().bottom; } + +uptr MemprofThread::stack_size() { + const auto bounds = GetStackBounds(); + return bounds.top - bounds.bottom; +} + +void MemprofThread::Init(const InitOptions *options) { + next_stack_top_ = next_stack_bottom_ = 0; + atomic_store(&stack_switching_, false, memory_order_release); + CHECK_EQ(this->stack_size(), 0U); + SetThreadStackAndTls(options); + if (stack_top_ != stack_bottom_) { + CHECK_GT(this->stack_size(), 0U); + CHECK(AddrIsInMem(stack_bottom_)); + CHECK(AddrIsInMem(stack_top_ - 1)); + } + int local = 0; + VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(), + (void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_, + &local); +} + +thread_return_t +MemprofThread::ThreadStart(tid_t os_id, + atomic_uintptr_t *signal_thread_is_registered) { + Init(); + memprofThreadRegistry().StartThread(tid(), os_id, ThreadType::Regular, + nullptr); + if (signal_thread_is_registered) + atomic_store(signal_thread_is_registered, 1, memory_order_release); + + if (common_flags()->use_sigaltstack) + SetAlternateSignalStack(); + + if (!start_routine_) { + // start_routine_ == 0 if we're on the main thread or on one of the + // OS X libdispatch worker threads. But nobody is supposed to call + // ThreadStart() for the worker threads. + CHECK_EQ(tid(), 0); + return 0; + } + + return start_routine_(arg_); +} + +MemprofThread *CreateMainThread() { + MemprofThread *main_thread = MemprofThread::Create( + /* start_routine */ nullptr, /* arg */ nullptr, /* parent_tid */ 0, + /* stack */ nullptr, /* detached */ true); + SetCurrentThread(main_thread); + main_thread->ThreadStart(internal_getpid(), + /* signal_thread_is_registered */ nullptr); + return main_thread; +} + +// This implementation doesn't use the argument, which is just passed down +// from the caller of Init (which see, above). It's only there to support +// OS-specific implementations that need more information passed through. +void MemprofThread::SetThreadStackAndTls(const InitOptions *options) { + DCHECK_EQ(options, nullptr); + uptr tls_size = 0; + uptr stack_size = 0; + GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size, &tls_begin_, + &tls_size); + stack_top_ = stack_bottom_ + stack_size; + tls_end_ = tls_begin_ + tls_size; + dtls_ = DTLS_Get(); + + if (stack_top_ != stack_bottom_) { + int local; + CHECK(AddrIsInStack((uptr)&local)); + } +} + +bool MemprofThread::AddrIsInStack(uptr addr) { + const auto bounds = GetStackBounds(); + return addr >= bounds.bottom && addr < bounds.top; +} + +MemprofThread *GetCurrentThread() { + MemprofThreadContext *context = + reinterpret_cast(MemprofTSDGet()); + if (!context) + return nullptr; + return context->thread; +} + +void SetCurrentThread(MemprofThread *t) { + CHECK(t->context()); + VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(), + (void *)GetThreadSelf()); + // Make sure we do not reset the current MemprofThread. + CHECK_EQ(0, MemprofTSDGet()); + MemprofTSDSet(t->context()); + CHECK_EQ(t->context(), MemprofTSDGet()); +} + +u32 GetCurrentTidOrInvalid() { + MemprofThread *t = GetCurrentThread(); + return t ? 
t->tid() : kInvalidTid; +} + +void EnsureMainThreadIDIsCorrect() { + MemprofThreadContext *context = + reinterpret_cast(MemprofTSDGet()); + if (context && (context->tid == 0)) + context->os_id = GetTid(); +} +} // namespace __memprof + +// ---------------------- Interface ---------------- {{{1 +using namespace __memprof; + +extern "C" { +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_start_switch_fiber(UNUSED void **fakestacksave, + const void *bottom, uptr size) { + MemprofThread *t = GetCurrentThread(); + if (!t) { + VReport(1, "__memprof_start_switch_fiber called from unknown thread\n"); + return; + } + t->StartSwitchFiber((uptr)bottom, size); +} + +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_finish_switch_fiber(UNUSED void *fakestack, + const void **bottom_old, uptr *size_old) { + MemprofThread *t = GetCurrentThread(); + if (!t) { + VReport(1, "__memprof_finish_switch_fiber called from unknown thread\n"); + return; + } + t->FinishSwitchFiber((uptr *)bottom_old, (uptr *)size_old); +} +} diff --git a/compiler-rt/lib/memprof/weak_symbols.txt b/compiler-rt/lib/memprof/weak_symbols.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/memprof/weak_symbols.txt @@ -0,0 +1 @@ +___memprof_default_options ___memprof_on_error diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h.sv b/compiler-rt/lib/sanitizer_common/sanitizer_common.h.sv new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h.sv @@ -0,0 +1,1025 @@ +//===-- sanitizer_common.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is shared between run-time libraries of sanitizers. +// +// It declares common functions and classes that are used in both runtimes. +// Implementation of some functions are provided in sanitizer_common, while +// others must be defined by run-time library itself. +//===----------------------------------------------------------------------===// +#ifndef SANITIZER_COMMON_H +#define SANITIZER_COMMON_H + +#include "sanitizer_flags.h" +#include "sanitizer_interface_internal.h" +#include "sanitizer_internal_defs.h" +#include "sanitizer_libc.h" +#include "sanitizer_list.h" +#include "sanitizer_mutex.h" + +#if defined(_MSC_VER) && !defined(__clang__) +extern "C" void _ReadWriteBarrier(); +#pragma intrinsic(_ReadWriteBarrier) +#endif + +namespace __sanitizer { + +struct AddressInfo; +struct BufferedStackTrace; +struct SignalContext; +struct StackTrace; + +// Constants. +const uptr kWordSize = SANITIZER_WORDSIZE / 8; +const uptr kWordSizeInBits = 8 * kWordSize; + +const uptr kCacheLineSize = SANITIZER_CACHE_LINE_SIZE; + +const uptr kMaxPathLength = 4096; + +const uptr kMaxThreadStackSize = 1 << 30; // 1Gb + +static const uptr kErrorMessageBufferSize = 1 << 16; + +// Denotes fake PC values that come from JIT/JAVA/etc. +// For such PC values __tsan_symbolize_external_ex() will be called. +const u64 kExternalPCBit = 1ULL << 60; + +extern const char *SanitizerToolName; // Can be changed by the tool. 
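Editorial note, not part of the patch: the __sanitizer_start_switch_fiber / __sanitizer_finish_switch_fiber entry points defined in memprof_thread.cpp above follow the common sanitizer fiber annotation. A minimal usage sketch from user code, assuming the declarations in <sanitizer/common_interface_defs.h>; this runtime ignores the fake-stack argument, so nullptr is passed for it:

  #include <sanitizer/common_interface_defs.h>
  #include <ucontext.h>

  // Switch from main_ctx onto a fiber whose stack is
  // [fiber_stack, fiber_stack + fiber_stack_size), keeping the runtime's
  // notion of the current stack bounds correct.
  void SwitchToFiber(ucontext_t *main_ctx, ucontext_t *fiber_ctx,
                     void *fiber_stack, size_t fiber_stack_size) {
    __sanitizer_start_switch_fiber(nullptr, fiber_stack, fiber_stack_size);
    swapcontext(main_ctx, fiber_ctx);
    // Control returns here once the fiber switches back.
    __sanitizer_finish_switch_fiber(nullptr, nullptr, nullptr);
  }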
+ +extern atomic_uint32_t current_verbosity; +INLINE void SetVerbosity(int verbosity) { + atomic_store(&current_verbosity, verbosity, memory_order_relaxed); +} +INLINE int Verbosity() { + return atomic_load(&current_verbosity, memory_order_relaxed); +} + +#if SANITIZER_ANDROID +INLINE uptr GetPageSize() { +// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. + return 4096; +} +INLINE uptr GetPageSizeCached() { + return 4096; +} +#else +uptr GetPageSize(); +extern uptr PageSizeCached; +INLINE uptr GetPageSizeCached() { + if (!PageSizeCached) + PageSizeCached = GetPageSize(); + return PageSizeCached; +} +#endif +uptr GetMmapGranularity(); +uptr GetMaxVirtualAddress(); +uptr GetMaxUserVirtualAddress(); +// Threads +tid_t GetTid(); +int TgKill(pid_t pid, tid_t tid, int sig); +uptr GetThreadSelf(); +void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top, + uptr *stack_bottom); +void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, + uptr *tls_addr, uptr *tls_size); + +// Memory management +void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false); +INLINE void *MmapOrDieQuietly(uptr size, const char *mem_type) { + return MmapOrDie(size, mem_type, /*raw_report*/ true); +} +void UnmapOrDie(void *addr, uptr size); +// Behaves just like MmapOrDie, but tolerates out of memory condition, in that +// case returns nullptr. +void *MmapOrDieOnFatalError(uptr size, const char *mem_type); +bool MmapFixedNoReserve(uptr fixed_addr, uptr size, const char *name = nullptr) + WARN_UNUSED_RESULT; +bool MmapFixedSuperNoReserve(uptr fixed_addr, uptr size, + const char *name = nullptr) WARN_UNUSED_RESULT; +void *MmapNoReserveOrDie(uptr size, const char *mem_type); +void *MmapFixedOrDie(uptr fixed_addr, uptr size, const char *name = nullptr); +// Behaves just like MmapFixedOrDie, but tolerates out of memory condition, in +// that case returns nullptr. +void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size, + const char *name = nullptr); +void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name = nullptr); +void *MmapNoAccess(uptr size); +// Map aligned chunk of address space; size and alignment are powers of two. +// Dies on all but out of memory errors, in the latter case returns nullptr. +void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment, + const char *mem_type); +// Disallow access to a memory range. Use MmapFixedNoAccess to allocate an +// unaccessible memory. +bool MprotectNoAccess(uptr addr, uptr size); +bool MprotectReadOnly(uptr addr, uptr size); + +void MprotectMallocZones(void *addr, int prot); + +// Get the max address, taking into account alignment due to the mmap +// granularity and shadow size. +uptr GetHighMemEnd(uptr shadow_scale); + +// Maps shadow_size_bytes of shadow memory and returns shadow address. It will +// be aligned to shadow_base_alignment if that is nonzero, otherwise to +// mmap granularity * 2^mmap_alignment_scale. The returned address will have +// max(2^kShadowBaseAlignment, mmap granularity) on the left, and +// shadow_size_bytes bytes on the right, mapped no access. +uptr MapDynamicShadow(uptr shadow_size_bytes, uptr mmap_alignment_scale, + uptr shadow_base_alignment = 0); + +// Reserve memory range [beg, end]. If madvise_shadow is true then apply +// madvise (e.g. hugepages, core dumping) requested by options. +void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name, + bool madvise_shadow = true); + +// Protect size bytes of memory starting at addr.
Also try to protect +// several pages at the start of the address space as specified by +// zero_base_shadow_start, at most up to the size or zero_base_max_shadow_start. +void ProtectGap(uptr addr, uptr size, uptr zero_base_shadow_start, + uptr zero_base_max_shadow_start, const char *name = 0); + +// Find an available address space. +uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding, + uptr *largest_gap_found, uptr *max_occupied_addr); + +// Used to check if we can map shadow memory to a fixed location. +bool MemoryRangeIsAvailable(uptr range_start, uptr range_end); +// Releases memory pages entirely within the [beg, end] address range. Noop if +// the provided range does not contain at least one entire page. +void ReleaseMemoryPagesToOS(uptr beg, uptr end); +void IncreaseTotalMmap(uptr size); +void DecreaseTotalMmap(uptr size); +uptr GetRSS(); +void SetShadowRegionHugePageMode(uptr addr, uptr length); +bool DontDumpShadowMemory(uptr addr, uptr length); +// Check if the built VMA size matches the runtime one. +void CheckVMASize(); +void RunMallocHooks(const void *ptr, uptr size); +void RunFreeHooks(const void *ptr); + +class ReservedAddressRange { + public: + uptr Init(uptr size, const char *name = nullptr, uptr fixed_addr = 0); + uptr InitAligned(uptr size, uptr align, const char *name = nullptr); + uptr Map(uptr fixed_addr, uptr size, const char *name = nullptr); + uptr MapOrDie(uptr fixed_addr, uptr size, const char *name = nullptr); + void Unmap(uptr addr, uptr size); + void *base() const { return base_; } + uptr size() const { return size_; } + + private: + void* base_; + uptr size_; + const char* name_; + uptr os_handle_; +}; + +typedef void (*fill_profile_f)(uptr start, uptr rss, bool file, + /*out*/uptr *stats, uptr stats_size); + +// Parse the contents of /proc/self/smaps and generate a memory profile. +// |cb| is a tool-specific callback that fills the |stats| array containing +// |stats_size| elements. +void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size); + +// Simple low-level (mmap-based) allocator for internal use. Doesn't have +// constructor, so all instances of LowLevelAllocator should be +// linker initialized. +class LowLevelAllocator { + public: + // Requires an external lock. + void *Allocate(uptr size); + private: + char *allocated_end_; + char *allocated_current_; +}; +// Set the min alignment of LowLevelAllocator to at least alignment. +void SetLowLevelAllocateMinAlignment(uptr alignment); +typedef void (*LowLevelAllocateCallback)(uptr ptr, uptr size); +// Allows to register tool-specific callbacks for LowLevelAllocator. +// Passing NULL removes the callback. +void SetLowLevelAllocateCallback(LowLevelAllocateCallback callback); + +// IO +void CatastrophicErrorWrite(const char *buffer, uptr length); +void RawWrite(const char *buffer); +bool ColorizeReports(); +void RemoveANSIEscapeSequencesFromString(char *buffer); +void Printf(const char *format, ...); +void Report(const char *format, ...); +void SetPrintfAndReportCallback(void (*callback)(const char *)); +#define VReport(level, ...) \ + do { \ + if ((uptr)Verbosity() >= (level)) Report(__VA_ARGS__); \ + } while (0) +#define VPrintf(level, ...) \ + do { \ + if ((uptr)Verbosity() >= (level)) Printf(__VA_ARGS__); \ + } while (0) + +// Lock sanitizer error reporting and protects against nested errors. 
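// (Editorial note, not part of the patch.) The VReport/VPrintf macros above
// are how runtime code gates diagnostics on the common "verbosity" flag, e.g.:
//
//   VReport(1, "T%d exited\n", tid);         // printed only when verbosity >= 1
//   VPrintf(2, "mapped %zd bytes\n", size);  // printed only when verbosity >= 2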
+class ScopedErrorReportLock { + public: + ScopedErrorReportLock(); + ~ScopedErrorReportLock(); + + static void CheckLocked(); +}; + +extern uptr stoptheworld_tracer_pid; +extern uptr stoptheworld_tracer_ppid; + +bool IsAccessibleMemoryRange(uptr beg, uptr size); + +// Error report formatting. +const char *StripPathPrefix(const char *filepath, + const char *strip_file_prefix); +// Strip the directories from the module name. +const char *StripModuleName(const char *module); + +// OS +uptr ReadBinaryName(/*out*/char *buf, uptr buf_len); +uptr ReadBinaryNameCached(/*out*/char *buf, uptr buf_len); +uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len); +const char *GetProcessName(); +void UpdateProcessName(); +void CacheBinaryName(); +void DisableCoreDumperIfNecessary(); +void DumpProcessMap(); +void PrintModuleMap(); +const char *GetEnv(const char *name); +bool SetEnv(const char *name, const char *value); + +u32 GetUid(); +void ReExec(); +void CheckASLR(); +void CheckMPROTECT(); +char **GetArgv(); +char **GetEnviron(); +void PrintCmdline(); +bool StackSizeIsUnlimited(); +void SetStackSizeLimitInBytes(uptr limit); +bool AddressSpaceIsUnlimited(); +void SetAddressSpaceUnlimited(); +void AdjustStackSize(void *attr); +void PlatformPrepareForSandboxing(__sanitizer_sandbox_arguments *args); +void SetSandboxingCallback(void (*f)()); + +void InitializeCoverage(bool enabled, const char *coverage_dir); + +void InitTlsSize(); +uptr GetTlsSize(); + +// Other +void SleepForSeconds(int seconds); +void SleepForMillis(int millis); +u64 NanoTime(); +u64 MonotonicNanoTime(); +int Atexit(void (*function)(void)); +bool TemplateMatch(const char *templ, const char *str); + +// Exit +void NORETURN Abort(); +void NORETURN Die(); +void NORETURN +CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2); +void NORETURN ReportMmapFailureAndDie(uptr size, const char *mem_type, + const char *mmap_type, error_t err, + bool raw_report = false); + +// Specific tools may override behavior of "Die" and "CheckFailed" functions +// to do tool-specific job. +typedef void (*DieCallbackType)(void); + +// It's possible to add several callbacks that would be run when "Die" is +// called. The callbacks will be run in the opposite order. The tools are +// strongly recommended to setup all callbacks during initialization, when there +// is only a single thread. +bool AddDieCallback(DieCallbackType callback); +bool RemoveDieCallback(DieCallbackType callback); + +void SetUserDieCallback(DieCallbackType callback); + +typedef void (*CheckFailedCallbackType)(const char *, int, const char *, + u64, u64); +void SetCheckFailedCallback(CheckFailedCallbackType callback); + +// Callback will be called if soft_rss_limit_mb is given and the limit is +// exceeded (exceeded==true) or if rss went down below the limit +// (exceeded==false). +// The callback should be registered once at the tool init time. +void SetSoftRssLimitExceededCallback(void (*Callback)(bool exceeded)); + +// Functions related to signal handling. +typedef void (*SignalHandlerType)(int, void *, void *); +HandleSignalMode GetHandleSignalMode(int signum); +void InstallDeadlySignalHandlers(SignalHandlerType handler); + +// Signal reporting. +// Each sanitizer uses slightly different implementation of stack unwinding. +typedef void (*UnwindSignalStackCallbackType)(const SignalContext &sig, + const void *callback_context, + BufferedStackTrace *stack); +// Print deadly signal report and die. 
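// (Editorial note, not part of the patch.) Sketch of how a tool hooks the
// Die()/CheckFailed() machinery declared above; the callback name is
// hypothetical:
//
//   static void PrintStatsOnDeath() { Report("MemProf: dying, stats follow\n"); }
//   ...
//   AddDieCallback(PrintStatsOnDeath);  // die callbacks run in reverse order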
+void HandleDeadlySignal(void *siginfo, void *context, u32 tid, + UnwindSignalStackCallbackType unwind, + const void *unwind_context); + +// Part of HandleDeadlySignal, exposed for asan. +void StartReportDeadlySignal(); +// Part of HandleDeadlySignal, exposed for asan. +void ReportDeadlySignal(const SignalContext &sig, u32 tid, + UnwindSignalStackCallbackType unwind, + const void *unwind_context); + +// Alternative signal stack (POSIX-only). +void SetAlternateSignalStack(); +void UnsetAlternateSignalStack(); + +// We don't want a summary too long. +const int kMaxSummaryLength = 1024; +// Construct a one-line string: +// SUMMARY: SanitizerToolName: error_message +// and pass it to __sanitizer_report_error_summary. +// If alt_tool_name is provided, it's used in place of SanitizerToolName. +void ReportErrorSummary(const char *error_message, + const char *alt_tool_name = nullptr); +// Same as above, but construct error_message as: +// error_type file:line[:column][ function] +void ReportErrorSummary(const char *error_type, const AddressInfo &info, + const char *alt_tool_name = nullptr); +// Same as above, but obtains AddressInfo by symbolizing top stack trace frame. +void ReportErrorSummary(const char *error_type, const StackTrace *trace, + const char *alt_tool_name = nullptr); + +void ReportMmapWriteExec(int prot); + +// Math +#if SANITIZER_WINDOWS && !defined(__clang__) && !defined(__GNUC__) +extern "C" { +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#if defined(_WIN64) +unsigned char _BitScanForward64(unsigned long *index, unsigned __int64 mask); +unsigned char _BitScanReverse64(unsigned long *index, unsigned __int64 mask); +#endif +} +#endif + +INLINE uptr MostSignificantSetBitIndex(uptr x) { + CHECK_NE(x, 0U); + unsigned long up; +#if !SANITIZER_WINDOWS || defined(__clang__) || defined(__GNUC__) +# ifdef _WIN64 + up = SANITIZER_WORDSIZE - 1 - __builtin_clzll(x); +# else + up = SANITIZER_WORDSIZE - 1 - __builtin_clzl(x); +# endif +#elif defined(_WIN64) + _BitScanReverse64(&up, x); +#else + _BitScanReverse(&up, x); +#endif + return up; +} + +INLINE uptr LeastSignificantSetBitIndex(uptr x) { + CHECK_NE(x, 0U); + unsigned long up; +#if !SANITIZER_WINDOWS || defined(__clang__) || defined(__GNUC__) +# ifdef _WIN64 + up = __builtin_ctzll(x); +# else + up = __builtin_ctzl(x); +# endif +#elif defined(_WIN64) + _BitScanForward64(&up, x); +#else + _BitScanForward(&up, x); +#endif + return up; +} + +INLINE bool IsPowerOfTwo(uptr x) { + return (x & (x - 1)) == 0; +} + +INLINE uptr RoundUpToPowerOfTwo(uptr size) { + CHECK(size); + if (IsPowerOfTwo(size)) return size; + + uptr up = MostSignificantSetBitIndex(size); + CHECK_LT(size, (1ULL << (up + 1))); + CHECK_GT(size, (1ULL << up)); + return 1ULL << (up + 1); +} + +INLINE uptr RoundUpTo(uptr size, uptr boundary) { + RAW_CHECK(IsPowerOfTwo(boundary)); + return (size + boundary - 1) & ~(boundary - 1); +} + +INLINE uptr RoundDownTo(uptr x, uptr boundary) { + return x & ~(boundary - 1); +} + +INLINE bool IsAligned(uptr a, uptr alignment) { + return (a & (alignment - 1)) == 0; +} + +INLINE uptr Log2(uptr x) { + CHECK(IsPowerOfTwo(x)); + return LeastSignificantSetBitIndex(x); +} + +// Don't use std::min, std::max or std::swap, to minimize dependency +// on libstdc++. +template T Min(T a, T b) { return a < b ? a : b; } +template T Max(T a, T b) { return a > b ? 
a : b; } +template void Swap(T& a, T& b) { + T tmp = a; + a = b; + b = tmp; +} + +// Char handling +INLINE bool IsSpace(int c) { + return (c == ' ') || (c == '\n') || (c == '\t') || + (c == '\f') || (c == '\r') || (c == '\v'); +} +INLINE bool IsDigit(int c) { + return (c >= '0') && (c <= '9'); +} +INLINE int ToLower(int c) { + return (c >= 'A' && c <= 'Z') ? (c + 'a' - 'A') : c; +} + +// A low-level vector based on mmap. May incur a significant memory overhead for +// small vectors. +// WARNING: The current implementation supports only POD types. +template +class InternalMmapVectorNoCtor { + public: + void Initialize(uptr initial_capacity) { + capacity_bytes_ = 0; + size_ = 0; + data_ = 0; + reserve(initial_capacity); + } + void Destroy() { UnmapOrDie(data_, capacity_bytes_); } + T &operator[](uptr i) { + CHECK_LT(i, size_); + return data_[i]; + } + const T &operator[](uptr i) const { + CHECK_LT(i, size_); + return data_[i]; + } + void push_back(const T &element) { + CHECK_LE(size_, capacity()); + if (size_ == capacity()) { + uptr new_capacity = RoundUpToPowerOfTwo(size_ + 1); + Realloc(new_capacity); + } + internal_memcpy(&data_[size_++], &element, sizeof(T)); + } + T &back() { + CHECK_GT(size_, 0); + return data_[size_ - 1]; + } + void pop_back() { + CHECK_GT(size_, 0); + size_--; + } + uptr size() const { + return size_; + } + const T *data() const { + return data_; + } + T *data() { + return data_; + } + uptr capacity() const { return capacity_bytes_ / sizeof(T); } + void reserve(uptr new_size) { + // Never downsize internal buffer. + if (new_size > capacity()) + Realloc(new_size); + } + void resize(uptr new_size) { + if (new_size > size_) { + reserve(new_size); + internal_memset(&data_[size_], 0, sizeof(T) * (new_size - size_)); + } + size_ = new_size; + } + + void clear() { size_ = 0; } + bool empty() const { return size() == 0; } + + const T *begin() const { + return data(); + } + T *begin() { + return data(); + } + const T *end() const { + return data() + size(); + } + T *end() { + return data() + size(); + } + + void swap(InternalMmapVectorNoCtor &other) { + Swap(data_, other.data_); + Swap(capacity_bytes_, other.capacity_bytes_); + Swap(size_, other.size_); + } + + private: + void Realloc(uptr new_capacity) { + CHECK_GT(new_capacity, 0); + CHECK_LE(size_, new_capacity); + uptr new_capacity_bytes = + RoundUpTo(new_capacity * sizeof(T), GetPageSizeCached()); + T *new_data = (T *)MmapOrDie(new_capacity_bytes, "InternalMmapVector"); + internal_memcpy(new_data, data_, size_ * sizeof(T)); + UnmapOrDie(data_, capacity_bytes_); + data_ = new_data; + capacity_bytes_ = new_capacity_bytes; + } + + T *data_; + uptr capacity_bytes_; + uptr size_; +}; + +template +bool operator==(const InternalMmapVectorNoCtor &lhs, + const InternalMmapVectorNoCtor &rhs) { + if (lhs.size() != rhs.size()) return false; + return internal_memcmp(lhs.data(), rhs.data(), lhs.size() * sizeof(T)) == 0; +} + +template +bool operator!=(const InternalMmapVectorNoCtor &lhs, + const InternalMmapVectorNoCtor &rhs) { + return !(lhs == rhs); +} + +template +class InternalMmapVector : public InternalMmapVectorNoCtor { + public: + InternalMmapVector() { InternalMmapVectorNoCtor::Initialize(0); } + explicit InternalMmapVector(uptr cnt) { + InternalMmapVectorNoCtor::Initialize(cnt); + this->resize(cnt); + } + ~InternalMmapVector() { InternalMmapVectorNoCtor::Destroy(); } + // Disallow copies and moves. 
+ InternalMmapVector(const InternalMmapVector &) = delete; + InternalMmapVector &operator=(const InternalMmapVector &) = delete; + InternalMmapVector(InternalMmapVector &&) = delete; + InternalMmapVector &operator=(InternalMmapVector &&) = delete; +}; + +class InternalScopedString : public InternalMmapVector { + public: + explicit InternalScopedString(uptr max_length) + : InternalMmapVector(max_length), length_(0) { + (*this)[0] = '\0'; + } + uptr length() { return length_; } + void clear() { + (*this)[0] = '\0'; + length_ = 0; + } + void append(const char *format, ...); + + private: + uptr length_; +}; + +template +struct CompareLess { + bool operator()(const T &a, const T &b) const { return a < b; } +}; + +// HeapSort for arrays and InternalMmapVector. +template > +void Sort(T *v, uptr size, Compare comp = {}) { + if (size < 2) + return; + // Stage 1: insert elements to the heap. + for (uptr i = 1; i < size; i++) { + uptr j, p; + for (j = i; j > 0; j = p) { + p = (j - 1) / 2; + if (comp(v[p], v[j])) + Swap(v[j], v[p]); + else + break; + } + } + // Stage 2: swap largest element with the last one, + // and sink the new top. + for (uptr i = size - 1; i > 0; i--) { + Swap(v[0], v[i]); + uptr j, max_ind; + for (j = 0; j < i; j = max_ind) { + uptr left = 2 * j + 1; + uptr right = 2 * j + 2; + max_ind = j; + if (left < i && comp(v[max_ind], v[left])) + max_ind = left; + if (right < i && comp(v[max_ind], v[right])) + max_ind = right; + if (max_ind != j) + Swap(v[j], v[max_ind]); + else + break; + } + } +} + +// Works like std::lower_bound: finds the first element that is not less +// than the val. +template +uptr InternalLowerBound(const Container &v, uptr first, uptr last, + const Value &val, Compare comp) { + while (last > first) { + uptr mid = (first + last) / 2; + if (comp(v[mid], val)) + first = mid + 1; + else + last = mid; + } + return first; +} + +enum ModuleArch { + kModuleArchUnknown, + kModuleArchI386, + kModuleArchX86_64, + kModuleArchX86_64H, + kModuleArchARMV6, + kModuleArchARMV7, + kModuleArchARMV7S, + kModuleArchARMV7K, + kModuleArchARM64 +}; + +// Opens the file 'file_name" and reads up to 'max_len' bytes. +// The resulting buffer is mmaped and stored in '*buff'. +// Returns true if file was successfully opened and read. +bool ReadFileToVector(const char *file_name, + InternalMmapVectorNoCtor *buff, + uptr max_len = 1 << 26, error_t *errno_p = nullptr); + +// Opens the file 'file_name" and reads up to 'max_len' bytes. +// This function is less I/O efficient than ReadFileToVector as it may reread +// file multiple times to avoid mmap during read attempts. It's used to read +// procmap, so short reads with mmap in between can produce inconsistent result. +// The resulting buffer is mmaped and stored in '*buff'. +// The size of the mmaped region is stored in '*buff_size'. +// The total number of read bytes is stored in '*read_len'. +// Returns true if file was successfully opened and read. +bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size, + uptr *read_len, uptr max_len = 1 << 26, + error_t *errno_p = nullptr); + +// When adding a new architecture, don't forget to also update +// script/asan_symbolize.py and sanitizer_symbolizer_libcdep.cpp. 
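Editorial note, not part of the patch: a short usage sketch for the Sort and InternalLowerBound helpers declared above, using the default CompareLess ordering on an InternalMmapVector:

  InternalMmapVector<uptr> v;
  v.push_back(30);
  v.push_back(10);
  v.push_back(20);
  Sort(v.data(), v.size());  // heapsort, ascending: 10 20 30
  // First index whose element is not less than 20 -> 1.
  uptr idx = InternalLowerBound(v, 0, v.size(), (uptr)20, CompareLess<uptr>());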
+inline const char *ModuleArchToString(ModuleArch arch) { + switch (arch) { + case kModuleArchUnknown: + return ""; + case kModuleArchI386: + return "i386"; + case kModuleArchX86_64: + return "x86_64"; + case kModuleArchX86_64H: + return "x86_64h"; + case kModuleArchARMV6: + return "armv6"; + case kModuleArchARMV7: + return "armv7"; + case kModuleArchARMV7S: + return "armv7s"; + case kModuleArchARMV7K: + return "armv7k"; + case kModuleArchARM64: + return "arm64"; + } + CHECK(0 && "Invalid module arch"); + return ""; +} + +const uptr kModuleUUIDSize = 16; +const uptr kMaxSegName = 16; + +// Represents a binary loaded into virtual memory (e.g. this can be an +// executable or a shared object). +class LoadedModule { + public: + LoadedModule() + : full_name_(nullptr), + base_address_(0), + max_executable_address_(0), + arch_(kModuleArchUnknown), + instrumented_(false) { + internal_memset(uuid_, 0, kModuleUUIDSize); + ranges_.clear(); + } + void set(const char *module_name, uptr base_address); + void set(const char *module_name, uptr base_address, ModuleArch arch, + u8 uuid[kModuleUUIDSize], bool instrumented); + void clear(); + void addAddressRange(uptr beg, uptr end, bool executable, bool writable, + const char *name = nullptr); + bool containsAddress(uptr address) const; + + const char *full_name() const { return full_name_; } + uptr base_address() const { return base_address_; } + uptr max_executable_address() const { return max_executable_address_; } + ModuleArch arch() const { return arch_; } + const u8 *uuid() const { return uuid_; } + bool instrumented() const { return instrumented_; } + + struct AddressRange { + AddressRange *next; + uptr beg; + uptr end; + bool executable; + bool writable; + char name[kMaxSegName]; + + AddressRange(uptr beg, uptr end, bool executable, bool writable, + const char *name) + : next(nullptr), + beg(beg), + end(end), + executable(executable), + writable(writable) { + internal_strncpy(this->name, (name ? name : ""), ARRAY_SIZE(this->name)); + } + }; + + const IntrusiveList &ranges() const { return ranges_; } + + private: + char *full_name_; // Owned. + uptr base_address_; + uptr max_executable_address_; + ModuleArch arch_; + u8 uuid_[kModuleUUIDSize]; + bool instrumented_; + IntrusiveList ranges_; +}; + +// List of LoadedModules. OS-dependent implementation is responsible for +// filling this information. +class ListOfModules { + public: + ListOfModules() : initialized(false) {} + ~ListOfModules() { clear(); } + void init(); + void fallbackInit(); // Uses fallback init if available, otherwise clears + const LoadedModule *begin() const { return modules_.begin(); } + LoadedModule *begin() { return modules_.begin(); } + const LoadedModule *end() const { return modules_.end(); } + LoadedModule *end() { return modules_.end(); } + uptr size() const { return modules_.size(); } + const LoadedModule &operator[](uptr i) const { + CHECK_LT(i, modules_.size()); + return modules_[i]; + } + + private: + void clear() { + for (auto &module : modules_) module.clear(); + modules_.clear(); + } + void clearOrInit() { + initialized ? clear() : modules_.Initialize(kInitialCapacity); + initialized = true; + } + + InternalMmapVectorNoCtor modules_; + // We rarely have more than 16K loaded modules. + static const uptr kInitialCapacity = 1 << 14; + bool initialized; +}; + +// Callback type for iterating over a set of memory ranges. 
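// (Editorial note, not part of the patch.) Typical use of the ListOfModules /
// LoadedModule interface declared above, e.g. when attributing a PC to the
// binary that contains it:
//
//   void ReportModuleFor(uptr pc) {
//     ListOfModules modules;
//     modules.init();
//     for (const LoadedModule &m : modules)
//       if (m.containsAddress(pc))
//         Report("0x%zx belongs to %s\n", pc, m.full_name());
//   }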
+typedef void (*RangeIteratorCallback)(uptr begin, uptr end, void *arg); + +enum AndroidApiLevel { + ANDROID_NOT_ANDROID = 0, + ANDROID_KITKAT = 19, + ANDROID_LOLLIPOP_MR1 = 22, + ANDROID_POST_LOLLIPOP = 23 +}; + +void WriteToSyslog(const char *buffer); + +#if defined(SANITIZER_WINDOWS) && defined(_MSC_VER) && !defined(__clang__) +#define SANITIZER_WIN_TRACE 1 +#else +#define SANITIZER_WIN_TRACE 0 +#endif + +#if SANITIZER_MAC || SANITIZER_WIN_TRACE +void LogFullErrorReport(const char *buffer); +#else +INLINE void LogFullErrorReport(const char *buffer) {} +#endif + +#if SANITIZER_LINUX || SANITIZER_MAC +void WriteOneLineToSyslog(const char *s); +void LogMessageOnPrintf(const char *str); +#else +INLINE void WriteOneLineToSyslog(const char *s) {} +INLINE void LogMessageOnPrintf(const char *str) {} +#endif + +#if SANITIZER_LINUX || SANITIZER_WIN_TRACE +// Initialize Android logging. Any writes before this are silently lost. +void AndroidLogInit(); +void SetAbortMessage(const char *); +#else +INLINE void AndroidLogInit() {} +// FIXME: MacOS implementation could use CRSetCrashLogMessage. +INLINE void SetAbortMessage(const char *) {} +#endif + +#if SANITIZER_ANDROID +void SanitizerInitializeUnwinder(); +AndroidApiLevel AndroidGetApiLevel(); +#else +INLINE void AndroidLogWrite(const char *buffer_unused) {} +INLINE void SanitizerInitializeUnwinder() {} +INLINE AndroidApiLevel AndroidGetApiLevel() { return ANDROID_NOT_ANDROID; } +#endif + +INLINE uptr GetPthreadDestructorIterations() { +#if SANITIZER_ANDROID + return (AndroidGetApiLevel() == ANDROID_LOLLIPOP_MR1) ? 8 : 4; +#elif SANITIZER_POSIX + return 4; +#else +// Unused on Windows. + return 0; +#endif +} + +void *internal_start_thread(void *(*func)(void*), void *arg); +void internal_join_thread(void *th); +void MaybeStartBackgroudThread(); + +// Make the compiler think that something is going on there. +// Use this inside a loop that looks like memset/memcpy/etc to prevent the +// compiler from recognising it and turning it into an actual call to +// memset/memcpy/etc. +static inline void SanitizerBreakOptimization(void *arg) { +#if defined(_MSC_VER) && !defined(__clang__) + _ReadWriteBarrier(); +#else + __asm__ __volatile__("" : : "r" (arg) : "memory"); +#endif +} + +struct SignalContext { + void *siginfo; + void *context; + uptr addr; + uptr pc; + uptr sp; + uptr bp; + bool is_memory_access; + enum WriteFlag { UNKNOWN, READ, WRITE } write_flag; + + // In some cases the kernel cannot provide the true faulting address; `addr` + // will be zero then. This field allows to distinguish between these cases + // and dereferences of null. + bool is_true_faulting_addr; + + // VS2013 doesn't implement unrestricted unions, so we need a trivial default + // constructor + SignalContext() = default; + + // Creates signal context in a platform-specific manner. + // SignalContext is going to keep pointers to siginfo and context without + // owning them. + SignalContext(void *siginfo, void *context) + : siginfo(siginfo), + context(context), + addr(GetAddress()), + is_memory_access(IsMemoryAccess()), + write_flag(GetWriteFlag()), + is_true_faulting_addr(IsTrueFaultingAddress()) { + InitPcSpBp(); + } + + static void DumpAllRegisters(void *context); + + // Type of signal e.g. SIGSEGV or EXCEPTION_ACCESS_VIOLATION. + int GetType() const; + + // String description of the signal. + const char *Describe() const; + + // Returns true if signal is stack overflow. + bool IsStackOverflow() const; + + private: + // Platform specific initialization. 
+ void InitPcSpBp(); + uptr GetAddress() const; + WriteFlag GetWriteFlag() const; + bool IsMemoryAccess() const; + bool IsTrueFaultingAddress() const; +}; + +void InitializePlatformEarly(); +void MaybeReexec(); + +template +class RunOnDestruction { + public: + explicit RunOnDestruction(Fn fn) : fn_(fn) {} + ~RunOnDestruction() { fn_(); } + + private: + Fn fn_; +}; + +// A simple scope guard. Usage: +// auto cleanup = at_scope_exit([]{ do_cleanup; }); +template +RunOnDestruction at_scope_exit(Fn fn) { + return RunOnDestruction(fn); +} + +// Linux on 64-bit s390 had a nasty bug that crashes the whole machine +// if a process uses virtual memory over 4TB (as many sanitizers like +// to do). This function will abort the process if running on a kernel +// that looks vulnerable. +#if SANITIZER_LINUX && SANITIZER_S390_64 +void AvoidCVE_2016_2143(); +#else +INLINE void AvoidCVE_2016_2143() {} +#endif + +struct StackDepotStats { + uptr n_uniq_ids; + uptr allocated; +}; + +// The default value for allocator_release_to_os_interval_ms common flag to +// indicate that sanitizer allocator should not attempt to release memory to OS. +const s32 kReleaseToOSIntervalNever = -1; + +void CheckNoDeepBind(const char *filename, int flag); + +// Returns the requested amount of random data (up to 256 bytes) that can then +// be used to seed a PRNG. Defaults to blocking like the underlying syscall. +bool GetRandom(void *buffer, uptr length, bool blocking = true); + +// Returns the number of logical processors on the system. +u32 GetNumberOfCPUs(); +extern u32 NumberOfCPUsCached; +INLINE u32 GetNumberOfCPUsCached() { + if (!NumberOfCPUsCached) + NumberOfCPUsCached = GetNumberOfCPUs(); + return NumberOfCPUsCached; +} + +template +class ArrayRef { + public: + ArrayRef() {} + ArrayRef(T *begin, T *end) : begin_(begin), end_(end) {} + + T *begin() { return begin_; } + T *end() { return end_; } + + private: + T *begin_ = nullptr; + T *end_ = nullptr; +}; + +} // namespace __sanitizer + +inline void *operator new(__sanitizer::operator_new_size_type size, + __sanitizer::LowLevelAllocator &alloc) { // NOLINT + return alloc.Allocate(size); +} + +#endif // SANITIZER_COMMON_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp.sv b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp.sv new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp.sv @@ -0,0 +1,239 @@ +//===-- sanitizer_common_libcdep.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is shared between AddressSanitizer and ThreadSanitizer +// run-time libraries. +//===----------------------------------------------------------------------===// + +#include "sanitizer_allocator_interface.h" +#include "sanitizer_common.h" +#include "sanitizer_flags.h" +#include "sanitizer_procmaps.h" + + +namespace __sanitizer { + +static void (*SoftRssLimitExceededCallback)(bool exceeded); +void SetSoftRssLimitExceededCallback(void (*Callback)(bool exceeded)) { + CHECK_EQ(SoftRssLimitExceededCallback, nullptr); + SoftRssLimitExceededCallback = Callback; +} + +#if (SANITIZER_LINUX || SANITIZER_NETBSD) && !SANITIZER_GO +// Weak default implementation for when sanitizer_stackdepot is not linked in. 
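// (Editorial note, not part of the patch.) Small usage sketch for the
// GetRandom helper declared in sanitizer_common.h above:
//
//   u64 seed;
//   if (!GetRandom(&seed, sizeof(seed), /*blocking=*/false))
//     seed = NanoTime();  // fall back when the random syscall is unavailable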
+SANITIZER_WEAK_ATTRIBUTE StackDepotStats *StackDepotGetStats() { + return nullptr; +} + +void *BackgroundThread(void *arg) { + const uptr hard_rss_limit_mb = common_flags()->hard_rss_limit_mb; + const uptr soft_rss_limit_mb = common_flags()->soft_rss_limit_mb; + const bool heap_profile = common_flags()->heap_profile; + uptr prev_reported_rss = 0; + uptr prev_reported_stack_depot_size = 0; + bool reached_soft_rss_limit = false; + uptr rss_during_last_reported_profile = 0; + while (true) { + SleepForMillis(100); + const uptr current_rss_mb = GetRSS() >> 20; + if (Verbosity()) { + // If RSS has grown 10% since last time, print some information. + if (prev_reported_rss * 11 / 10 < current_rss_mb) { + Printf("%s: RSS: %zdMb\n", SanitizerToolName, current_rss_mb); + prev_reported_rss = current_rss_mb; + } + // If stack depot has grown 10% since last time, print it too. + StackDepotStats *stack_depot_stats = StackDepotGetStats(); + if (stack_depot_stats) { + if (prev_reported_stack_depot_size * 11 / 10 < + stack_depot_stats->allocated) { + Printf("%s: StackDepot: %zd ids; %zdM allocated\n", + SanitizerToolName, + stack_depot_stats->n_uniq_ids, + stack_depot_stats->allocated >> 20); + prev_reported_stack_depot_size = stack_depot_stats->allocated; + } + } + } + // Check RSS against the limit. + if (hard_rss_limit_mb && hard_rss_limit_mb < current_rss_mb) { + Report("%s: hard rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, hard_rss_limit_mb, current_rss_mb); + DumpProcessMap(); + Die(); + } + if (soft_rss_limit_mb) { + if (soft_rss_limit_mb < current_rss_mb && !reached_soft_rss_limit) { + reached_soft_rss_limit = true; + Report("%s: soft rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, soft_rss_limit_mb, current_rss_mb); + if (SoftRssLimitExceededCallback) + SoftRssLimitExceededCallback(true); + } else if (soft_rss_limit_mb >= current_rss_mb && + reached_soft_rss_limit) { + reached_soft_rss_limit = false; + if (SoftRssLimitExceededCallback) + SoftRssLimitExceededCallback(false); + } + } + if (heap_profile && + current_rss_mb > rss_during_last_reported_profile * 1.1) { + Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb); + __sanitizer_print_memory_profile(90, 20); + rss_during_last_reported_profile = current_rss_mb; + } + } +} +#endif + +void WriteToSyslog(const char *msg) { + InternalScopedString msg_copy(kErrorMessageBufferSize); + msg_copy.append("%s", msg); + char *p = msg_copy.data(); + char *q; + + // Print one line at a time. + // syslog, at least on Android, has an implicit message length limit. + while ((q = internal_strchr(p, '\n'))) { + *q = '\0'; + WriteOneLineToSyslog(p); + p = q + 1; + } + // Print remaining characters, if there are any. + // Note that this will add an extra newline at the end. + // FIXME: buffer extra output. This would need a thread-local buffer, which + // on Android requires plugging into the tools (ex. ASan's) Thread class. + if (*p) + WriteOneLineToSyslog(p); +} + +void MaybeStartBackgroudThread() { +#if (SANITIZER_LINUX || SANITIZER_NETBSD) && \ + !SANITIZER_GO // Need to implement/test on other platforms. + // Start the background thread if one of the rss limits is given. + if (!common_flags()->hard_rss_limit_mb && + !common_flags()->soft_rss_limit_mb && + !common_flags()->heap_profile) return; + if (!&real_pthread_create) return; // Can't spawn the thread anyway. 
+ internal_start_thread(BackgroundThread, nullptr); +#endif +} + +static void (*sandboxing_callback)(); +void SetSandboxingCallback(void (*f)()) { + sandboxing_callback = f; +} + +uptr ReservedAddressRange::InitAligned(uptr size, uptr align, + const char *name) { + CHECK(IsPowerOfTwo(align)); + if (align <= GetPageSizeCached()) + return Init(size, name); + uptr start = Init(size + align, name); + start += align - (start & (align - 1)); + return start; +} + +// Reserve memory range [beg, end]. +// We need to use inclusive range because end+1 may not be representable. +void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name, + bool madvise_shadow) { + Printf("Reserve %p %p %d %s\n", beg, end, madvise_shadow, name); + CHECK_EQ((beg % GetMmapGranularity()), 0); + CHECK_EQ(((end + 1) % GetMmapGranularity()), 0); + uptr size = end - beg + 1; + DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb. + if (madvise_shadow ? !MmapFixedSuperNoReserve(beg, size, name) + : !MmapFixedNoReserve(beg, size, name)) { + Report("ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. " + "Perhaps you're using ulimit -v\n", + size); + Abort(); + } + if (madvise_shadow && common_flags()->use_madv_dontdump) + DontDumpShadowMemory(beg, size); +} + +void ProtectGap(uptr addr, uptr size, uptr zero_base_shadow_start, + uptr zero_base_max_shadow_start, const char *name) { + if (!size) + return; + void *res = MmapFixedNoAccess(addr, size, "shadow gap"); + if (addr == (uptr)res) + return; + Printf("ProtectGap %p %p %p %p %p %s\n", addr, res, size, zero_base_shadow_start, + zero_base_max_shadow_start, name); + // A few pages at the start of the address space can not be protected. + // But we really want to protect as much as possible, to prevent this memory + // being returned as a result of a non-FIXED mmap(). + if (addr == zero_base_shadow_start) { + uptr step = GetMmapGranularity(); + while (size > step && addr < zero_base_max_shadow_start) { + addr += step; + size -= step; + void *res = MmapFixedNoAccess(addr, size, "shadow gap"); + if (addr == (uptr)res) + return; + } + } + + Report("ERROR: Failed to protect the shadow gap. " + "%s cannot proceed correctly. ABORTING.\n", + SanitizerToolName); + DumpProcessMap(); + Die(); +} + +static void UnmapFromTo(uptr from, uptr to) { + if (to == from) + return; + CHECK(to >= from); + uptr res = internal_munmap(reinterpret_cast(from), to - from); + if (UNLIKELY(internal_iserror(res))) { + Report("ERROR: %s failed to unmap 0x%zx (%zd) bytes at address %p\n", + SanitizerToolName, to - from, to - from, from); + CHECK("unable to unmap" && 0); + } +} + +uptr MapDynamicShadow(uptr shadow_size_bytes, uptr mmap_alignment_scale, + uptr shadow_base_alignment) { + const uptr granularity = GetMmapGranularity(); + const uptr alignment = shadow_base_alignment + ? 1ULL << shadow_base_alignment + : granularity << mmap_alignment_scale; + const uptr left_padding = + shadow_base_alignment ? 
1ULL << shadow_base_alignment : granularity; + + if (shadow_base_alignment) { + const uptr min_alignment = granularity << mmap_alignment_scale; + CHECK_GE(alignment, min_alignment); + } + + const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); + const uptr map_size = shadow_size + left_padding + alignment; + + const uptr map_start = (uptr)MmapNoAccess(map_size); + CHECK_NE(map_start, ~(uptr)0); + + const uptr shadow_start = RoundUpTo(map_start + left_padding, alignment); + + UnmapFromTo(map_start, shadow_start - left_padding); + UnmapFromTo(shadow_start + shadow_size, map_start + map_size); + + return shadow_start; +} + +} // namespace __sanitizer + +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify, + __sanitizer_sandbox_arguments *args) { + __sanitizer::PlatformPrepareForSandboxing(args); + if (__sanitizer::sandboxing_callback) + __sanitizer::sandboxing_callback(); +} diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -451,5 +451,8 @@ namespace __hwasan { using namespace __sanitizer; } +namespace __memprof { +using namespace __sanitizer; +} #endif // SANITIZER_DEFS_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h @@ -71,6 +71,7 @@ uptr internal_getpid(); uptr internal_getppid(); +uptr internal_getcpu(); int internal_dlinfo(void *handle, int request, void *p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -745,6 +745,8 @@ return internal_syscall(SYSCALL(getppid)); } +uptr internal_getcpu() { return internal_syscall(SYSCALL(getcpu)); } + int internal_dlinfo(void *handle, int request, void *p) { #if SANITIZER_FREEBSD return dlinfo(handle, request, p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h @@ -41,6 +41,7 @@ void StackDepotLockAll(); void StackDepotUnlockAll(); +void StackDepotPrintAll(); // Instantiating this class creates a snapshot of StackDepot which can be // efficiently queried with StackDepotGet(). 
You can use it concurrently with diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp @@ -115,6 +115,12 @@ theDepot.UnlockAll(); } +void StackDepotPrintAll() { +#if !SANITIZER_GO + theDepot.PrintAll(); +#endif +} + bool StackDepotReverseMap::IdDescPair::IdComparator( const StackDepotReverseMap::IdDescPair &a, const StackDepotReverseMap::IdDescPair &b) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h @@ -13,9 +13,11 @@ #ifndef SANITIZER_STACKDEPOTBASE_H #define SANITIZER_STACKDEPOTBASE_H +#include + +#include "sanitizer_atomic.h" #include "sanitizer_internal_defs.h" #include "sanitizer_mutex.h" -#include "sanitizer_atomic.h" #include "sanitizer_persistent_allocator.h" namespace __sanitizer { @@ -34,6 +36,7 @@ void LockAll(); void UnlockAll(); + void PrintAll(); private: static Node *find(Node *s, args_type args, u32 hash); @@ -172,6 +175,21 @@ } } +template +void StackDepotBase::PrintAll() { + for (int i = 0; i < kTabSize; ++i) { + atomic_uintptr_t *p = &tab[i]; + lock(p); + uptr v = atomic_load(p, memory_order_relaxed); + Node *s = (Node *)(v & ~1UL); + for (; s; s = s->link) { + Printf("Stack for id %u:\n", s->id); + s->load().Print(); + } + unlock(p, s); + } +} + } // namespace __sanitizer #endif // SANITIZER_STACKDEPOTBASE_H diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -68,6 +68,9 @@ if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE) compiler_rt_test_runtime(profile) endif() + if(COMPILER_RT_BUILD_MEMPROF) + compiler_rt_test_runtime(memprof) + endif() if(COMPILER_RT_BUILD_XRAY) compiler_rt_test_runtime(xray) endif() diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -57,6 +57,8 @@ # If needed, add cflag for shadow scale. if config.asan_shadow_scale != '': config.target_cflags += " -mllvm -asan-mapping-scale=" + config.asan_shadow_scale +if config.memprof_shadow_scale != '': + config.target_cflags += " -mllvm -memprof-mapping-scale=" + config.memprof_shadow_scale # BFD linker in 64-bit android toolchains fails to find libc++_shared.so, which # is a transitive shared library dependency (via asan runtime). 
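Editorial note, not part of the patch: a sketch of how the new lit plumbing below is exercised end-to-end; the build layout and generator are assumptions, while the cache variable and the check-memprof target come from this patch:

  # Configure with an explicit memprof shadow scale, then run the new test suite.
  cmake -G Ninja ../llvm -DLLVM_ENABLE_PROJECTS="clang;compiler-rt" \
        -DCOMPILER_RT_MEMPROF_SHADOW_SCALE=3
  ninja check-memprof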
@@ -542,6 +544,11 @@ else: config.available_features.add("shadow-scale-3") +if config.memprof_shadow_scale: + config.available_features.add("memprof-shadow-scale-%s" % config.memprof_shadow_scale) +else: + config.available_features.add("memprof-shadow-scale-3") + if config.expensive_checks: config.available_features.add("expensive_checks") diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -29,6 +29,7 @@ set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@") set_default("emulator", "@COMPILER_RT_EMULATOR@") set_default("asan_shadow_scale", "@COMPILER_RT_ASAN_SHADOW_SCALE@") +set_default("memprof_shadow_scale", "@COMPILER_RT_MEMPROF_SHADOW_SCALE@") set_default("apple_platform", "osx") set_default("apple_platform_min_deployment_target_flag", "-mmacosx-version-min") set_default("sanitizer_can_use_cxxabi", @SANITIZER_CAN_USE_CXXABI_PYBOOL@) diff --git a/compiler-rt/test/memprof/CMakeLists.txt b/compiler-rt/test/memprof/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/CMakeLists.txt @@ -0,0 +1,60 @@ +set(MEMPROF_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +set(MEMPROF_TESTSUITES) +set(MEMPROF_DYNAMIC_TESTSUITES) + +macro(get_bits_for_arch arch bits) + if (${arch} MATCHES "x86_64") + set(${bits} 64) + else() + message(FATAL_ERROR "Unexpected target architecture: ${arch}") + endif() +endmacro() + +set(MEMPROF_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) +if(NOT COMPILER_RT_STANDALONE_BUILD) + list(APPEND MEMPROF_TEST_DEPS memprof) + if(COMPILER_RT_HAS_LLD AND TARGET lld) + list(APPEND MEMPROF_TEST_DEPS lld) + endif() +endif() +set(MEMPROF_DYNAMIC_TEST_DEPS ${MEMPROF_TEST_DEPS}) + +set(MEMPROF_TEST_ARCH ${MEMPROF_SUPPORTED_ARCH}) + +foreach(arch ${MEMPROF_TEST_ARCH}) + set(MEMPROF_TEST_TARGET_ARCH ${arch}) + string(TOLOWER "-${arch}-${OS_NAME}" MEMPROF_TEST_CONFIG_SUFFIX) + get_bits_for_arch(${arch} MEMPROF_TEST_BITS) + get_test_cc_for_arch(${arch} MEMPROF_TEST_TARGET_CC MEMPROF_TEST_TARGET_CFLAGS) + set(MEMPROF_TEST_DYNAMIC False) + string(TOUPPER ${arch} ARCH_UPPER_CASE) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + ) + list(APPEND MEMPROF_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + + string(TOLOWER "-${arch}-${OS_NAME}-dynamic" MEMPROF_TEST_CONFIG_SUFFIX) + set(MEMPROF_TEST_DYNAMIC True) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) + list(APPEND MEMPROF_DYNAMIC_TESTSUITES + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) +endforeach() + +add_lit_testsuite(check-memprof "Running the MemProfiler tests" + ${MEMPROF_TESTSUITES} + DEPENDS ${MEMPROF_TEST_DEPS}) +set_target_properties(check-memprof PROPERTIES FOLDER "Compiler-RT Misc") + +add_lit_testsuite(check-memprof-dynamic + "Running the MemProfiler tests with dynamic runtime" + ${MEMPROF_DYNAMIC_TESTSUITES} + ${exclude_from_check_all.g} + DEPENDS ${MEMPROF_DYNAMIC_TEST_DEPS}) +set_target_properties(check-memprof-dynamic + PROPERTIES FOLDER "Compiler-RT Misc") diff --git a/compiler-rt/test/memprof/TestCases/atexit_stats.cpp b/compiler-rt/test/memprof/TestCases/atexit_stats.cpp new file mode 100644 --- /dev/null +++ 
b/compiler-rt/test/memprof/TestCases/atexit_stats.cpp @@ -0,0 +1,20 @@ +// Check atexit option. + +// RUN: %clangxx_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=atexit=1 %run %t 2>&1 | FileCheck %s +// RUN: %env_memprof_opts=atexit=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOATEXIT + +// CHECK: MemProfiler exit stats: +// CHECK: Stats: {{[0-9]+}}M malloced ({{[0-9]+}}M for overhead) by {{[0-9]+}} calls +// CHECK: Stats: {{[0-9]+}}M realloced by {{[0-9]+}} calls +// CHECK: Stats: {{[0-9]+}}M freed by {{[0-9]+}} calls +// CHECK: Stats: {{[0-9]+}}M really freed by {{[0-9]+}} calls +// CHECK: Stats: {{[0-9]+}}M ({{[0-9]+}}M-{{[0-9]+}}M) mmaped; {{[0-9]+}} maps, {{[0-9]+}} unmaps +// CHECK: mallocs by size class: +// CHECK: Stats: malloc large: {{[0-9]+}} + +// NOATEXIT-NOT: MemProfiler exit stats + +int main() { + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/default_options.cpp b/compiler-rt/test/memprof/TestCases/default_options.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/default_options.cpp @@ -0,0 +1,13 @@ +// RUN: %clangxx_memprof -O2 %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s + +const char *kMemProfDefaultOptions = "verbosity=1 help=1"; + +extern "C" const char *__memprof_default_options() { + // CHECK: Available flags for MemProfiler: + return kMemProfDefaultOptions; +} + +int main() { + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/dump_process_map.cpp b/compiler-rt/test/memprof/TestCases/dump_process_map.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/dump_process_map.cpp @@ -0,0 +1,14 @@ +// Check dump_process_map option. + +// RUN: %clangxx_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=dump_process_map=1 %run %t 2>&1 | FileCheck %s +// RUN: %env_memprof_opts=dump_process_map=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOMAP + +// CHECK: Process memory map follows: +// CHECK: dump_process_map.cpp.tmp +// CHECK: End of process memory map. +// NOMAP-NOT: memory map + +int main() { + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp b/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp @@ -0,0 +1,34 @@ +// Check that free hook doesn't conflict with Realloc. +// RUN: %clangxx_memprof -O2 %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s + +#include +#include +#include + +static void *glob_ptr; + +extern "C" { +void __sanitizer_free_hook(const volatile void *ptr) { + if (ptr == glob_ptr) { + *(int *)ptr = 0; + write(1, "FreeHook\n", sizeof("FreeHook\n")); + } +} +} + +int main() { + int *x = (int *)malloc(100); + x[0] = 42; + glob_ptr = x; + int *y = (int *)realloc(x, 200); + // Verify that free hook was called and didn't spoil the memory. + if (y[0] != 42) { + _exit(1); + } + write(1, "Passed\n", sizeof("Passed\n")); + free(y); + // CHECK: FreeHook + // CHECK: Passed + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/function-sections-are-bad.cpp b/compiler-rt/test/memprof/TestCases/function-sections-are-bad.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/function-sections-are-bad.cpp @@ -0,0 +1,40 @@ +// Check that --gc-sections does not throw away (or localize) parts of sanitizer +// interface. 
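// (Editorial note, not part of the patch, referring to free_hook_realloc.cpp
// above.) The matching allocation-side hook, assuming the declarations in
// <sanitizer/allocator_interface.h>:
//
//   #include <sanitizer/allocator_interface.h>
//   #include <stdio.h>
//   extern "C" void __sanitizer_malloc_hook(const volatile void *ptr,
//                                           size_t size) {
//     fprintf(stderr, "allocated %p (%zu bytes)\n", (void *)ptr, size);
//   }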
+// RUN: %clang_memprof %s -Wl,--gc-sections -ldl -o %t
+// RUN: %clang_memprof %s -DBUILD_SO -fPIC -o %t-so.so -shared
+// RUN: %run %t 2>&1
+
+#ifndef BUILD_SO
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  char path[4096];
+  snprintf(path, sizeof(path), "%s-so.so", argv[0]);
+
+  void *handle = dlopen(path, RTLD_LAZY);
+  if (!handle)
+    fprintf(stderr, "%s\n", dlerror());
+  assert(handle != 0);
+
+  typedef void (*F)();
+  F f = (F)dlsym(handle, "call_rtl_from_dso");
+  printf("%s\n", dlerror());
+  assert(dlerror() == 0);
+  f();
+
+  dlclose(handle);
+  return 0;
+}
+
+#else // BUILD_SO
+
+#include <sanitizer/common_interface_defs.h>
+extern "C" void call_rtl_from_dso() {
+  volatile int32_t x;
+  volatile int32_t y = __sanitizer_unaligned_load32((void *)&x);
+}
+
+#endif // BUILD_SO
diff --git a/compiler-rt/test/memprof/TestCases/interface_test.cpp b/compiler-rt/test/memprof/TestCases/interface_test.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/interface_test.cpp
@@ -0,0 +1,10 @@
+// Check that user may include MemProf interface header.
+// RUN: %clang_memprof %s -o %t && %run %t
+// RUN: %clang_memprof -x c %s -o %t && %run %t
+// RUN: %clang %s -pie -o %t && %run %t
+// RUN: %clang -x c %s -pie -o %t && %run %t
+#include <sanitizer/memprof_interface.h>
+
+int main() {
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/log_path_test.cpp b/compiler-rt/test/memprof/TestCases/log_path_test.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/log_path_test.cpp
@@ -0,0 +1,34 @@
+// The for loop in the backticks below requires bash.
+// REQUIRES: shell
+//
+// RUN: %clangxx_memprof %s -o %t
+
+// Regular run.
+// RUN: %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-GOOD < %t.out
+
+// Good log_path.
+// RUN: rm -f %t.log.*
+// RUN: %env_memprof_opts=log_path=%t.log %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-GOOD < %t.log.*
+
+// Invalid log_path.
+// RUN: %env_memprof_opts=log_path=/dev/null/INVALID not %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-INVALID < %t.out
+
+// Too long log_path.
+// RUN: %env_memprof_opts=log_path=`for((i=0;i<10000;i++)); do echo -n $i; done` \
+// RUN:   not %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-LONG < %t.out
+
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  char *x = (char *)malloc(10);
+  memset(x, 0, 10);
+  free(x);
+  return 0;
+}
+// CHECK-GOOD: Memory allocation stack id
+// CHECK-INVALID: ERROR: Can't open file: /dev/null/INVALID
+// CHECK-LONG: ERROR: Path is too long: 01234
diff --git a/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp b/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp
@@ -0,0 +1,36 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=allocator_may_return_null=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SUMMARY
+// RUN: %env_memprof_opts=allocator_may_return_null=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NULL
+// Test print_summary
+// RUN: %env_memprof_opts=allocator_may_return_null=0:print_summary=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOSUMMARY
+// Test print_cmdline
+// RUN: %env_memprof_opts=allocator_may_return_null=0:print_cmdline=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SUMMARY --check-prefix=CHECK-CMDLINE
+// Test print_stats
+// RUN: %env_memprof_opts=allocator_may_return_null=0:print_stats=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SUMMARY --check-prefix=CHECK-STATS
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static const size_t kMaxAllowedMallocSizePlusOne = (1ULL << 40) + 1;
+int main() {
+  void *p = malloc(kMaxAllowedMallocSizePlusOne);
+  // CHECK: {{ERROR: MemProfiler: requested allocation size .* \(.* after adjustments for alignment, headers etc\.\) exceeds maximum supported size}}
+  // CHECK: {{#0 0x.* in .*malloc}}
+  // CHECK: {{#1 0x.* in main .*malloc-size-too-big.cpp:}}[[@LINE-3]]
+  // CHECK-SUMMARY: SUMMARY: MemProfiler: allocation-size-too-big
+  // CHECK-NOSUMMARY-NOT: SUMMARY:
+  // CHECK-CMDLINE: Command: {{.*}}malloc-size-too-big.cpp.tmp
+
+  // CHECK-STATS: Stats: {{[0-9]+}}M malloced ({{[0-9]+}}M for overhead) by {{[0-9]+}} calls
+  // CHECK-STATS: Stats: {{[0-9]+}}M realloced by {{[0-9]+}} calls
+  // CHECK-STATS: Stats: {{[0-9]+}}M freed by {{[0-9]+}} calls
+  // CHECK-STATS: Stats: {{[0-9]+}}M really freed by {{[0-9]+}} calls
+  // CHECK-STATS: Stats: {{[0-9]+}}M ({{[0-9]+}}M-{{[0-9]+}}M) mmaped; {{[0-9]+}} maps, {{[0-9]+}} unmaps
+  // CHECK-STATS: mallocs by size class:
+  // CHECK-STATS: Stats: malloc large: {{[0-9]+}}
+
+  printf("malloc returned: %zu\n", (size_t)p);
+  // CHECK-NULL: malloc returned: 0
+
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
@@ -0,0 +1,11 @@
+// Check mem_info_cache_entries option.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s
+
+// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00%
+// CHECK-NOT: Set
+
+int main() {
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp b/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp
@@ -0,0 +1,9 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=help=1 %run %t 2>&1 | FileCheck %s
+
+int main() {
+}
+
+// CHECK: Available flags for MemProfiler:
+// CHECK-DAG: replace_str
+// CHECK-DAG: print_stats
diff --git a/compiler-rt/test/memprof/TestCases/memprof_rt_conflict_test.cpp b/compiler-rt/test/memprof/TestCases/memprof_rt_conflict_test.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/memprof_rt_conflict_test.cpp
@@ -0,0 +1,13 @@
+// Test that preloading dynamic runtime to statically linked
+// executable is prohibited.
+//
+// RUN: %clangxx_memprof_static %s -o %t
+// RUN: env LD_PRELOAD=%shared_libmemprof not %run %t 2>&1 | FileCheck %s
+
+// REQUIRES: memprof-dynamic-runtime
+// XFAIL: android
+
+#include <stdio.h>
+int main(int argc, char **argv) { return 0; }
+
+// CHECK: Your application is linked against incompatible MemProf runtimes
diff --git a/compiler-rt/test/memprof/TestCases/on_error_callback.cpp b/compiler-rt/test/memprof/TestCases/on_error_callback.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/on_error_callback.cpp
@@ -0,0 +1,17 @@
+// RUN: %clangxx_memprof -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" void __memprof_on_error() {
+  fprintf(stderr, "__memprof_on_error called\n");
+  fflush(stderr);
+}
+
+static const size_t kMaxAllowedMallocSizePlusOne = (1ULL << 40) + 1;
+int main() {
+  void *p = malloc(kMaxAllowedMallocSizePlusOne);
+  // CHECK: __memprof_on_error called
+  printf("malloc returned: %zu\n", (size_t)p);
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
@@ -0,0 +1,14 @@
+// Check print_mem_info_cache_miss_rate and
+// print_mem_info_cache_miss_rate_details options.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS
+
+// CHECK: Overall miss rate: 0 / {{.*}} = 0.00%
+// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00%
+// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00%
+
+int main() {
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/realloc.cpp b/compiler-rt/test/memprof/TestCases/realloc.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/realloc.cpp
@@ -0,0 +1,21 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// Default is true (free on realloc to 0 size)
+// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=allocator_frees_and_returns_null_on_realloc_zero=true %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=allocator_frees_and_returns_null_on_realloc_zero=false %run %t 2>&1 | FileCheck %s --check-prefix=NO-FREE
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  void *p = malloc(42);
+  p = realloc(p, 0);
+  if (p) {
+    // NO-FREE: Allocated something on realloc(p, 0)
+    fprintf(stderr, "Allocated something on realloc(p, 0)\n");
+  } else {
+    // CHECK: realloc(p, 0) returned nullptr
+    fprintf(stderr, "realloc(p, 0) returned nullptr\n");
+  }
+  free(p);
+}
diff --git a/compiler-rt/test/memprof/TestCases/sleep_after_init.c b/compiler-rt/test/memprof/TestCases/sleep_after_init.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/sleep_after_init.c
@@ -0,0 +1,10 @@
+// RUN: %clang_memprof -O2 %s -o %t
+// RUN: %env_memprof_opts=sleep_after_init=1 %run %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+int main() {
+  // CHECK: Sleeping for 1 second
+  char *x = (char *)malloc(10 * sizeof(char));
+  free(x);
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/sleep_before_dying.c b/compiler-rt/test/memprof/TestCases/sleep_before_dying.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/sleep_before_dying.c
@@ -0,0 +1,11 @@
+// RUN: %clang_memprof -O2 %s -o %t
+// RUN: %env_memprof_opts=sleep_before_dying=1 not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+static const size_t kMaxAllowedMallocSizePlusOne = (1ULL << 40) + 1;
+int main() {
+  void *p = malloc(kMaxAllowedMallocSizePlusOne);
+  // CHECK: Sleeping for 1 second
+  printf("malloc returned: %zu\n", (size_t)p);
+}
diff --git a/compiler-rt/test/memprof/TestCases/stress_dtls.c b/compiler-rt/test/memprof/TestCases/stress_dtls.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/stress_dtls.c
@@ -0,0 +1,118 @@
+// REQUIRES: memprof-64-bits
+// UNSUPPORTED: android
+// Stress test dynamic TLS + dlopen + threads.
+//
+// Note that glibc 2.15 seems utterly broken on this test,
+// it fails with ~17 DSOs dlopen-ed.
+// glibc 2.19 seems fine.
+//
+//
+// RUN: %clangxx_memprof -x c -DSO_NAME=f0 %s -shared -o %t-f0.so -fPIC
+// RUN: %clangxx_memprof -x c -DSO_NAME=f1 %s -shared -o %t-f1.so -fPIC
+// RUN: %clangxx_memprof -x c -DSO_NAME=f2 %s -shared -o %t-f2.so -fPIC
+// RUN: %clangxx_memprof %s -ldl -pthread -o %t
+// RUN: %run %t 0 3
+// RUN: %run %t 2 3
+// RUN: %env_memprof_opts=verbosity=2 %run %t 10 2 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=verbosity=2:intercept_tls_get_addr=1 %run %t 10 2 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=verbosity=2:intercept_tls_get_addr=0 %run %t 10 2 2>&1 | FileCheck %s --check-prefix=CHECK0
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 0
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 1
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 2
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 3
+// CHECK: __tls_get_addr
+// Make sure that TLS slots don't leak
+// CHECK-NOT: num_live_dtls 5
+//
+// CHECK0-NOT: __tls_get_addr
+/*
+cc=your-compiler
+
+$cc stress_dtls.c -pthread -ldl
+for((i=0;i<100;i++)); do
+  $cc -fPIC -shared -DSO_NAME=f$i -o a.out-f$i.so stress_dtls.c;
+done
+./a.out 2 4 # <<<<<< 2 threads, 4 libs
+./a.out 3 50 # <<<<<< 3 threads, 50 libs
+*/
+#ifndef SO_NAME
+#define _GNU_SOURCE
+#include <assert.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef void **(*f_t)();
+
+__thread int my_tls;
+
+#define MAX_N_FUNCTIONS 1000
+f_t Functions[MAX_N_FUNCTIONS];
+
+void *PrintStuff(void *unused) {
+  uintptr_t stack;
+  // fprintf(stderr, "STACK: %p TLS: %p SELF: %p\n", &stack, &my_tls,
+  //         (void *)pthread_self());
+  int i;
+  for (i = 0; i < MAX_N_FUNCTIONS; i++) {
+    if (!Functions[i])
+      break;
+    uintptr_t dtls = (uintptr_t)Functions[i]();
+    fprintf(stderr, " dtls[%03d]: %lx\n", i, dtls);
+    *(long *)dtls = 42; // check that this is writable.
+  }
+  return NULL;
+}
+
+int main(int argc, char *argv[]) {
+  int num_threads = 1;
+  int num_libs = 1;
+  if (argc >= 2)
+    num_threads = atoi(argv[1]);
+  if (argc >= 3)
+    num_libs = atoi(argv[2]);
+  assert(num_libs <= MAX_N_FUNCTIONS);
+
+  int lib;
+  for (lib = 0; lib < num_libs; lib++) {
+    char buf[4096];
+    snprintf(buf, sizeof(buf), "%s-f%d.so", argv[0], lib);
+    void *handle = dlopen(buf, RTLD_LAZY);
+    if (!handle) {
+      fprintf(stderr, "%s\n", dlerror());
+      exit(1);
+    }
+    snprintf(buf, sizeof(buf), "f%d", lib);
+    Functions[lib] = (f_t)dlsym(handle, buf);
+    if (!Functions[lib]) {
+      fprintf(stderr, "%s\n", dlerror());
+      exit(1);
+    }
+    fprintf(stderr, "LIB[%03d] %s: %p\n", lib, buf, Functions[lib]);
+    PrintStuff(0);
+
+    int i;
+    for (i = 0; i < num_threads; i++) {
+      pthread_t t;
+      fprintf(stderr, "Creating thread %d\n", i);
+      pthread_create(&t, 0, PrintStuff, 0);
+      pthread_join(t, 0);
+    }
+  }
+  return 0;
+}
+#else // SO_NAME
+#ifndef DTLS_SIZE
+#define DTLS_SIZE (1 << 17)
+#endif
+__thread void *huge_thread_local_array[DTLS_SIZE];
+void **SO_NAME() {
+  return &huge_thread_local_array[0];
+}
+#endif
diff --git a/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c b/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c
@@ -0,0 +1,38 @@
+// Check profile with a single malloc call and set of loads and stores. Ensures
+// we get the same profile regardless of whether the memory is deallocated
+// before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//   Memory allocation stack id = STACKID
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK: Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// CHECK-NEXT: access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK-NEXT: lifetime (ave/min/max): [[AVELIFETIME:[0-9]+]].00 / [[AVELIFETIME]] / [[AVELIFETIME]]
+// CHECK-NEXT: num migrated: 0, num lifetime overlaps: 0, num same alloc cpu: 0, num same dealloc_cpu: 0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in malloc
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = (int *)malloc(10 * sizeof(int));
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  free(p);
+#endif
+
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/test_memintrin.cpp b/compiler-rt/test/memprof/TestCases/test_memintrin.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_memintrin.cpp
@@ -0,0 +1,49 @@
+// Check profile with calls to memory intrinsics.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//   Memory allocation stack id = STACKIDP
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+//   access_count (ave/min/max): 3.00 / 3 / 3
+// but we need to look for them in the same CHECK to get the correct STACKIDP.
+// CHECK-DAG: Memory allocation stack id = [[STACKIDP:[0-9]+]]{{[[:space:]].*}} alloc_count 1, size (ave/min/max) 40.00 / 40 / 40{{[[:space:]].*}} access_count (ave/min/max): 3.00 / 3 / 3
+//
+// This is actually:
+//   Memory allocation stack id = STACKIDQ
+//   alloc_count 1, size (ave/min/max) 20.00 / 20 / 20
+//   access_count (ave/min/max): 2.00 / 2 / 2
+// but we need to look for them in the same CHECK to get the correct STACKIDQ.
+// CHECK-DAG: Memory allocation stack id = [[STACKIDQ:[0-9]+]]{{[[:space:]].*}} alloc_count 1, size (ave/min/max) 20.00 / 20 / 20{{[[:space:]].*}} access_count (ave/min/max): 2.00 / 2 / 2
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main() {
+  // This is actually:
+  //   Stack for id STACKIDP:
+  //   #0 {{.*}} in operator new
+  //   #1 {{.*}} in main {{.*}}:@LINE+1
+  // but we need to look for them in the same CHECK-DAG.
+  // CHECK-DAG: Stack for id [[STACKIDP]]:{{[[:space:]].*}} #0 {{.*}} in operator new{{.*[[:space:]].*}} #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  int *p = new int[10];
+
+  // This is actually:
+  //   Stack for id STACKIDQ:
+  //   #0 {{.*}} in operator new
+  //   #1 {{.*}} in main {{.*}}:@LINE+1
+  // but we need to look for them in the same CHECK-DAG.
+  // CHECK-DAG: Stack for id [[STACKIDQ]]:{{[[:space:]].*}} #0 {{.*}} in operator new{{.*[[:space:]].*}} #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  int *q = new int[5];
+
+  memset(p, 1, 10);
+  memcpy(q, p, 5);
+  int x = memcmp(p, q, 5);
+
+  delete p;
+  delete q;
+
+  return x;
+}
diff --git a/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp b/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp
@@ -0,0 +1,42 @@
+// Check profile with a single new call and set of loads and stores. Ensures
+// we get the same profile regardless of whether the memory is deallocated
+// before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// Try again with callbacks instead of inline sequences
+// RUN: %clangxx_memprof -mllvm -memprof-use-callbacks -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//   Memory allocation stack id = STACKID
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK: Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// CHECK-NEXT: access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK-NEXT: lifetime (ave/min/max): [[AVELIFETIME:[0-9]+]].00 / [[AVELIFETIME]] / [[AVELIFETIME]]
+// CHECK-NEXT: num migrated: 0, num lifetime overlaps: 0, num same alloc cpu: 0, num same dealloc_cpu: 0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in operator new
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = new int[10];
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  delete p;
+#endif
+
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/test_terse.cpp b/compiler-rt/test/memprof/TestCases/test_terse.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_terse.cpp
@@ -0,0 +1,31 @@
+// Check terse format profile with a single malloc call and set of loads and
+// stores. Ensures we get the same profile regardless of whether the memory is
+// deallocated before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_terse=1 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts=print_terse=1 %run %t 2>&1 | FileCheck %s
+
+// CHECK: MIB:[[STACKID:[0-9]+]]/1/40.00/40/40/20.00/20/20/[[AVELIFETIME:[0-9]+]].00/[[AVELIFETIME]]/[[AVELIFETIME]]/0/0/0/0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in operator new
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = new int[10];
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  delete p;
+#endif
+
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp b/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp
@@ -0,0 +1,31 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//   Memory allocation stack id = STACKID
+//   alloc_count 1, size (ave/min/max) 128.00 / 128 / 128
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK: Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 128.00 / 128 / 128
+// CHECK-NEXT: access_count (ave/min/max): 7.00 / 7 / 7
+
+#include <sanitizer/common_interface_defs.h>
+
+#include <assert.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  // CHECK: Stack for id [[STACKID]]:
+  // CHECK-NEXT: #0 {{.*}} in operator new[](unsigned long)
+  // CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  char *x = new char[128];
+  memset(x, 0xab, 128);
+  __sanitizer_unaligned_load16(x + 15);
+  __sanitizer_unaligned_load32(x + 15);
+  __sanitizer_unaligned_load64(x + 15);
+
+  __sanitizer_unaligned_store16(x + 15, 0);
+  __sanitizer_unaligned_store32(x + 15, 0);
+  __sanitizer_unaligned_store64(x + 15, 0);
+
+  delete[] x;
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/lit.cfg.py b/compiler-rt/test/memprof/lit.cfg.py
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/lit.cfg.py
@@ -0,0 +1,103 @@
+# -*- Python -*-
+
+import os
+import platform
+import re
+
+import lit.formats
+
+# Get shlex.quote if available (added in 3.3), and fall back to pipes.quote if
+# it's not available.
+try:
+  import shlex
+  sh_quote = shlex.quote
+except:
+  import pipes
+  sh_quote = pipes.quote
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if attr_value == None:
+    lit_config.fatal(
+      "No attribute %r in test configuration! You may need to run "
+      "tests from your build directory or add this attribute "
+      "to lit.site.cfg.py " % attr_name)
+  return attr_value
+
+# Setup config name.
+config.name = 'MemProfiler' + config.name_suffix
+
+# Platform-specific default MEMPROF_OPTIONS for lit tests.
+default_memprof_opts = list(config.default_sanitizer_opts)
+
+default_memprof_opts_str = ':'.join(default_memprof_opts)
+if default_memprof_opts_str:
+  config.environment['MEMPROF_OPTIONS'] = default_memprof_opts_str
+config.substitutions.append(('%env_memprof_opts=',
+                             'env MEMPROF_OPTIONS=' + default_memprof_opts_str))
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+libdl_flag = '-ldl'
+
+# Setup default compiler flags used with -fmemory-profile option.
+# FIXME: Review the set of required flags and check if it can be reduced.
+target_cflags = [get_required_attr(config, 'target_cflags')]
+target_cxxflags = config.cxx_mode_flags + target_cflags
+clang_memprof_static_cflags = (['-fmemory-profile',
+                                '-mno-omit-leaf-frame-pointer',
+                                '-fno-omit-frame-pointer',
+                                '-fno-optimize-sibling-calls'] +
+                               config.debug_info_flags + target_cflags)
+clang_memprof_static_cxxflags = config.cxx_mode_flags + clang_memprof_static_cflags
+
+memprof_dynamic_flags = []
+if config.memprof_dynamic:
+  memprof_dynamic_flags = ['-shared-libsan']
+  config.available_features.add('memprof-dynamic-runtime')
+else:
+  config.available_features.add('memprof-static-runtime')
+clang_memprof_cflags = clang_memprof_static_cflags + memprof_dynamic_flags
+clang_memprof_cxxflags = clang_memprof_static_cxxflags + memprof_dynamic_flags
+
+def build_invocation(compile_flags):
+  return ' ' + ' '.join([config.clang] + compile_flags) + ' '
+
+config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
+config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
+config.substitutions.append( ("%clang_memprof ", build_invocation(clang_memprof_cflags)) )
+config.substitutions.append( ("%clangxx_memprof ", build_invocation(clang_memprof_cxxflags)) )
+if config.memprof_dynamic:
+  shared_libmemprof_path = os.path.join(config.compiler_rt_libdir, 'libclang_rt.memprof{}.so'.format(config.target_suffix))
+  config.substitutions.append( ("%shared_libmemprof", shared_libmemprof_path) )
+  config.substitutions.append( ("%clang_memprof_static ", build_invocation(clang_memprof_static_cflags)) )
+  config.substitutions.append( ("%clangxx_memprof_static ", build_invocation(clang_memprof_static_cxxflags)) )
+
+# Some tests use C++11 features such as lambdas and need to pass -std=c++11.
+config.substitutions.append(("%stdcxx11 ", '-std=c++11 '))
+
+config.substitutions.append( ("%libdl", libdl_flag) )
+
+config.available_features.add('memprof-' + config.bits + '-bits')
+
+config.available_features.add('fast-unwinder-works')
+
+# Set LD_LIBRARY_PATH to pick dynamic runtime up properly.
+new_ld_library_path = os.path.pathsep.join(
+  (config.compiler_rt_libdir, config.environment.get('LD_LIBRARY_PATH', '')))
+config.environment['LD_LIBRARY_PATH'] = new_ld_library_path
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cpp']
+
+config.substitutions.append(('%fPIC', '-fPIC'))
+config.substitutions.append(('%fPIE', '-fPIE'))
+config.substitutions.append(('%pie', '-pie'))
+
+# Only run the tests on supported OSs.
+if config.host_os not in ['Linux']:
+  config.unsupported = True
+
+if not config.parallelism_group:
+  config.parallelism_group = 'shadow-memory'
diff --git a/compiler-rt/test/memprof/lit.site.cfg.py.in b/compiler-rt/test/memprof/lit.site.cfg.py.in
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/memprof/lit.site.cfg.py.in
@@ -0,0 +1,15 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.name_suffix = "@MEMPROF_TEST_CONFIG_SUFFIX@"
+config.target_cflags = "@MEMPROF_TEST_TARGET_CFLAGS@"
+config.clang = "@MEMPROF_TEST_TARGET_CC@"
+config.bits = "@MEMPROF_TEST_BITS@"
+config.memprof_dynamic = @MEMPROF_TEST_DYNAMIC@
+config.target_arch = "@MEMPROF_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@MEMPROF_LIT_SOURCE_DIR@/lit.cfg.py")
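Editor's note, not part of the patch: a minimal sketch of how a program built with the -fmemory-profile flag used above might consume the new runtime. It overrides __memprof_default_options() (as exercised by default_options.cpp) to request the terse MIB output checked by test_terse.cpp; the same effect could be had at run time through the MEMPROF_OPTIONS environment variable that lit.cfg.py wires up via %env_memprof_opts. The file name example.cpp and the exact build line are assumptions, not taken from the patch.

// Hypothetical example.cpp -- an illustration only, not a test in this patch.
// Build (sketch):  clang++ -fmemory-profile example.cpp -o example
// Run (sketch):    ./example
//                  MEMPROF_OPTIONS=print_terse=1 ./example   # equivalent, set at run time

// Ask the MemProf runtime for the terse per-MIB summary at exit,
// mirroring compiler-rt/test/memprof/TestCases/test_terse.cpp.
extern "C" const char *__memprof_default_options() { return "print_terse=1"; }

int main() {
  int *p = new int[10]; // one heap allocation ...
  for (int i = 0; i < 10; i++)
    p[i] = i;           // ... with a handful of profiled accesses
  delete[] p;
  return 0;
}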