diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -40,7 +40,7 @@ using SizeClassMap = AndroidSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 // 1GB regions - typedef SizeClassAllocator64 Primary; + typedef SizeClassAllocator64 Primary; #else // 512KB regions typedef SizeClassAllocator32 Primary; diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -15,6 +15,7 @@ #include "flags_parser.h" #include "interface.h" #include "local_cache.h" +#include "memtag.h" #include "quarantine.h" #include "report.h" #include "secondary.h" @@ -160,6 +161,13 @@ TSD->Cache.destroy(&Stats); } + void *untagPointerMaybe(void *Ptr) { + if (Primary.SupportsMemoryTagging) + return reinterpret_cast( + untagPointer(reinterpret_cast(Ptr))); + return Ptr; + } + NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, bool ZeroContents = false) { @@ -194,7 +202,7 @@ void *Block; uptr ClassId; - uptr BlockEnd; + uptr SecondaryBlockEnd; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); @@ -205,8 +213,8 @@ TSD->unlock(); } else { ClassId = 0; - Block = - Secondary.allocate(NeededSize, Alignment, &BlockEnd, ZeroContents); + Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd, + ZeroContents); } if (UNLIKELY(!Block)) { @@ -215,16 +223,81 @@ reportOutOfMemory(NeededSize); } - // We only need to zero the contents for Primary backed allocations. This - // condition is not necessarily unlikely, but since memset is costly, we - // might as well mark it as such. - if (UNLIKELY(ZeroContents && ClassId)) - memset(Block, 0, PrimaryT::getSizeByClassId(ClassId)); - - const uptr UnalignedUserPtr = - reinterpret_cast(Block) + Chunk::getHeaderSize(); + const uptr BlockUptr = reinterpret_cast(Block); + const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize(); const uptr UserPtr = roundUpTo(UnalignedUserPtr, Alignment); + void *Ptr = reinterpret_cast(UserPtr); + void *TaggedPtr = Ptr; + if (ClassId) { + // We only need to zero or tag the contents for Primary backed + // allocations. We only set tags for primary allocations in order to avoid + // faulting potentially large numbers of pages for large secondary + // allocations. We assume that guard pages are enough to protect these + // allocations. + // + // FIXME: When the kernel provides a way to set the background tag of a + // mapping, we should be able to tag secondary allocations as well. + // + // When memory tagging is enabled, zeroing the contents is done as part of + // setting the tag. + if (UNLIKELY(useMemoryTagging())) { + uptr PrevUserPtr; + Chunk::UnpackedHeader Header; + const uptr BlockEnd = BlockUptr + PrimaryT::getSizeByClassId(ClassId); + // If possible, try to reuse the UAF tag that was set by deallocate(). + // For simplicity, only reuse tags if we have the same start address as + // the previous allocation. This handles the majority of cases since + // most allocations will not be more aligned than the minimum alignment. + // + // We need to handle situations involving reclaimed chunks, and retag + // the reclaimed portions if necessary. 
In the case where the chunk is + // fully reclaimed, the chunk's header will be zero, which will trigger + // the code path for new mappings and invalid chunks that prepares the + // chunk from scratch. There are three possibilities for partial + // reclaiming: + // + // (1) Header was reclaimed, data was partially reclaimed. + // (2) Header was not reclaimed, all data was reclaimed (e.g. because + // data started on a page boundary). + // (3) Header was not reclaimed, data was partially reclaimed. + // + // Case (1) will be handled in the same way as for full reclaiming, + // since the header will be zero. + // + // We can detect case (2) by loading the tag from the start + // of the chunk. If it is zero, it means that either all data was + // reclaimed (since we never use zero as the chunk tag), or that the + // previous allocation was of size zero. Either way, we need to prepare + // a new chunk from scratch. + // + // We can detect case (3) by moving to the next page (if covered by the + // chunk) and loading the tag of its first granule. If it is zero, it + // means that all following pages may need to be retagged. On the other + // hand, if it is nonzero, we can assume that all following pages are + // still tagged, according to the logic that if any of the pages + // following the next page were reclaimed, the next page would have been + // reclaimed as well. + uptr TaggedUserPtr; + if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) && + PrevUserPtr == UserPtr && + (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { + uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; + const uptr NextPage = roundUpTo(TaggedUserPtr, getPageSizeCached()); + if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) + PrevEnd = NextPage; + TaggedPtr = reinterpret_cast(TaggedUserPtr); + resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd); + } else { + TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd); + } + } else if (UNLIKELY(ZeroContents)) { + // This condition is not necessarily unlikely, but since memset is + // costly, we might as well mark it as such. + memset(Block, 0, PrimaryT::getSizeByClassId(ClassId)); + } + } + Chunk::UnpackedHeader Header = {}; if (UNLIKELY(UnalignedUserPtr != UserPtr)) { const uptr Offset = UserPtr - UnalignedUserPtr; @@ -240,15 +313,15 @@ Header.ClassId = ClassId & Chunk::ClassIdMask; Header.State = Chunk::State::Allocated; Header.Origin = Origin & Chunk::OriginMask; - Header.SizeOrUnusedBytes = (ClassId ? Size : BlockEnd - (UserPtr + Size)) & - Chunk::SizeOrUnusedBytesMask; - void *Ptr = reinterpret_cast(UserPtr); + Header.SizeOrUnusedBytes = + (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) & + Chunk::SizeOrUnusedBytesMask; Chunk::storeHeader(Cookie, Ptr, &Header); if (&__scudo_allocate_hook) - __scudo_allocate_hook(Ptr, Size); + __scudo_allocate_hook(TaggedPtr, Size); - return Ptr; + return TaggedPtr; } NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0, @@ -269,6 +342,8 @@ if (UNLIKELY(!isAligned(reinterpret_cast(Ptr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Deallocating, Ptr); + Ptr = untagPointerMaybe(Ptr); + Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); @@ -296,6 +371,9 @@ void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { initThreadMaybe(); + void *OldTaggedPtr = OldPtr; + OldPtr = untagPointerMaybe(OldPtr); + // The following cases are handled by the C wrappers. 
DCHECK_NE(OldPtr, nullptr); DCHECK_NE(NewSize, 0); @@ -344,7 +422,11 @@ : BlockEnd - (reinterpret_cast(OldPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - return OldPtr; + if (UNLIKELY(ClassId && useMemoryTagging())) + resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, + reinterpret_cast(OldTaggedPtr) + NewSize, + BlockEnd); + return OldTaggedPtr; } } @@ -355,7 +437,7 @@ void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (NewPtr) { const uptr OldSize = getSize(OldPtr, &OldHeader); - memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); + memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize); } return NewPtr; @@ -422,8 +504,13 @@ uptr Chunk; Chunk::UnpackedHeader Header; if (getChunkFromBlock(Block, &Chunk, &Header) && - Header.State == Chunk::State::Allocated) - Callback(Chunk, getSize(reinterpret_cast(Chunk), &Header), Arg); + Header.State == Chunk::State::Allocated) { + uptr TaggedChunk = Chunk; + if (Primary.SupportsMemoryTagging) + TaggedChunk = loadTag(Chunk); + Callback(TaggedChunk, getSize(reinterpret_cast(Chunk), &Header), + Arg); + } }; Primary.iterateOverBlocks(Lambda); Secondary.iterateOverBlocks(Lambda); @@ -446,6 +533,7 @@ initThreadMaybe(); if (UNLIKELY(!Ptr)) return 0; + Ptr = untagPointerMaybe(const_cast(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); // Getting the usable size of a chunk only makes sense if it's allocated. @@ -466,11 +554,22 @@ initThreadMaybe(); if (!Ptr || !isAligned(reinterpret_cast(Ptr), MinAlignment)) return false; + Ptr = untagPointerMaybe(const_cast(Ptr)); Chunk::UnpackedHeader Header; return Chunk::isValid(Cookie, Ptr, &Header) && Header.State == Chunk::State::Allocated; } + bool useMemoryTagging() { + return Primary.useMemoryTagging(); + } + + void disableMemoryTagging() { + if (useMemoryTagging()) + disableMemoryTagChecks(); + Primary.disableMemoryTagging(); + } + private: using SecondaryT = typename Params::Secondary; typedef typename PrimaryT::SizeClassMap SizeClassMap; @@ -484,6 +583,8 @@ static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); + static_assert(!PrimaryT::SupportsMemoryTagging || + MinAlignment >= archMemoryTagGranuleSize(), ""); static const u32 BlockMarker = 0x44554353U; @@ -561,6 +662,10 @@ void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header, uptr Size) { Chunk::UnpackedHeader NewHeader = *Header; + if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { + uptr TaggedBegin, TaggedEnd; + setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); + } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. 
// Logical Or can be short-circuited, which introduces unnecessary diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -142,6 +142,7 @@ #define MAP_ALLOWNOMEM (1U << 0) #define MAP_NOACCESS (1U << 1) #define MAP_RESIZABLE (1U << 2) +#define MAP_MEMTAG (1U << 3) // Our platform memory mapping use is restricted to 3 scenarios: // - reserve memory at a random address (MAP_NOACCESS); diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -35,6 +35,10 @@ #define ANDROID_PR_SET_VMA_ANON_NAME 0 #endif +#ifdef ANDROID_EXPERIMENTAL_MTE +#include +#endif + namespace scudo { uptr getPageSize() { return static_cast(sysconf(_SC_PAGESIZE)); } @@ -50,6 +54,10 @@ MmapProt = PROT_NONE; } else { MmapProt = PROT_READ | PROT_WRITE; +#if defined(__aarch64__) && defined(ANDROID_EXPERIMENTAL_MTE) + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; +#endif } if (Addr) { // Currently no scenario for a noaccess mapping with a fixed address. diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -0,0 +1,205 @@ +//===-- memtag.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEMTAG_H_ +#define SCUDO_MEMTAG_H_ + +#include "internal_defs.h" + +#include +#if defined(ANDROID_EXPERIMENTAL_MTE) +#include +#endif + +namespace scudo { + +#if defined(__aarch64__) + +inline constexpr bool archSupportsMemoryTagging() { return true; } +inline constexpr size_t archMemoryTagGranuleSize() { return 16; } + +inline bool systemSupportsMemoryTagging() { +#if defined(ANDROID_EXPERIMENTAL_MTE) + return getauxval(AT_HWCAP2) & HWCAP2_MTE; +#else + return false; +#endif +} + +inline void disableMemoryTagChecks() { + __asm__ __volatile__(".arch_extension mte; msr tco, #1"); +} + +inline void enableMemoryTagChecksTestOnly() { + __asm__ __volatile__(".arch_extension mte; msr tco, #0"); +} + +inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } + +inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, + uptr *TaggedEnd) { + void *End; + __asm__ __volatile__( + R"( + .arch_extension mte + + // Set a random tag for Ptr in TaggedPtr. This needs to happen even if + // Size = 0 so that TaggedPtr ends up pointing at a valid address. + irg %[TaggedPtr], %[Ptr] + mov %[Cur], %[TaggedPtr] + + // Skip the loop if Size = 0. We don't want to do any tagging in this case. + cbz %[Size], 2f + + // Set the memory tag of the region + // [TaggedPtr, TaggedPtr + roundUpTo(Size, 16)) + // to the pointer tag stored in TaggedPtr. 
+ add %[End], %[TaggedPtr], %[Size] + + 1: + stzg %[Cur], [%[Cur]], #16 + cmp %[Cur], %[End] + b.lt 1b + + 2: + )" + : [ TaggedPtr ] "=&r"(*TaggedBegin), [ Cur ] "=&r"(*TaggedEnd), [ End ] "=&r"(End) + : [ Ptr ] "r"(Ptr), [ Size ] "r"(Size)); +} + +inline void *prepareTaggedChunk(void *Ptr, size_t Size, uptr BlockEnd) { + // Prepare the granule before the chunk to store the chunk header by setting + // its tag to 0. Normally its tag will already be 0, but in the case where a + // chunk holding a low alignment allocation is reused for a higher alignment + // allocation, the chunk may already have a non-zero tag from the previous + // allocation. + __asm__ __volatile__(".arch_extension mte; stg %0, [%0, #-16]" : : "r"(Ptr)); + + uptr TaggedBegin, TaggedEnd; + setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); + + // Finally, set the tag of the granule past the end of the allocation to 0, + // to catch linear overflows even if a previous larger allocation used the + // same block and tag. Only do this if the granule past the end is in our + // block, because this would otherwise lead to a SEGV if the allocation + // covers the entire block and our block is at the end of a mapping. The tag + // of the next block's header granule will be set to 0, so it will serve the + // purpose of catching linear overflows in this case. + uptr UntaggedEnd = untagPointer(TaggedEnd); + if (UntaggedEnd != BlockEnd) + __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" + : + : "r"(UntaggedEnd)); + return reinterpret_cast(TaggedBegin); +} + +inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { + uptr RoundOldPtr = roundUpTo(OldPtr, 16); + if (RoundOldPtr >= NewPtr) { + // If the allocation is shrinking we just need to set the tag past the end + // of the allocation to 0. See explanation in prepareTaggedChunk above. + uptr RoundNewPtr = untagPointer(roundUpTo(NewPtr, 16)); + if (RoundNewPtr != BlockEnd) + __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" + : + : "r"(RoundNewPtr)); + return; + } + + __asm__ __volatile__(R"( + .arch_extension mte + + // Set the memory tag of the region + // [roundUpTo(OldPtr, 16), roundUpTo(NewPtr, 16)) + // to the pointer tag stored in OldPtr. + 1: + stzg %[Cur], [%[Cur]], #16 + cmp %[Cur], %[End] + b.lt 1b + + // Finally, set the tag of the granule past the end of the allocation to 0. 
+ and %[Cur], %[Cur], #(1 << 56) - 1 + cmp %[Cur], %[BlockEnd] + b.eq 2f + stg %[Cur], [%[Cur]] + + 2: + )" + : [ Cur ] "+&r"(RoundOldPtr), [ End ] "+&r"(NewPtr) + : [ BlockEnd ] "r"(BlockEnd)); +} + +inline uptr tagPointer(uptr UntaggedPtr, uptr Tag) { + return UntaggedPtr | (Tag & (0xfUL << 56)); +} + +inline uptr loadTag(uptr Ptr) { + uptr TaggedPtr = Ptr; + __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" + : "+r"(TaggedPtr)); + return TaggedPtr; +} + +#else + +inline constexpr bool archSupportsMemoryTagging() { return false; } + +inline bool systemSupportsMemoryTagging() { + UNREACHABLE("memory tagging not supported"); +} + +inline size_t archMemoryTagGranuleSize() { + UNREACHABLE("memory tagging not supported"); +} + +inline void disableMemoryTagChecks() { + UNREACHABLE("memory tagging not supported"); +} + +inline void enableMemoryTagChecksTestOnly() { + UNREACHABLE("memory tagging not supported"); +} + +inline uptr untagPointer(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, + uptr *TaggedEnd) { + (void)Ptr; + (void)Size; + (void)TaggedBegin; + (void)TaggedEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline void *prepareTaggedChunk(void *Ptr, size_t Size, uptr BlockEnd) { + (void)Ptr; + (void)Size; + (void)BlockEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { + (void)OldPtr; + (void)NewPtr; + (void)BlockEnd; + UNREACHABLE("memory tagging not supported"); +} + +inline uptr loadTag(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +#endif + +} + +#endif diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -46,6 +46,7 @@ typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; + static const bool SupportsMemoryTagging = false; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -173,6 +174,9 @@ return TotalReleasedBytes; } + bool useMemoryTagging() { return false; } + void disableMemoryTagging() {} + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr RegionSize = 1UL << RegionSizeLog; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -13,6 +13,7 @@ #include "common.h" #include "list.h" #include "local_cache.h" +#include "memtag.h" #include "release.h" #include "stats.h" #include "string_utils.h" @@ -38,12 +39,18 @@ // The memory used by this allocator is never unmapped, but can be partially // released if the platform allows for it. 
-template class SizeClassAllocator64 { -public: +template +class SizeClassAllocator64 { + public: typedef SizeClassMapT SizeClassMap; - typedef SizeClassAllocator64 ThisT; + typedef SizeClassAllocator64 + ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; + static const bool SupportsMemoryTagging = + MaySupportMemoryTagging && archSupportsMemoryTagging(); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -85,6 +92,9 @@ Region->RandState = getRandomU32(&Seed); } ReleaseToOsIntervalMs = ReleaseToOsInterval; + + if (SupportsMemoryTagging) + UseMemoryTagging = systemSupportsMemoryTagging(); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -180,6 +190,11 @@ return TotalReleasedBytes; } + bool useMemoryTagging() const { + return SupportsMemoryTagging && UseMemoryTagging; + } + void disableMemoryTagging() { UseMemoryTagging = false; } + private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; @@ -221,6 +236,7 @@ RegionInfo *RegionInfoArray; MapPlatformData Data; s32 ReleaseToOsIntervalMs; + bool UseMemoryTagging; RegionInfo *getRegionInfo(uptr ClassId) const { DCHECK_LT(ClassId, NumClasses); @@ -285,7 +301,9 @@ Region->Data = Data; if (UNLIKELY(!map(reinterpret_cast(RegionBeg + MappedUser), UserMapSize, "scudo:primary", - MAP_ALLOWNOMEM | MAP_RESIZABLE, &Region->Data))) + MAP_ALLOWNOMEM | MAP_RESIZABLE | + (useMemoryTagging() ? MAP_MEMTAG : 0), + &Region->Data))) return nullptr; Region->MappedUser += UserMapSize; C->getStats().add(StatMapped, UserMapSize); diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -22,6 +22,45 @@ static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; +static void disableDebuggerdMaybe() { +#if SCUDO_ANDROID + // Disable the debuggerd signal handler on Android, without this we can end + // up spending a significant amount of time creating tombstones. + signal(SIGSEGV, SIG_DFL); +#endif +} + +template +bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size, + scudo::uptr Alignment) { + if (!Allocator->useMemoryTagging()) + return false; + + const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG; + if (Alignment < MinAlignment) Alignment = MinAlignment; + const scudo::uptr NeededSize = + scudo::roundUpTo(Size, MinAlignment) + + ((Alignment > MinAlignment) ? 
Alignment : scudo::Chunk::getHeaderSize()); + return AllocatorT::PrimaryT::canAllocate(NeededSize); +} + +template +void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, + scudo::uptr Alignment) { + if (!isTaggedAllocation(Allocator, Size, Alignment)) + return; + + Size = scudo::roundUpTo(Size, scudo::archMemoryTagGranuleSize()); + EXPECT_DEATH({ + disableDebuggerdMaybe(); + reinterpret_cast(P)[-1] = 0xaa; + }, ""); + EXPECT_DEATH({ + disableDebuggerdMaybe(); + reinterpret_cast(P)[Size] = 0xaa; + }, ""); +} + template static void testAllocator() { using AllocatorT = scudo::Allocator; auto Deleter = [](AllocatorT *A) { @@ -56,6 +95,7 @@ EXPECT_TRUE(scudo::isAligned(reinterpret_cast(P), Align)); EXPECT_LE(Size, Allocator->getUsableSize(P)); memset(P, 0xaa, Size); + checkMemoryTaggingMaybe(Allocator.get(), P, Size, Align); Allocator->deallocate(P, Origin, Size); } } @@ -83,7 +123,8 @@ bool Found = false; for (scudo::uptr I = 0; I < 1024U && !Found; I++) { void *P = Allocator->allocate(NeedleSize, Origin); - if (P == NeedleP) + if (Allocator->untagPointerMaybe(P) == + Allocator->untagPointerMaybe(NeedleP)) Found = true; Allocator->deallocate(P, Origin); } @@ -120,6 +161,7 @@ EXPECT_EQ(NewP, P); for (scudo::uptr I = 0; I < DataSize - 32; I++) EXPECT_EQ((reinterpret_cast(NewP))[I], Marker); + checkMemoryTaggingMaybe(Allocator.get(), NewP, NewSize, 0); } Allocator->deallocate(P, Origin); @@ -148,6 +190,52 @@ Allocator->releaseToOS(); + if (Allocator->useMemoryTagging()) { + // Check that use-after-free is detected. + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + const scudo::uptr Size = 1U << SizeLog; + if (!isTaggedAllocation(Allocator.get(), Size, 1)) + continue; + // UAF detection is probabilistic, so we repeat the test up to 256 times + // if necessary. With 15 possible tags this means a 1 in 15^256 chance of + // a false positive. + EXPECT_DEATH({ + disableDebuggerdMaybe(); + for (unsigned I = 0; I != 256; ++I) { + void *P = Allocator->allocate(Size, Origin); + Allocator->deallocate(P, Origin); + reinterpret_cast(P)[0] = 0xaa; + } + }, ""); + EXPECT_DEATH({ + disableDebuggerdMaybe(); + for (unsigned I = 0; I != 256; ++I) { + void *P = Allocator->allocate(Size, Origin); + Allocator->deallocate(P, Origin); + reinterpret_cast(P)[Size - 1] = 0xaa; + } + }, ""); + } + + // Check that disabling memory tagging works correctly. + void *P = Allocator->allocate(2048, Origin); + EXPECT_DEATH(reinterpret_cast(P)[2048] = 0xaa, ""); + Allocator->disableMemoryTagging(); + reinterpret_cast(P)[2048] = 0xaa; + Allocator->deallocate(P, Origin); + + P = Allocator->allocate(2048, Origin); + EXPECT_EQ(Allocator->untagPointerMaybe(P), P); + reinterpret_cast(P)[2048] = 0xaa; + Allocator->deallocate(P, Origin); + + Allocator->releaseToOS(); + + // The allocator may have disabled memory tag checks globally, which may + // interfere with subsequent tests. Re-enable them now. + scudo::enableMemoryTagChecksTestOnly(); + } + scudo::uptr BufferSize = 8192; std::vector Buffer(BufferSize); scudo::uptr ActualSize = Allocator->getStats(Buffer.data(), BufferSize); diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -58,6 +58,7 @@ testPrimary>(); #endif testPrimary>(); + testPrimary>(); } // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. 
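The instantiation added above exercises the new MaySupportMemoryTagging template parameter that primary64.h introduces on SizeClassAllocator64. As a point of reference, a minimal sketch of such a tagging-capable primary is shown below; the size class map and the 24U region size log are assumptions for illustration only, while the trailing boolean flag corresponds to the MaySupportMemoryTagging parameter from the new primary64.h signature.

  // Sketch only: DefaultSizeClassMap and the 24U region size log are assumed
  // values, not taken from this patch. The third template argument is the
  // MaySupportMemoryTagging flag; even when it is true, tagging is only used
  // at runtime if systemSupportsMemoryTagging() reports MTE support.
  using TaggedPrimary =
      scudo::SizeClassAllocator64<scudo::DefaultSizeClassMap, 24U,
                                  /*MaySupportMemoryTagging=*/true>;
  testPrimary<TaggedPrimary>();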
@@ -143,6 +144,7 @@ testIteratePrimary>(); #endif testIteratePrimary>(); + testIteratePrimary>(); } static std::mutex Mutex; @@ -202,6 +204,7 @@ testPrimaryThreaded>(); #endif testPrimaryThreaded>(); + testPrimaryThreaded>(); } // Through a simple allocation that spans two pages, verify that releaseToOS @@ -232,4 +235,5 @@ testReleaseToOS>(); #endif testReleaseToOS>(); + testReleaseToOS>(); } diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -176,3 +176,11 @@ fputs("", stream); return 0; } + +// Disable memory tagging and memory tag checks for the heap. This function may +// set a thread-wide property (i.e. PSTATE.TCO) in order to disable tag checks, +// which may have an effect on mappings outside of the heap. The program must be +// single threaded at the point when the function is called. +INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() { + SCUDO_ALLOCATOR.disableMemoryTagging(); +}
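Because disabling tag checks may set a per-thread control (PSTATE.TCO), the comment above requires the process to still be single threaded when malloc_disable_memory_tagging is called. A minimal usage sketch follows; the exported symbol name depends on SCUDO_PREFIX, and scudo_malloc_disable_memory_tagging is assumed here purely for illustration.

  // Sketch under assumptions: the symbol name depends on SCUDO_PREFIX;
  // "scudo_malloc_disable_memory_tagging" is used here for illustration.
  extern "C" void scudo_malloc_disable_memory_tagging();

  int main() {
    // Call before creating any threads: the function may only clear tag
    // checks (PSTATE.TCO) for the calling thread, and it stops the heap from
    // tagging new allocations from this point on.
    scudo_malloc_disable_memory_tagging();
    // ... spawn threads and use malloc/free as usual ...
    return 0;
  }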