diff --git a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
--- a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
+++ b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/ConcurrentHashtable.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
 #include
 #include
@@ -22,28 +23,6 @@
 /// and a string body which is placed right after StringEntry.
 using StringEntry = StringMapEntry<DwarfStringPoolEntry>;
 
-class StringAllocator : public AllocatorBase<StringAllocator> {
-public:
-  inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size,
-                                                       size_t Alignment) {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.Allocate(Size, Align(Alignment));
-  }
-
-  // Pull in base class overloads.
-  using AllocatorBase<StringAllocator>::Allocate;
-
-private:
-#if LLVM_ENABLE_THREADS
-  std::mutex AllocatorMutex;
-#endif
-
-  BumpPtrAllocator Allocator;
-};
-
 class StringPoolEntryInfo {
 public:
   /// \returns Hash value for the specified \p Key.
@@ -62,28 +41,31 @@
   }
 
   /// \returns newly created object of KeyDataTy type.
-  static inline StringEntry *create(const StringRef &Key,
-                                    StringAllocator &Allocator) {
+  static inline StringEntry *
+  create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) {
     return StringEntry::create(Key, Allocator);
   }
 };
 
 class StringPool
-    : public ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                      StringPoolEntryInfo> {
+    : public ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                      parallel::PerThreadBumpPtrAllocator,
+                                      StringPoolEntryInfo> {
 public:
   StringPool()
-      : ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                 StringPoolEntryInfo>(Allocator) {}
+      : ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                 parallel::PerThreadBumpPtrAllocator,
+                                 StringPoolEntryInfo>(Allocator) {}
 
   StringPool(size_t InitialSize)
-      : ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                 StringPoolEntryInfo>(Allocator, InitialSize) {}
+      : ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                 parallel::PerThreadBumpPtrAllocator,
+                                 StringPoolEntryInfo>(Allocator, InitialSize) {}
 
-  StringAllocator &getAllocatorRef() { return Allocator; }
+  parallel::PerThreadBumpPtrAllocator &getAllocatorRef() { return Allocator; }
 
 private:
-  StringAllocator Allocator;
+  parallel::PerThreadBumpPtrAllocator Allocator;
 };
 
 } // end of namespace dwarflinker_parallel
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,8 +40,11 @@
 inline unsigned getThreadIndex() { return threadIndex; }
 #endif
+
+size_t getNumThreads();
 #else
 inline unsigned getThreadIndex() { return 0; }
+inline size_t getNumThreads() { return 1; }
 #endif
 
 namespace detail {
diff --git a/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
@@ -0,0 +1,120 @@
+//===- PerThreadBumpPtrAllocator.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+#define LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Parallel.h"
+
+namespace llvm {
+namespace parallel {
+
+/// PerThreadAllocator is used in conjunction with ThreadPoolExecutor to allow
+/// per-thread allocations. It wraps a possibly thread-unsafe allocator,
+/// e.g. BumpPtrAllocator. PerThreadAllocator must be used only from the main
+/// thread or from threads created by ThreadPoolExecutor, as it utilizes
+/// getThreadIndex, which is set by ThreadPoolExecutor.
+/// To work properly, ThreadPoolExecutor should be initialized before
+/// PerThreadAllocator is created.
+/// TODO: The same approach might be implemented for ThreadPool.
+
+template <typename AllocatorTy>
+class PerThreadAllocator
+    : public AllocatorBase<PerThreadAllocator<AllocatorTy>> {
+public:
+  PerThreadAllocator()
+      : NumOfAllocators(parallel::getNumThreads()),
+        Allocators(std::make_unique<AllocatorTy[]>(NumOfAllocators)) {}
+
+  /// \defgroup Methods which could be called asynchronously:
+  ///
+  /// @{
+
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Allocate;
+
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Deallocate;
+
+  /// Allocate \a Size bytes of \a Alignment aligned memory.
+  void *Allocate(size_t Size, size_t Alignment) {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()].Allocate(Size, Alignment);
+  }
+
+  /// Deallocate \a Ptr to \a Size bytes of memory allocated by this
+  /// allocator.
+  void Deallocate(const void *Ptr, size_t Size, size_t Alignment) {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()].Deallocate(Ptr, Size, Alignment);
+  }
+
+  /// Return allocator corresponding to the current thread.
+  AllocatorTy &getThreadLocalAllocator() {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()];
+  }
+
+  /// Return number of used allocators.
+  size_t getNumberOfAllocators() const { return NumOfAllocators; }
+  /// @}
+
+  /// \defgroup Methods which could not be called asynchronously:
+  ///
+  /// @{
+
+  /// Reset state of allocators.
+  void Reset() {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      Allocators[Idx].Reset();
+  }
+
+  /// Return total memory size used by all allocators.
+  size_t getTotalMemory() const {
+    size_t TotalMemory = 0;
+
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      TotalMemory += Allocators[Idx].getTotalMemory();
+
+    return TotalMemory;
+  }
+
+  /// Return allocated size by all allocators.
+  size_t getBytesAllocated() const {
+    size_t BytesAllocated = 0;
+
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      BytesAllocated += Allocators[Idx].getBytesAllocated();
+
+    return BytesAllocated;
+  }
+
+  /// Set red zone for all allocators.
+  void setRedZoneSize(size_t NewSize) {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      Allocators[Idx].setRedZoneSize(NewSize);
+  }
+
+  /// Print statistic for each allocator.
+  void PrintStats() const {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++) {
+      errs() << "\n Allocator " << Idx << "\n";
+      Allocators[Idx].PrintStats();
+    }
+  }
+  /// @}
+
+protected:
+  size_t NumOfAllocators;
+  std::unique_ptr<AllocatorTy[]> Allocators;
+};
+
+using PerThreadBumpPtrAllocator = PerThreadAllocator<BumpPtrAllocator>;
+
+} // end namespace parallel
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
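A minimal usage sketch (not part of the patch itself), assuming only the interfaces introduced above plus the existing llvm::parallelFor from Support/Parallel.h: each task allocates from its own thread's BumpPtrAllocator without any locking, and the aggregate statistics are read only after the parallel region has finished.

#include "llvm/Support/Parallel.h"
#include "llvm/Support/PerThreadBumpPtrAllocator.h"
#include <cstddef>
#include <cstdint>

void allocateInParallel() {
  llvm::parallel::PerThreadBumpPtrAllocator Allocator;

  // Each task runs either on the main thread or on a ThreadPoolExecutor
  // worker, so getThreadIndex() selects that thread's private
  // BumpPtrAllocator and no mutex is needed.
  llvm::parallelFor(0, 1024, [&](size_t I) {
    auto *Value = static_cast<uint64_t *>(
        Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
    *Value = I;
  });

  // Aggregation walks every per-thread allocator; call it only once the
  // parallel work is done.
  size_t TotalBytes = Allocator.getBytesAllocated();
  (void)TotalBytes;
}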
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -40,6 +40,7 @@
 public:
   virtual ~Executor() = default;
   virtual void add(std::function<void()> func) = 0;
+  virtual size_t getThreadsNum() const = 0;
 
   static Executor *getDefaultExecutor();
 };
@@ -49,7 +50,7 @@
 class ThreadPoolExecutor : public Executor {
 public:
   explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) {
-    unsigned ThreadCount = S.compute_thread_count();
+    ThreadCount = S.compute_thread_count();
     // Spawn all but one of the threads in another thread as spawning threads
     // can take a while.
     Threads.reserve(ThreadCount);
@@ -58,7 +59,7 @@
     // Use operator[] before creating the thread to avoid data race in .size()
     // in "safe libc++" mode.
     auto &Thread0 = Threads[0];
-    Thread0 = std::thread([this, ThreadCount, S] {
+    Thread0 = std::thread([this, S] {
       for (unsigned I = 1; I < ThreadCount; ++I) {
         Threads.emplace_back([=] { work(S, I); });
         if (Stop)
@@ -105,6 +106,8 @@
     Cond.notify_one();
   }
 
+  size_t getThreadsNum() const override { return ThreadCount; }
+
 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
     threadIndex = ThreadID;
@@ -127,6 +130,7 @@
   std::condition_variable Cond;
   std::promise<void> ThreadsCreated;
   std::vector<std::thread> Threads;
+  unsigned ThreadCount;
 };
 
 Executor *Executor::getDefaultExecutor() {
@@ -156,6 +160,10 @@
 }
 } // namespace
 } // namespace detail
+
+size_t getNumThreads() {
+  return detail::Executor::getDefaultExecutor()->getThreadsNum();
+}
 #endif
 
 static std::atomic<int> TaskGroupInstances;
diff --git a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
--- a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
+++ b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
@@ -10,11 +10,13 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
 #include "gtest/gtest.h"
 #include
 #include
 #include
 
 using namespace llvm;
+using namespace parallel;
 
 namespace {
 class String {
@@ -36,38 +38,11 @@
   std::array ExtraData;
 };
 
-class SimpleAllocator : public AllocatorBase<SimpleAllocator> {
-public:
-  inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size,
-                                                       size_t Alignment) {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.Allocate(Size, Align(Alignment));
-  }
-  inline size_t getBytesAllocated() {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.getBytesAllocated();
-  }
-
-  // Pull in base class overloads.
-  using AllocatorBase<SimpleAllocator>::Allocate;
-
-protected:
-#if LLVM_ENABLE_THREADS
-  std::mutex AllocatorMutex;
-#endif
-
-  BumpPtrAllocator Allocator;
-} Allocator;
-
 TEST(ConcurrentHashTableTest, AddStringEntries) {
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  PerThreadBumpPtrAllocator Allocator;
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
       HashTable(Allocator, 10);
 
   size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
@@ -113,20 +88,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringMultiplueEntries) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 10000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator);
 
   // Check insertion.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   }
 
   std::string StatisticString;
@@ -140,13 +119,16 @@
 
   // Check insertion of duplicates.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   }
 
   // Check statistic.
@@ -157,21 +139,25 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringMultiplueEntriesWithResize) {
+  PerThreadBumpPtrAllocator Allocator;
   // Number of elements exceeds original size, thus hashtable should be resized.
   const size_t NumElements = 20000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator, 100);
 
   // Check insertion.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0} {1}", I, I + 100);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   }
 
   std::string StatisticString;
@@ -185,13 +171,16 @@
 
   // Check insertion of duplicates.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0} {1}", I, I + 100);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   }
 
   // Check statistic.
@@ -202,20 +191,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringEntriesParallel) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 10000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator);
 
   // Check parallel insertion.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   });
 
   std::string StatisticString;
@@ -229,13 +222,16 @@
 
   // Check parallel insertion of duplicates.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   });
 
   // Check statistic.
@@ -246,20 +242,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringEntriesParallelWithResize) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 20000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator, 100);
 
   // Check parallel insertion.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   });
 
   std::string StatisticString;
@@ -273,13 +273,16 @@
 
   // Check parallel insertion of duplicates.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   });
 
   // Check statistic.
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -66,6 +66,7 @@
   OptimizedStructLayoutTest.cpp
   ParallelTest.cpp
   Path.cpp
+  PerThreadBumpPtrAllocatorTest.cpp
   ProcessTest.cpp
   ProgramTest.cpp
   RegexTest.cpp
diff --git a/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
@@ -0,0 +1,52 @@
+//===- PerThreadBumpPtrAllocatorTest.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include
+
+using namespace llvm;
+using namespace parallel;
+
+namespace {
+
+TEST(PerThreadBumpPtrAllocatorTest, Simple) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  uint64_t *Var =
+      (uint64_t *)Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t));
+  *Var = 0xFE;
+  EXPECT_EQ(0xFEul, *Var);
+  EXPECT_EQ(sizeof(uint64_t), Allocator.getBytesAllocated());
+  EXPECT_TRUE(Allocator.getBytesAllocated() <= Allocator.getTotalMemory());
+
+  PerThreadBumpPtrAllocator Allocator2(std::move(Allocator));
+
+  EXPECT_EQ(sizeof(uint64_t), Allocator2.getBytesAllocated());
+  EXPECT_TRUE(Allocator2.getBytesAllocated() <= Allocator2.getTotalMemory());
+
+  EXPECT_EQ(0xFEul, *Var);
+}
+
+TEST(PerThreadBumpPtrAllocatorTest, ParallelAllocation) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  static size_t constexpr NumAllocations = 1000;
+
+  parallelFor(0, NumAllocations, [&](size_t Idx) {
+    uint64_t *ptr =
+        (uint64_t *)Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t));
+    *ptr = Idx;
+  });
+
+  EXPECT_EQ(sizeof(uint64_t) * NumAllocations, Allocator.getBytesAllocated());
+  EXPECT_EQ(Allocator.getNumberOfAllocators(), parallel::getNumThreads());
+}
+
+} // anonymous namespace
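Because PerThreadAllocator derives from AllocatorBase, the convenience overloads such as the templated Allocate<T>(Count) are pulled in, which is what lets templated callers (e.g. StringEntry::create in StringPool.h above) swap allocator types without source changes. A small sketch of that property, assuming only this patch's header; the helper copyCString is illustrative and not part of LLVM:

#include "llvm/Support/Allocator.h"
#include "llvm/Support/PerThreadBumpPtrAllocator.h"
#include <cstddef>
#include <cstring>

// Works with any allocator exposing the BumpPtrAllocator-style interface,
// e.g. llvm::BumpPtrAllocator or llvm::parallel::PerThreadBumpPtrAllocator.
template <typename AllocatorTy>
const char *copyCString(const char *Src, AllocatorTy &Alloc) {
  size_t Length = std::strlen(Src) + 1;
  // The templated Allocate<char>(N) overload is inherited from AllocatorBase
  // and forwards to Allocate(N * sizeof(char), alignof(char)).
  char *Dst = Alloc.template Allocate<char>(Length);
  std::memcpy(Dst, Src, Length);
  return Dst;
}

void example() {
  llvm::parallel::PerThreadBumpPtrAllocator PerThreadAlloc;
  llvm::BumpPtrAllocator PlainAlloc;
  copyCString("abc", PerThreadAlloc); // bumps the calling thread's allocator
  copyCString("abc", PlainAlloc);     // same code with a single allocator
}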