diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,8 +40,11 @@
 
 inline unsigned getThreadIndex() { return threadIndex; }
 #endif
+
+size_t getMaxThreadsNum();
 #else
 inline unsigned getThreadIndex() { return 0; }
+inline size_t getMaxThreadsNum() { return 1; }
 #endif
 
 namespace detail {
diff --git a/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
@@ -0,0 +1,102 @@
+//===- PerThreadBumpPtrAllocator.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+#define LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Parallel.h"
+
+namespace llvm {
+namespace parallel {
+
+/// PerThreadAllocator keeps a separate allocator for each thread index.
+/// This is possible because ThreadPoolExecutor creates its threads, keeps them
+/// alive until its destructor is called, and assigns an index to each thread.
+/// Thus PerThreadAllocator should only be used from threads created by
+/// ThreadPoolExecutor. To work properly, ThreadPoolExecutor should be
+/// initialized before PerThreadAllocator is created.
+/// TODO: The same approach might be implemented for ThreadPool.
+
+template <typename AllocatorTy>
+class PerThreadAllocator
+    : public AllocatorBase<PerThreadAllocator<AllocatorTy>> {
+public:
+  /// Create parallel::getMaxThreadsNum() allocators, so that every value
+  /// of getThreadIndex() below that count has its own slot.
+  PerThreadAllocator() { Allocators.resize(parallel::getMaxThreadsNum()); }
+
+  // Pull in the base class overloads of Allocate and Deallocate.
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Allocate;
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Deallocate;
+
+  /// Allocate \a Size bytes of \a Alignment aligned memory from the allocator
+  /// selected by getThreadIndex().
+  void *Allocate(size_t Size, size_t Alignment) {
+    AllocatorTy &ThreadAllocator = Allocators[getThreadIndex()];
+    return ThreadAllocator.Allocate(Size, Alignment);
+  }
+
+  /// Deallocate \a Ptr to \a Size bytes of memory through the allocator
+  /// selected by getThreadIndex().
+  void Deallocate(const void *Ptr, size_t Size, size_t Alignment) {
+    AllocatorTy &ThreadAllocator = Allocators[getThreadIndex()];
+    ThreadAllocator.Deallocate(Ptr, Size, Alignment);
+  }
+
+  /// Reset the state of every allocator.
+  void Reset() {
+    for (AllocatorTy &A : Allocators)
+      A.Reset();
+  }
+
+  /// Return the total memory size used by all allocators.
+  size_t getTotalMemory() const {
+    size_t Sum = 0;
+    for (const AllocatorTy &A : Allocators)
+      Sum += A.getTotalMemory();
+    return Sum;
+  }
+
+  /// Return the number of bytes allocated by all allocators.
+  size_t getBytesAllocated() const {
+    size_t Sum = 0;
+    for (const AllocatorTy &A : Allocators)
+      Sum += A.getBytesAllocated();
+    return Sum;
+  }
+
+  /// Set the red zone size for every allocator.
+  void setRedZoneSize(size_t NewSize) {
+    for (AllocatorTy &A : Allocators)
+      A.setRedZoneSize(NewSize);
+  }
+
+  /// Print statistics for each allocator, preceded by its index.
+  void PrintStats() const {
+    for (size_t Idx = 0, E = Allocators.size(); Idx != E; ++Idx) {
+      errs() << "\n Allocator " << Idx << "\n";
+      Allocators[Idx].PrintStats();
+    }
+  }
+
+  /// Return the number of allocators held by this object.
+  size_t getNumberOfAllocators() const { return Allocators.size(); }
+
+protected:
+  /// Allocators[I] is used by the thread whose getThreadIndex() returns I.
+  SmallVector<AllocatorTy> Allocators;
+};
+
+using PerThreadBumpPtrAllocator = PerThreadAllocator<BumpPtrAllocator>;
+
+} // end namespace parallel
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -40,6 +40,7 @@
 public:
   virtual ~Executor() = default;
   virtual void add(std::function<void()> func) = 0;
+  virtual size_t getThreadsNum() const = 0;
 
   static Executor *getDefaultExecutor();
 };
@@ -49,7 +50,7 @@
 class ThreadPoolExecutor : public Executor {
 public:
   explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) {
-    unsigned ThreadCount = S.compute_thread_count();
+    ThreadCount = S.compute_thread_count();
     // Spawn all but one of the threads in another thread as spawning threads
     // can take a while.
     Threads.reserve(ThreadCount);
@@ -58,7 +59,7 @@
     // Use operator[] before creating the thread to avoid data race in .size()
     // in “safe libc++” mode.
     auto &Thread0 = Threads[0];
-    Thread0 = std::thread([this, ThreadCount, S] {
+    Thread0 = std::thread([this, S] {
       for (unsigned I = 1; I < ThreadCount; ++I) {
         Threads.emplace_back([=] { work(S, I); });
         if (Stop)
@@ -105,6 +106,8 @@
     Cond.notify_one();
   }
 
+  size_t getThreadsNum() const override { return ThreadCount; }
+
 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
     threadIndex = ThreadID;
@@ -127,6 +130,7 @@
   std::condition_variable Cond;
   std::promise<void> ThreadsCreated;
   std::vector<std::thread> Threads;
+  unsigned ThreadCount;
 };
 
 Executor *Executor::getDefaultExecutor() {
@@ -156,6 +160,10 @@
 }
 } // namespace
 } // namespace detail
+
+size_t getMaxThreadsNum() {
+  return detail::Executor::getDefaultExecutor()->getThreadsNum();
+}
 #endif
 
 static std::atomic<int> TaskGroupInstances;
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -62,6 +62,7 @@
   OptimizedStructLayoutTest.cpp
   ParallelTest.cpp
   Path.cpp
+  PerThreadBumpPtrAllocatorTest.cpp
   ProcessTest.cpp
   ProgramTest.cpp
   RegexTest.cpp
diff --git a/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
@@ -0,0 +1,53 @@
+//===- PerThreadBumpPtrAllocatorTest.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+using namespace parallel;
+
+namespace {
+
+// Allocates on the calling thread, then checks that the byte counters and the
+// stored value are unchanged after move-constructing a second allocator.
+TEST(PerThreadBumpPtrAllocatorTest, Simple) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  auto *Var = static_cast<uint64_t *>(
+      Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
+  *Var = 0xFE;
+  EXPECT_EQ(0xFEul, *Var);
+  EXPECT_EQ(sizeof(uint64_t), Allocator.getBytesAllocated());
+  EXPECT_LE(Allocator.getBytesAllocated(), Allocator.getTotalMemory());
+
+  // Move-construct and re-check through the new object; Var is read again.
+  PerThreadBumpPtrAllocator Allocator2(std::move(Allocator));
+  EXPECT_EQ(sizeof(uint64_t), Allocator2.getBytesAllocated());
+  EXPECT_LE(Allocator2.getBytesAllocated(), Allocator2.getTotalMemory());
+  EXPECT_EQ(0xFEul, *Var);
+}
+
+// Allocates from parallelFor tasks and checks the aggregated byte count.
+TEST(PerThreadBumpPtrAllocatorTest, ParallelAllocation) {
+  PerThreadBumpPtrAllocator Allocator;
+  static constexpr size_t NumAllocations = 5000;
+
+  parallelFor(0, NumAllocations, [&](size_t Idx) {
+    auto *Ptr = static_cast<uint64_t *>(
+        Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
+    *Ptr = Idx;
+  });
+
+  EXPECT_EQ(sizeof(uint64_t) * NumAllocations, Allocator.getBytesAllocated());
+  EXPECT_EQ(Allocator.getNumberOfAllocators(), parallel::getMaxThreadsNum());
+}
+
+} // anonymous namespace