diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,8 +40,12 @@
 
 inline unsigned getThreadIndex() { return threadIndex; }
 #endif
+
+/// Returns the number of threads used by the default executor.
+size_t getMaxThreadsNum();
 #else
 inline unsigned getThreadIndex() { return 0; }
+inline size_t getMaxThreadsNum() { return 1; }
 #endif
 
 namespace detail {
diff --git a/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
@@ -0,0 +1,108 @@
+//===- PerThreadBumpPtrAllocator.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+#define LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace parallel {
+
+/// PerThreadAllocator allows separating allocations by thread id: it keeps one
+/// underlying allocator per thread index and forwards each request to the
+/// allocator selected by getThreadIndex() of the calling thread.
+/// It is possible because ThreadPoolExecutor creates threads, keeps them until
+/// the destructor of ThreadPoolExecutor is called, and assigns ids to the
+/// threads. Thus PerThreadAllocator should be used only with threads
+/// created by ThreadPoolExecutor. To work properly, ThreadPoolExecutor should
+/// be initialized before PerThreadAllocator is created.
+/// TODO: The same approach might be implemented for ThreadPool.
+
+template <typename AllocatorTy>
+class PerThreadAllocator
+    : public AllocatorBase<PerThreadAllocator<AllocatorTy>> {
+public:
+  /// Create one allocator for every thread reported by getMaxThreadsNum().
+  PerThreadAllocator() { Allocators.resize(parallel::getMaxThreadsNum()); }
+
+  // Pull in base class overloads.
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Allocate;
+
+  // Pull in base class overloads.
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Deallocate;
+
+  /// Allocate \a Size bytes of \a Alignment aligned memory from the allocator
+  /// assigned to the calling thread.
+  void *Allocate(size_t Size, size_t Alignment) {
+    return Allocators[getThreadIndex()].Allocate(Size, Alignment);
+  }
+
+  /// Deallocate \a Ptr to \a Size bytes of memory allocated by this
+  /// allocator. The request goes to the allocator of the calling thread.
+  void Deallocate(const void *Ptr, size_t Size, size_t Alignment) {
+    return Allocators[getThreadIndex()].Deallocate(Ptr, Size, Alignment);
+  }
+
+  /// Reset state of allocators.
+  void Reset() {
+    for (AllocatorTy &Allocator : Allocators)
+      Allocator.Reset();
+  }
+
+  /// Return total memory size used by all allocators.
+  size_t getTotalMemory() const {
+    size_t TotalMemory = 0;
+
+    for (const AllocatorTy &Allocator : Allocators)
+      TotalMemory += Allocator.getTotalMemory();
+
+    return TotalMemory;
+  }
+
+  /// Return the number of bytes allocated, summed over all allocators.
+  size_t getBytesAllocated() const {
+    size_t BytesAllocated = 0;
+
+    for (const AllocatorTy &Allocator : Allocators)
+      BytesAllocated += Allocator.getBytesAllocated();
+
+    return BytesAllocated;
+  }
+
+  /// Set red zone for all allocators.
+  void setRedZoneSize(size_t NewSize) {
+    for (AllocatorTy &Allocator : Allocators)
+      Allocator.setRedZoneSize(NewSize);
+  }
+
+  /// Print statistics for each allocator.
+  void PrintStats() const {
+    for (size_t Idx = 0; Idx < Allocators.size(); Idx++) {
+      errs() << "\n Allocator " << Idx << "\n";
+      Allocators[Idx].PrintStats();
+    }
+  }
+
+  /// Return number of used allocators.
+  size_t getNumberOfAllocators() const { return Allocators.size(); }
+
+protected:
+  /// One allocator per thread index.
+  SmallVector<AllocatorTy> Allocators;
+};
+
+using PerThreadBumpPtrAllocator = PerThreadAllocator<BumpPtrAllocator>;
+
+} // end namespace parallel
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -40,6 +40,7 @@
 public:
   virtual ~Executor() = default;
   virtual void add(std::function<void()> func) = 0;
+  virtual size_t getThreadsNum() const = 0;
 
   static Executor *getDefaultExecutor();
 };
@@ -49,7 +50,7 @@
 class ThreadPoolExecutor : public Executor {
 public:
   explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) {
-    unsigned ThreadCount = S.compute_thread_count();
+    ThreadCount = S.compute_thread_count();
     // Spawn all but one of the threads in another thread as spawning threads
     // can take a while.
     Threads.reserve(ThreadCount);
@@ -58,7 +59,7 @@
     // Use operator[] before creating the thread to avoid data race in .size()
     // in “safe libc++” mode.
     auto &Thread0 = Threads[0];
-    Thread0 = std::thread([this, ThreadCount, S] {
+    Thread0 = std::thread([this, S] {
       for (unsigned I = 1; I < ThreadCount; ++I) {
         Threads.emplace_back([=] { work(S, I); });
         if (Stop)
@@ -105,6 +106,8 @@
     Cond.notify_one();
   }
 
+  size_t getThreadsNum() const override { return ThreadCount; }
+
 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
     threadIndex = ThreadID;
@@ -127,6 +130,7 @@
   std::condition_variable Cond;
   std::promise<void> ThreadsCreated;
   std::vector<std::thread> Threads;
+  unsigned ThreadCount;
 };
 
 Executor *Executor::getDefaultExecutor() {
@@ -156,6 +160,10 @@
 }
 } // namespace
 } // namespace detail
+
+size_t getMaxThreadsNum() {
+  return detail::Executor::getDefaultExecutor()->getThreadsNum();
+}
 #endif
 
 static std::atomic<int> TaskGroupInstances;
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -62,6 +62,7 @@
   OptimizedStructLayoutTest.cpp
   ParallelTest.cpp
   Path.cpp
+  PerThreadBumpPtrAllocatorTest.cpp
   ProcessTest.cpp
   ProgramTest.cpp
   RegexTest.cpp
diff --git a/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
@@ -0,0 +1,56 @@
+//===- PerThreadBumpPtrAllocatorTest.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include <cstdint>
+#include <utility>
+
+using namespace llvm;
+using namespace parallel;
+
+namespace {
+
+// Allocate from the calling thread; the data must survive moving the allocator.
+TEST(PerThreadBumpPtrAllocatorTest, Simple) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  uint64_t *Var = static_cast<uint64_t *>(
+      Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
+
+  *Var = 0xFE;
+  EXPECT_EQ(0xFEul, *Var);
+  EXPECT_EQ(sizeof(uint64_t), Allocator.getBytesAllocated());
+  EXPECT_LE(Allocator.getBytesAllocated(), Allocator.getTotalMemory());
+
+  PerThreadBumpPtrAllocator Allocator2(std::move(Allocator));
+
+  EXPECT_EQ(sizeof(uint64_t), Allocator2.getBytesAllocated());
+  EXPECT_LE(Allocator2.getBytesAllocated(), Allocator2.getTotalMemory());
+
+  EXPECT_EQ(0xFEul, *Var);
+}
+
+// Allocations issued through parallelFor must all be accounted for.
+TEST(PerThreadBumpPtrAllocatorTest, ParallelAllocation) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  static size_t constexpr NumAllocations = 5000;
+
+  parallelFor(0, NumAllocations, [&](size_t Idx) {
+    uint64_t *Ptr = static_cast<uint64_t *>(
+        Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
+    *Ptr = Idx;
+  });
+
+  EXPECT_EQ(sizeof(uint64_t) * NumAllocations, Allocator.getBytesAllocated());
+  EXPECT_EQ(Allocator.getNumberOfAllocators(), parallel::getMaxThreadsNum());
+}
+
+} // anonymous namespace