diff --git a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
--- a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
+++ b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/ConcurrentHashtable.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
 #include
 #include
@@ -22,28 +23,6 @@
 /// and a string body which is placed right after StringEntry.
 using StringEntry = StringMapEntry<DwarfStringPoolEntry>;
 
-class StringAllocator : public AllocatorBase<StringAllocator> {
-public:
-  inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size,
-                                                       size_t Alignment) {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.Allocate(Size, Align(Alignment));
-  }
-
-  // Pull in base class overloads.
-  using AllocatorBase<StringAllocator>::Allocate;
-
-private:
-#if LLVM_ENABLE_THREADS
-  std::mutex AllocatorMutex;
-#endif
-
-  BumpPtrAllocator Allocator;
-};
-
 class StringPoolEntryInfo {
 public:
   /// \returns Hash value for the specified \p Key.
@@ -62,28 +41,31 @@
   }
 
   /// \returns newly created object of KeyDataTy type.
-  static inline StringEntry *create(const StringRef &Key,
-                                    StringAllocator &Allocator) {
+  static inline StringEntry *
+  create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) {
     return StringEntry::create(Key, Allocator);
   }
 };
 
 class StringPool
-    : public ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                      StringPoolEntryInfo> {
+    : public ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                      parallel::PerThreadBumpPtrAllocator,
+                                      StringPoolEntryInfo> {
 public:
   StringPool()
-      : ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                 StringPoolEntryInfo>(Allocator) {}
+      : ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                 parallel::PerThreadBumpPtrAllocator,
+                                 StringPoolEntryInfo>(Allocator) {}
 
   StringPool(size_t InitialSize)
-      : ConcurrentHashTableByPtr<StringRef, StringEntry, StringAllocator,
-                                 StringPoolEntryInfo>(Allocator, InitialSize) {}
+      : ConcurrentHashTableByPtr<StringRef, StringEntry,
+                                 parallel::PerThreadBumpPtrAllocator,
+                                 StringPoolEntryInfo>(Allocator, InitialSize) {}
 
-  StringAllocator &getAllocatorRef() { return Allocator; }
+  parallel::PerThreadBumpPtrAllocator &getAllocatorRef() { return Allocator; }
 
 private:
-  StringAllocator Allocator;
+  parallel::PerThreadBumpPtrAllocator Allocator;
 };
 
 } // end of namespace dwarflinker_parallel
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,8 +40,11 @@
 inline unsigned getThreadIndex() { return threadIndex; }
 #endif
+
+size_t getNumThreads();
 #else
 inline unsigned getThreadIndex() { return 0; }
+inline size_t getNumThreads() { return 1; }
 #endif
 
 namespace detail {
diff --git a/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
@@ -0,0 +1,120 @@
+//===- PerThreadBumpPtrAllocator.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+#define LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Parallel.h"
+
+namespace llvm {
+namespace parallel {
+
+/// PerThreadAllocator is used in conjunction with ThreadPoolExecutor to allow
+/// per-thread allocations. It wraps a possibly thread-unsafe allocator,
+/// e.g. BumpPtrAllocator. PerThreadAllocator must be used only from the main
+/// thread or from threads created by ThreadPoolExecutor, as it utilizes
+/// getThreadIndex, which is set by ThreadPoolExecutor.
+/// To work properly, ThreadPoolExecutor should be initialized before
+/// PerThreadAllocator is created.
+/// TODO: The same approach might be implemented for ThreadPool.
+
+template <typename AllocatorTy>
+class PerThreadAllocator
+    : public AllocatorBase<PerThreadAllocator<AllocatorTy>> {
+public:
+  PerThreadAllocator()
+      : NumOfAllocators(parallel::getNumThreads()),
+        Allocators(std::make_unique<AllocatorTy[]>(NumOfAllocators)) {}
+
+  /// \defgroup Methods which could be called asynchronously:
+  ///
+  /// @{
+
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Allocate;
+
+  using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Deallocate;
+
+  /// Allocate \a Size bytes of \a Alignment aligned memory.
+  void *Allocate(size_t Size, size_t Alignment) {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()].Allocate(Size, Alignment);
+  }
+
+  /// Deallocate \a Ptr to \a Size bytes of memory allocated by this
+  /// allocator.
+  void Deallocate(const void *Ptr, size_t Size, size_t Alignment) {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()].Deallocate(Ptr, Size, Alignment);
+  }
+
+  /// Return allocator corresponding to the current thread.
+  AllocatorTy &getThreadLocalAllocator() {
+    assert(getThreadIndex() < NumOfAllocators);
+    return Allocators[getThreadIndex()];
+  }
+
+  /// Return number of used allocators.
+  size_t getNumberOfAllocators() const { return NumOfAllocators; }
+  /// @}
+
+  /// \defgroup Methods which could not be called asynchronously:
+  ///
+  /// @{
+
+  /// Reset state of allocators.
+  void Reset() {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      Allocators[Idx].Reset();
+  }
+
+  /// Return total memory size used by all allocators.
+  size_t getTotalMemory() const {
+    size_t TotalMemory = 0;
+
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      TotalMemory += Allocators[Idx].getTotalMemory();
+
+    return TotalMemory;
+  }
+
+  /// Return allocated size by all allocators.
+  size_t getBytesAllocated() const {
+    size_t BytesAllocated = 0;
+
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      BytesAllocated += Allocators[Idx].getBytesAllocated();
+
+    return BytesAllocated;
+  }
+
+  /// Set red zone for all allocators.
+  void setRedZoneSize(size_t NewSize) {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+      Allocators[Idx].setRedZoneSize(NewSize);
+  }
+
+  /// Print statistic for each allocator.
+  void PrintStats() const {
+    for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++) {
+      errs() << "\n Allocator " << Idx << "\n";
+      Allocators[Idx].PrintStats();
+    }
+  }
+  /// @}
+
+protected:
+  size_t NumOfAllocators;
+  std::unique_ptr<AllocatorTy[]> Allocators;
+};
+
+using PerThreadBumpPtrAllocator = PerThreadAllocator<BumpPtrAllocator>;
+
+} // end namespace parallel
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
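A minimal usage sketch (not part of the patch itself), assuming only the interfaces introduced above plus the existing llvm::parallelFor from Support/Parallel.h: each task allocates from its own thread's BumpPtrAllocator without any locking, and the aggregate statistics are read only after the parallel region has finished.

#include "llvm/Support/Parallel.h"
#include "llvm/Support/PerThreadBumpPtrAllocator.h"
#include <cstddef>
#include <cstdint>

void allocateInParallel() {
  llvm::parallel::PerThreadBumpPtrAllocator Allocator;

  // Each task runs either on the main thread or on a ThreadPoolExecutor
  // worker, so getThreadIndex() selects that thread's private
  // BumpPtrAllocator and no mutex is needed.
  llvm::parallelFor(0, 1024, [&](size_t I) {
    auto *Value = static_cast<uint64_t *>(
        Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t)));
    *Value = I;
  });

  // Aggregation walks every per-thread allocator; call it only once the
  // parallel work is done.
  size_t TotalBytes = Allocator.getBytesAllocated();
  (void)TotalBytes;
}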
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -40,6 +40,7 @@
 public:
   virtual ~Executor() = default;
   virtual void add(std::function<void()> func) = 0;
+  virtual size_t getThreadsNum() const = 0;
 
   static Executor *getDefaultExecutor();
 };
@@ -49,7 +50,7 @@
 class ThreadPoolExecutor : public Executor {
 public:
   explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) {
-    unsigned ThreadCount = S.compute_thread_count();
+    ThreadCount = S.compute_thread_count();
     // Spawn all but one of the threads in another thread as spawning threads
     // can take a while.
     Threads.reserve(ThreadCount);
@@ -58,7 +59,7 @@
     // Use operator[] before creating the thread to avoid data race in .size()
     // in "safe libc++" mode.
     auto &Thread0 = Threads[0];
-    Thread0 = std::thread([this, ThreadCount, S] {
+    Thread0 = std::thread([this, S] {
       for (unsigned I = 1; I < ThreadCount; ++I) {
         Threads.emplace_back([=] { work(S, I); });
         if (Stop)
@@ -105,6 +106,8 @@
     Cond.notify_one();
   }
 
+  size_t getThreadsNum() const override { return ThreadCount; }
+
 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
     threadIndex = ThreadID;
@@ -127,6 +130,7 @@
   std::condition_variable Cond;
   std::promise<void> ThreadsCreated;
   std::vector<std::thread> Threads;
+  unsigned ThreadCount;
 };
 
 Executor *Executor::getDefaultExecutor() {
@@ -156,6 +160,10 @@
 }
 } // namespace
 } // namespace detail
+
+size_t getNumThreads() {
+  return detail::Executor::getDefaultExecutor()->getThreadsNum();
+}
 #endif
 
 static std::atomic<int> TaskGroupInstances;
diff --git a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
--- a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
+++ b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
@@ -10,11 +10,13 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
 #include "gtest/gtest.h"
 #include
 #include
 #include
 
 using namespace llvm;
+using namespace parallel;
 
 namespace {
 class String {
@@ -36,38 +38,11 @@
   std::array ExtraData;
 };
 
-class SimpleAllocator : public AllocatorBase<SimpleAllocator> {
-public:
-  inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size,
-                                                       size_t Alignment) {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.Allocate(Size, Align(Alignment));
-  }
-  inline size_t getBytesAllocated() {
-#if LLVM_ENABLE_THREADS
-    std::lock_guard<std::mutex> Guard(AllocatorMutex);
-#endif
-
-    return Allocator.getBytesAllocated();
-  }
-
-  // Pull in base class overloads.
-  using AllocatorBase<SimpleAllocator>::Allocate;
-
-protected:
-#if LLVM_ENABLE_THREADS
-  std::mutex AllocatorMutex;
-#endif
-
-  BumpPtrAllocator Allocator;
-} Allocator;
-
 TEST(ConcurrentHashTableTest, AddStringEntries) {
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  PerThreadBumpPtrAllocator Allocator;
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
       HashTable(Allocator, 10);
 
   size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
@@ -113,20 +88,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringMultiplueEntries) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 10000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator);
 
   // Check insertion.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   }
 
   std::string StatisticString;
@@ -140,13 +119,16 @@
 
   // Check insertion of duplicates.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   }
 
   // Check statistic.
@@ -157,21 +139,25 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringMultiplueEntriesWithResize) {
+  PerThreadBumpPtrAllocator Allocator;
   // Number of elements exceeds original size, thus hashtable should be resized.
   const size_t NumElements = 20000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator, 100);
 
   // Check insertion.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0} {1}", I, I + 100);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   }
 
   std::string StatisticString;
@@ -185,13 +171,16 @@
 
   // Check insertion of duplicates.
   for (size_t I = 0; I < NumElements; I++) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0} {1}", I, I + 100);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   }
 
   // Check statistic.
@@ -202,20 +191,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringEntriesParallel) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 10000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator);
 
   // Check parallel insertion.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   });
 
   std::string StatisticString;
@@ -229,13 +222,16 @@
 
   // Check parallel insertion of duplicates.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   });
 
   // Check statistic.
@@ -246,20 +242,24 @@
 }
 
 TEST(ConcurrentHashTableTest, AddStringEntriesParallelWithResize) {
+  PerThreadBumpPtrAllocator Allocator;
   const size_t NumElements = 20000;
-  ConcurrentHashTableByPtr<
-      std::string, String, SimpleAllocator,
-      ConcurrentHashTableInfoByPtr<std::string, String, SimpleAllocator>>
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadBumpPtrAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String,
+                                   PerThreadBumpPtrAllocator>>
      HashTable(Allocator, 100);
 
   // Check parallel insertion.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_TRUE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
-    EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() >
+                AllocatedBytesAtStart);
   });
 
   std::string StatisticString;
@@ -273,13 +273,16 @@
 
   // Check parallel insertion of duplicates.
   parallelFor(0, NumElements, [&](size_t I) {
-    size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+    BumpPtrAllocator &ThreadLocalAllocator =
+        Allocator.getThreadLocalAllocator();
+    size_t AllocatedBytesAtStart = ThreadLocalAllocator.getBytesAllocated();
     std::string StringForElement = formatv("{0}", I);
     std::pair<String *, bool> Entry = HashTable.insert(StringForElement);
     EXPECT_FALSE(Entry.second);
     EXPECT_TRUE(Entry.first->getKey() == StringForElement);
     // Check no additional bytes were allocated for duplicate.
-    EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart);
+    EXPECT_TRUE(ThreadLocalAllocator.getBytesAllocated() ==
+                AllocatedBytesAtStart);
   });
 
   // Check statistic.
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -66,6 +66,7 @@
   OptimizedStructLayoutTest.cpp
   ParallelTest.cpp
   Path.cpp
+  PerThreadBumpPtrAllocatorTest.cpp
   ProcessTest.cpp
   ProgramTest.cpp
   RegexTest.cpp
diff --git a/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Support/PerThreadBumpPtrAllocatorTest.cpp
@@ -0,0 +1,52 @@
+//===- PerThreadBumpPtrAllocatorTest.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include
+
+using namespace llvm;
+using namespace parallel;
+
+namespace {
+
+TEST(PerThreadBumpPtrAllocatorTest, Simple) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  uint64_t *Var =
+      (uint64_t *)Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t));
+  *Var = 0xFE;
+  EXPECT_EQ(0xFEul, *Var);
+  EXPECT_EQ(sizeof(uint64_t), Allocator.getBytesAllocated());
+  EXPECT_TRUE(Allocator.getBytesAllocated() <= Allocator.getTotalMemory());
+
+  PerThreadBumpPtrAllocator Allocator2(std::move(Allocator));
+
+  EXPECT_EQ(sizeof(uint64_t), Allocator2.getBytesAllocated());
+  EXPECT_TRUE(Allocator2.getBytesAllocated() <= Allocator2.getTotalMemory());
+
+  EXPECT_EQ(0xFEul, *Var);
+}
+
+TEST(PerThreadBumpPtrAllocatorTest, ParallelAllocation) {
+  PerThreadBumpPtrAllocator Allocator;
+
+  static size_t constexpr NumAllocations = 1000;
+
+  parallelFor(0, NumAllocations, [&](size_t Idx) {
+    uint64_t *ptr =
+        (uint64_t *)Allocator.Allocate(sizeof(uint64_t), alignof(uint64_t));
+    *ptr = Idx;
+  });
+
+  EXPECT_EQ(sizeof(uint64_t) * NumAllocations, Allocator.getBytesAllocated());
+  EXPECT_EQ(Allocator.getNumberOfAllocators(), parallel::getNumThreads());
+}
+
+} // anonymous namespace
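Because PerThreadAllocator derives from AllocatorBase, the convenience overloads such as the templated Allocate<T>(Count) are pulled in, which is what lets templated callers (e.g. StringEntry::create in StringPool.h above) swap allocator types without source changes. A small sketch of that property, assuming only this patch's header; the helper copyCString is illustrative and not part of LLVM:

#include "llvm/Support/Allocator.h"
#include "llvm/Support/PerThreadBumpPtrAllocator.h"
#include <cstddef>
#include <cstring>

// Works with any allocator exposing the BumpPtrAllocator-style interface,
// e.g. llvm::BumpPtrAllocator or llvm::parallel::PerThreadBumpPtrAllocator.
template <typename AllocatorTy>
const char *copyCString(const char *Src, AllocatorTy &Alloc) {
  size_t Length = std::strlen(Src) + 1;
  // The templated Allocate<char>(N) overload is inherited from AllocatorBase
  // and forwards to Allocate(N * sizeof(char), alignof(char)).
  char *Dst = Alloc.template Allocate<char>(Length);
  std::memcpy(Dst, Src, Length);
  return Dst;
}

void example() {
  llvm::parallel::PerThreadBumpPtrAllocator PerThreadAlloc;
  llvm::BumpPtrAllocator PlainAlloc;
  copyCString("abc", PerThreadAlloc); // bumps the calling thread's allocator
  copyCString("abc", PlainAlloc);     // same code with a single allocator
}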