Index: compiler-rt/lib/xray/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/CMakeLists.txt
+++ compiler-rt/lib/xray/CMakeLists.txt
@@ -2,33 +2,57 @@
 # XRay runtime library implementation files.
 set(XRAY_SOURCES
-  xray_init.cc
-  xray_flags.cc
-  xray_interface.cc
-  xray_log_interface.cc
-  xray_utils.cc)
+    xray_init.cc
+    xray_flags.cc
+    xray_interface.cc
+    xray_log_interface.cc
+    xray_utils.cc)
 
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
-  xray_buffer_queue.cc
-  xray_fdr_logging.cc)
+    xray_buffer_queue.cc
+    xray_fdr_logging.cc)
 
 set(XRAY_BASIC_MODE_SOURCES
-  xray_inmemory_log.cc)
+    xray_inmemory_log.cc)
+
 
 # Implementation files for all XRay architectures.
-set(aarch64_SOURCES xray_AArch64.cc xray_trampoline_AArch64.S)
-set(arm_SOURCES xray_arm.cc xray_trampoline_arm.S)
-set(armhf_SOURCES ${arm_SOURCES})
-set(mips_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mipsel_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mips64_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
-set(mips64el_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
+set(x86_64_SOURCES
+    xray_x86_64.cc
+    xray_trampoline_x86_64.S)
+
+set(arm_SOURCES
+    xray_arm.cc
+    xray_trampoline_arm.S)
+
+set(armhf_SOURCES
+    ${arm_SOURCES})
+
+set(aarch64_SOURCES
+    xray_AArch64.cc
+    xray_trampoline_AArch64.S)
+
+set(mips_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S)
+
+set(mipsel_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S)
+
+set(mips64_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S)
+
+set(mips64el_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S)
+
 set(powerpc64le_SOURCES
   xray_powerpc64.cc
   xray_trampoline_powerpc64.cc
   xray_trampoline_powerpc64_asm.S)
-set(x86_64_SOURCES xray_x86_64.cc xray_trampoline_x86_64.S)
 
 # Now put it all together...
 include_directories(..)
Index: compiler-rt/lib/xray/tests/unit/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/tests/unit/CMakeLists.txt
+++ compiler-rt/lib/xray/tests/unit/CMakeLists.txt
@@ -2,3 +2,7 @@
   buffer_queue_test.cc xray_unit_test_main.cc)
 add_xray_unittest(XRayFDRLoggingTest SOURCES
   fdr_logging_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRayAllocatorTest SOURCES
+  allocator_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRaySegmentedArrayTest SOURCES
+  segmented_array_test.cc xray_unit_test_main.cc)
Index: compiler-rt/lib/xray/tests/unit/allocator_test.cc
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/tests/unit/allocator_test.cc
@@ -0,0 +1,42 @@
+//===-- allocator_test.cc -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_allocator.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+  s64 First;
+  s64 Second;
+};
+
+TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
+
+TEST(AllocatorTest, Allocate) {
+  Allocator<sizeof(TestData)> A(2 << 11, 0);
+  auto B = A.Allocate();
+  ASSERT_NE(B.Data, nullptr);
+}
+
+TEST(AllocatorTest, OverAllocate) {
+  Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
+  auto B1 = A.Allocate();
+  (void)B1;
+  auto B2 = A.Allocate();
+  ASSERT_EQ(B2.Data, nullptr);
+}
+
+} // namespace
+} // namespace __xray
Index: compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
@@ -0,0 +1,139 @@
+#include "xray_segmented_array.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+  s64 First;
+  s64 Second;
+
+  // Need a constructor for emplace operations.
+  TestData(s64 F, s64 S) : First(F), Second(S) {}
+};
+
+TEST(SegmentedArrayTest, Construction) {
+  Array<TestData> Data;
+  (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructWithAllocator) {
+  using AllocatorType = typename Array<TestData>::AllocatorType;
+  AllocatorType A(1 << 4, 0);
+  Array<TestData> Data(A);
+  (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructAndPopulate) {
+  Array<TestData> data;
+  ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
+  ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
+  ASSERT_EQ(data.size(), 2u);
+}
+
+TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
+  Array<TestData> data;
+  ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  ASSERT_EQ(data[0].First, 0);
+  ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, PopulateWithMoreElements) {
+  Array<TestData> data;
+  static const auto kMaxElements = 100u;
+  for (auto I = 0u; I < kMaxElements; ++I) {
+    ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
+  }
+  ASSERT_EQ(data.size(), kMaxElements);
+  for (auto I = 0u; I < kMaxElements; ++I) {
+    ASSERT_EQ(data[I].First, I);
+    ASSERT_EQ(data[I].Second, I + 1);
+  }
+}
+
+TEST(SegmentedArrayTest, AppendEmplace) {
+  Array<TestData> data;
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data[0].First, 1);
+  ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, AppendAndTrim) {
+  Array<TestData> data;
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  data.trim(1);
+  ASSERT_EQ(data.size(), 0u);
+  ASSERT_TRUE(data.empty());
+}
+
+TEST(SegmentedArrayTest, IteratorAdvance) {
+  Array<TestData> data;
+  ASSERT_TRUE(data.empty());
+  ASSERT_EQ(data.begin(), data.end());
+  auto I0 = data.begin();
+  ASSERT_EQ(I0++, data.begin());
+  ASSERT_NE(I0, data.begin());
+  for (const auto &D : data) {
+    (void)D;
+    FAIL();
+  }
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  ASSERT_NE(data.begin(), data.end());
+  auto &D0 = *data.begin();
+  ASSERT_EQ(D0.First, 1);
+  ASSERT_EQ(D0.Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorRetreat) {
+  Array<TestData> data;
+  ASSERT_TRUE(data.empty());
+  ASSERT_EQ(data.begin(), data.end());
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  ASSERT_NE(data.begin(), data.end());
+  auto &D0 = *data.begin();
+  ASSERT_EQ(D0.First, 1);
+  ASSERT_EQ(D0.Second, 1);
+
+  auto I0 = data.end();
+  ASSERT_EQ(I0--, data.end());
+  ASSERT_NE(I0, data.end());
+  ASSERT_EQ(I0, data.begin());
+  ASSERT_EQ(I0->First, 1);
+  ASSERT_EQ(I0->Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
+  Array<TestData> data;
+  ASSERT_TRUE(data.empty());
+  auto I0Begin = data.begin(), I0End = data.end();
+  // Add enough elements in data to have more than one chunk.
+  constexpr auto ChunkX2 = Array<TestData>::ChunkSize * 2;
+  for (auto i = ChunkX2; i > 0u; --i) {
+    data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+  }
+  ASSERT_EQ(data.size(), ChunkX2);
+  // Trim one chunk's worth of elements.
+  data.trim(ChunkX2 / 2);
+  ASSERT_EQ(data.size(), ChunkX2 / 2);
+  // Then trim until it's empty.
+  data.trim(ChunkX2 / 2);
+  ASSERT_TRUE(data.empty());
+
+  // Here our iterators should be the same.
+  auto I1Begin = data.begin(), I1End = data.end();
+  EXPECT_EQ(I0Begin, I1Begin);
+  EXPECT_EQ(I0End, I1End);
+
+  // Then we ensure that adding elements back works just fine.
+  for (auto i = ChunkX2; i > 0u; --i) {
+    data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+  }
+  EXPECT_EQ(data.size(), ChunkX2);
+}
+
+} // namespace
+} // namespace __xray
Index: compiler-rt/lib/xray/xray_allocator.h
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_allocator.h
@@ -0,0 +1,149 @@
+//===-- xray_allocator.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the allocator interface for an arena allocator, used primarily for
+// the profiling runtime.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_ALLOCATOR_H
+#define XRAY_ALLOCATOR_H
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <cstddef>
+#include <cstdint>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+/// The Allocator type hands out fixed-sized chunks of memory that are
+/// cache-line aligned and sized. This is useful for placement of
+/// performance-sensitive data in memory that's frequently accessed. The
+/// allocator also self-limits the peak memory usage to a dynamically defined
+/// maximum.
+///
+/// N is the lower-bound size of the block of memory to return from the
+/// allocation function. N is used to compute the size of a block, which is a
+/// multiple of the cache line size. We compute the size of a block by
+/// determining how many cache lines' worth of memory are required to subsume
+/// N.
+template <size_t N> struct Allocator {
+  // The Allocator returns memory as Block instances.
+  struct Block {
+    /// Compute the minimum cache-line size multiple that is >= N.
+    static constexpr auto Size =
+        kCacheLineSize * ((N / kCacheLineSize) + (N % kCacheLineSize ? 1 : 0));
+    void *Data = nullptr;
+  };
+
+private:
+  // A BlockLink will contain a fixed number of blocks, each with an identifier
+  // to specify whether it's been handed out or not. We keep track of BlockLink
+  // iterators, which are basically a pointer to the link and an offset into
+  // the fixed set of blocks associated with a link. The iterators are
+  // bidirectional.
+  //
+  // We're calling it a "link" in the context of seeing these as a chain of
+  // block pointer containers (i.e. links in a chain).
+  struct BlockLink {
+    static_assert(kCacheLineSize % sizeof(void *) == 0,
+                  "Cache line size is not divisible by size of void*; none of "
+                  "the assumptions of the BlockLink will hold.");
+
+    // We compute the number of pointers to areas in memory that we consider
+    // as individual blocks we've allocated. To ensure that instances of the
+    // BlockLink object are cache-line sized, we deduct one additional
+    // pointer's worth, representing the pointer to the previous link.
+    //
+    // This structure corresponds to the following layout:
+    //
+    //   Blocks [ 0, 1, 2, .., BlockPtrCount - 1]
+    //
+    static constexpr auto BlockPtrCount =
+        (kCacheLineSize / sizeof(Block *)) - 1;
+
+    // FIXME: Align this to cache-line address boundaries?
+    Block Blocks[BlockPtrCount]{};
+    BlockLink *Prev = nullptr;
+  };
+
+  static_assert(sizeof(BlockLink) == kCacheLineSize,
+                "BlockLink instances must be cache-line-sized.");
+
+  static BlockLink NullLink;
+
+  // FIXME: Implement a freelist, in case we actually do intend to return
+  // memory to the allocator, as opposed to just de-allocating everything in
+  // one go?
+
+  size_t MaxMemory;
+  SpinMutex Mutex{};
+  BlockLink *Tail = &NullLink;
+  size_t Counter = 0;
+
+  BlockLink *NewChainLink() {
+    auto NewChain = reinterpret_cast<BlockLink *>(
+        InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
+    auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
+        BlockLink::BlockPtrCount * Block::Size, nullptr, kCacheLineSize));
+    size_t Offset = 0;
+    DCHECK_NE(NewChain, nullptr);
+    DCHECK_NE(BackingStore, nullptr);
+    for (auto &B : NewChain->Blocks) {
+      B.Data = BackingStore + Offset;
+      Offset += Block::Size;
+    }
+    NewChain->Prev = Tail;
+    return NewChain;
+  }
+
+public:
+  Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
+    // FIXME: Implement PreAllocate support!
+  }
+
+  Block Allocate() {
+    SpinMutexLock Lock(&Mutex);
+    // Check whether we're over quota.
+    if (Counter * Block::Size >= MaxMemory)
+      return {};
+
+    size_t ChainOffset = Counter % BlockLink::BlockPtrCount;
+
+    Block B{};
+    BlockLink *Link = Tail;
+    if (UNLIKELY(Counter == 0 || ChainOffset == 0))
+      Tail = Link = NewChainLink();
+
+    B = Link->Blocks[ChainOffset];
+    ++Counter;
+    return B;
+  }
+
+  ~Allocator() NOEXCEPT {
+    // We need to deallocate all the blocks, including the chain links.
+    for (auto *C = Tail; C != &NullLink;) {
+      // We know that the data block is one large contiguous allocation; we
+      // deallocate it all at once.
+      InternalFree(C->Blocks[0].Data);
+      auto Prev = C->Prev;
+      InternalFree(C);
+      C = Prev;
+    }
+  }
+};
+
+// Storage for the NullLink sentinel.
+template <size_t N>
+typename Allocator<N>::BlockLink Allocator<N>::NullLink;
+
+} // namespace __xray
+
+#endif // XRAY_ALLOCATOR_H
Index: compiler-rt/lib/xray/xray_segmented_array.h
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_segmented_array.h
@@ -0,0 +1,337 @@
+//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size chunks
+// backing the segments.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_SEGMENTED_ARRAY_H
+#define XRAY_SEGMENTED_ARRAY_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "xray_allocator.h"
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+namespace {
+
+constexpr size_t gcd(size_t a, size_t b) {
+  return (b == 0) ? a : gcd(b, a % b);
+}
+
+constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); }
+
+} // namespace
+
+/// The Array type provides an interface similar to std::vector<...> but does
+/// not shrink in size. Once constructed, elements can be appended but cannot
+/// be removed. The implementation is heavily dependent on the contract
+/// provided by the Allocator type, in that all memory will be released when
+/// the Allocator is destroyed. When an Array is destroyed, it will destroy
+/// elements in the backing store but will not free the memory. The parameter
+/// N defines how many elements of T there should be in a single block.
+///
+/// We compute the least common multiple of the size of T and the cache line
+/// size, to allow us to maximise the number of T objects we can place in
+/// cache-line multiple sized blocks. To get back the number of T's, we divide
+/// this least common multiple by the size of T.
+template <class T, size_t N = lcm(sizeof(T), kCacheLineSize) / sizeof(T)>
+struct Array {
+  static constexpr size_t ChunkSize = N;
+  static constexpr size_t AllocatorChunkSize = sizeof(T) * ChunkSize;
+  using AllocatorType = Allocator<AllocatorChunkSize>;
+  static_assert(std::is_trivially_destructible<T>::value,
+                "T must be trivially destructible.");
+
+private:
+  // TODO: Consider co-locating the chunk information with the data in the
+  // Block, as in an intrusive list -- i.e. putting the next and previous
+  // pointer values inside the Block storage.
+  struct Chunk {
+    typename AllocatorType::Block Block;
+    static constexpr size_t Size = N;
+    Chunk *Prev = nullptr;
+    Chunk *Next = nullptr;
+  };
+
+  static Chunk SentinelChunk;
+
+  AllocatorType *Allocator;
+  Chunk *Head = &SentinelChunk;
+  Chunk *Tail = &SentinelChunk;
+  size_t Size = 0;
+  size_t FreeElements = 0;
+
+  // Here we keep track of chunks in the freelist, to allow us to re-use chunks
+  // when elements are trimmed off the end.
+  Chunk *Freelist = &SentinelChunk;
+
+  Chunk *NewChunk() {
+    // We need to handle the case in which enough elements have been trimmed to
+    // allow us to re-use chunks we've allocated before. For this we look into
+    // the Freelist, to see whether we need to actually allocate new blocks or
+    // just re-use blocks we've already seen before.
+    if (Freelist != &SentinelChunk) {
+      auto *FreeChunk = Freelist;
+      Freelist = FreeChunk->Next;
+      FreeChunk->Next = &SentinelChunk;
+      return FreeChunk;
+    }
+
+    auto Block = Allocator->Allocate();
+    if (Block.Data == nullptr)
+      return nullptr;
+    // TODO: Maybe use a separate managed allocator for Chunk instances?
+    auto C = reinterpret_cast<Chunk *>(InternalAlloc(sizeof(Chunk)));
+    if (C == nullptr)
+      return nullptr;
+    C->Block = Block;
+    return C;
+  }
+
+  static AllocatorType &GetGlobalAllocator() {
+    static AllocatorType *const GlobalAllocator = [] {
+      AllocatorType *A = reinterpret_cast<AllocatorType *>(
+          InternalAlloc(sizeof(AllocatorType)));
+      new (A) AllocatorType(2 << 10, 0);
+      return A;
+    }();
+
+    return *GlobalAllocator;
+  }
+
+  Chunk *InitHeadAndTail() {
+    DCHECK_EQ(Head, &SentinelChunk);
+    DCHECK_EQ(Tail, &SentinelChunk);
+    auto Chunk = NewChunk();
+    if (Chunk == nullptr)
+      return nullptr;
+    Chunk->Prev = &SentinelChunk;
+    Chunk->Next = &SentinelChunk;
+    Head = Chunk;
+    Tail = Chunk;
+    return Chunk;
+  }
+
+  Chunk *AppendNewChunk() {
+    auto Chunk = NewChunk();
+    if (Chunk == nullptr)
+      return nullptr;
+    Tail->Next = Chunk;
+    Chunk->Prev = Tail;
+    Chunk->Next = &SentinelChunk;
+    Tail = Chunk;
+    return Chunk;
+  }
+
+  // This Iterator models a BidirectionalIterator.
+  template <class U> class Iterator {
+    Chunk *C = nullptr;
+    size_t Offset = 0;
+
+  public:
+    Iterator(Chunk *IC, size_t Off) : C(IC), Offset(Off) {}
+
+    Iterator &operator++() {
+      DCHECK_NE(C, &SentinelChunk);
+      if (++Offset % N)
+        return *this;
+
+      // At this point, we know that Offset % N == 0, so we must advance the
+      // chunk pointer.
+      DCHECK_EQ(Offset % N, 0);
+      C = C->Next;
+      return *this;
+    }
+
+    Iterator &operator--() {
+      DCHECK_NE(C, &SentinelChunk);
+      DCHECK_GT(Offset, 0);
+
+      // We check whether the offset was on a boundary before decrementing, to
+      // see whether we need to retreat to the previous chunk.
+      if ((Offset-- % N) == 0)
+        C = C->Prev;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      Iterator Copy(*this);
+      ++(*this);
+      return Copy;
+    }
+
+    Iterator operator--(int) {
+      Iterator Copy(*this);
+      --(*this);
+      return Copy;
+    }
+
+    template <class V, class W>
+    friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) {
+      return L.C == R.C && L.Offset == R.Offset;
+    }
+
+    template <class V, class W>
+    friend bool operator!=(const Iterator<V> &L, const Iterator<W> &R) {
+      return !(L == R);
+    }
+
+    U &operator*() const {
+      DCHECK_NE(C, &SentinelChunk);
+      return reinterpret_cast<U *>(C->Block.Data)[Offset % N];
+    }
+
+    U *operator->() const {
+      DCHECK_NE(C, &SentinelChunk);
+      return reinterpret_cast<U *>(C->Block.Data) + (Offset % N);
+    }
+  };
+
+public:
+  explicit Array(AllocatorType &A) : Allocator(&A) {}
+  Array() : Array(GetGlobalAllocator()) {}
+
+  Array(const Array &) = delete;
+  Array(Array &&O) NOEXCEPT : Allocator(O.Allocator),
+                              Head(O.Head),
+                              Tail(O.Tail),
+                              Size(O.Size) {
+    O.Head = &SentinelChunk;
+    O.Tail = &SentinelChunk;
+    O.Size = 0;
+  }
+
+  bool empty() const { return Size == 0; }
+
+  AllocatorType &allocator() const {
+    DCHECK_NE(Allocator, nullptr);
+    return *Allocator;
+  }
+
+  size_t size() const { return Size; }
+
+  T *Append(const T &E) {
+    if (UNLIKELY(Head == &SentinelChunk))
+      if (InitHeadAndTail() == nullptr)
+        return nullptr;
+
+    auto Offset = Size % N;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewChunk() == nullptr)
+        return nullptr;
+
+    auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+    *Position = E;
+    ++Size;
+    FreeElements -= FreeElements ? 1 : 0;
+    return Position;
+  }
+
+  template <class... Args> T *AppendEmplace(Args &&... args) {
+    if (UNLIKELY(Head == &SentinelChunk))
+      if (InitHeadAndTail() == nullptr)
+        return nullptr;
+
+    auto Offset = Size % N;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewChunk() == nullptr)
+        return nullptr;
+
+    auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+    // In-place construct at Position.
+    new (Position) T(std::forward<Args>(args)...);
+    ++Size;
+    FreeElements -= FreeElements ? 1 : 0;
+    return Position;
+  }
+
+  T &operator[](size_t Offset) const {
+    DCHECK_LE(Offset, Size);
+    // We need to traverse the chunks enough times to find the element at
+    // Offset.
+    auto C = Head;
+    while (Offset >= N) {
+      C = C->Next;
+      Offset -= N;
+      DCHECK_NE(C, &SentinelChunk);
+    }
+    auto Position = reinterpret_cast<T *>(C->Block.Data) + Offset;
+    return *Position;
+  }
+
+  T &front() const {
+    DCHECK_NE(Head, &SentinelChunk);
+    DCHECK_NE(Size, 0u);
+    return *reinterpret_cast<T *>(Head->Block.Data);
+  }
+
+  T &back() const {
+    DCHECK_NE(Tail, &SentinelChunk);
+    auto Offset = (Size - 1) % N;
+    return *(reinterpret_cast<T *>(Tail->Block.Data) + Offset);
+  }
+
+  template <class Predicate> T *find_element(Predicate P) const {
+    if (empty())
+      return nullptr;
+
+    auto E = end();
+    for (auto I = begin(); I != E; ++I)
+      if (P(*I))
+        return &(*I);
+
+    return nullptr;
+  }
+
+  /// Remove the last Elements elements from the end. This leaves the blocks
+  /// behind, so appending new elements after trimming does not require
+  /// allocating new blocks.
+  void trim(size_t Elements) {
+    DCHECK_LE(Elements, Size);
+    Size -= Elements;
+    FreeElements += Elements;
+
+    // Here we need to check whether we've cleared enough elements to warrant
+    // putting blocks on to the freelist. We determine whether we need to
+    // right-size the internal list by keeping track of the number of "free"
+    // elements still in the array.
+    auto ChunksToTrim = FreeElements / N;
+    for (size_t i = 0; i < ChunksToTrim; ++i, FreeElements -= N) {
+      // Put the tail into the Freelist.
+      auto *FreeChunk = Tail;
+      Tail = Tail->Prev;
+      if (Tail == &SentinelChunk)
+        Head = Tail;
+      else
+        Tail->Next = &SentinelChunk;
+      FreeChunk->Next = Freelist;
+      FreeChunk->Prev = Freelist->Prev;
+      Freelist = FreeChunk;
+    }
+  }
+
+  // Provide iterators.
+  Iterator<T> begin() const { return Iterator<T>(Head, 0); }
+  Iterator<T> end() const { return Iterator<T>(Tail, Size); }
+  Iterator<const T> cbegin() const { return Iterator<const T>(Head, 0); }
+  Iterator<const T> cend() const { return Iterator<const T>(Tail, Size); }
+};
+
+// We need to have this storage definition out-of-line so that the compiler can
+// ensure that storage for the SentinelChunk is defined and has a single
+// address.
+template <class T, size_t N>
+typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
+
+} // namespace __xray
+
+#endif // XRAY_SEGMENTED_ARRAY_H
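
For reviewers, a minimal usage sketch of how the two new containers compose. This is not part of the patch: it only exercises the interfaces added above (the Allocator constructor and Allocate, plus Array's Append, AppendEmplace, trim, and iterators), and the Sample type and the 1 MiB cap are made up purely for illustration.

// usage_sketch.cc -- illustration only, not part of the patch.
#include "xray_allocator.h"
#include "xray_segmented_array.h"

namespace __xray {

// Hypothetical payload; any trivially destructible type works.
struct Sample {
  s64 FuncId;
  s64 TSC;
  Sample(s64 F, s64 T) : FuncId(F), TSC(T) {}
};

void usageSketch() {
  // An arena capped at 1 MiB; its block size is derived from the Array's
  // chunk size (AllocatorChunkSize rounded up to a cache-line multiple).
  using SampleArray = Array<Sample>;
  SampleArray::AllocatorType A(1 << 20, 0);

  SampleArray Samples(A);        // use A instead of the global allocator
  Samples.AppendEmplace(1, 42);  // constructs Sample{1, 42} in place
  Samples.Append(Sample{2, 43}); // copies an existing element in
  for (const auto &S : Samples)  // bidirectional iterators work in range-for
    (void)S;
  Samples.trim(1);               // drops the last element; blocks are kept for re-use
}

} // namespace __xray

As the Array documentation above states, the Array never frees its blocks; releasing the memory happens when the owning Allocator is destroyed.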