Index: compiler-rt/lib/xray/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/CMakeLists.txt
+++ compiler-rt/lib/xray/CMakeLists.txt
@@ -2,33 +2,57 @@
 # XRay runtime library implementation files.
 set(XRAY_SOURCES
-  xray_init.cc
-  xray_flags.cc
-  xray_interface.cc
-  xray_log_interface.cc
-  xray_utils.cc)
+    xray_init.cc
+    xray_flags.cc
+    xray_interface.cc
+    xray_log_interface.cc
+    xray_utils.cc)
 
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
-  xray_buffer_queue.cc
-  xray_fdr_logging.cc)
+    xray_buffer_queue.cc
+    xray_fdr_logging.cc)
 
 set(XRAY_BASIC_MODE_SOURCES
-  xray_inmemory_log.cc)
+    xray_inmemory_log.cc)
+
 # Implementation files for all XRay architectures.
-set(aarch64_SOURCES xray_AArch64.cc xray_trampoline_AArch64.S)
-set(arm_SOURCES xray_arm.cc xray_trampoline_arm.S)
-set(armhf_SOURCES ${arm_SOURCES})
-set(mips_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mipsel_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mips64_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
-set(mips64el_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
+set(x86_64_SOURCES
+    xray_x86_64.cc
+    xray_trampoline_x86_64.S)
+
+set(arm_SOURCES
+    xray_arm.cc
+    xray_trampoline_arm.S)
+
+set(armhf_SOURCES
+    ${arm_SOURCES})
+
+set(aarch64_SOURCES
+    xray_AArch64.cc
+    xray_trampoline_AArch64.S)
+
+set(mips_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S)
+
+set(mipsel_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S)
+
+set(mips64_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S)
+
+set(mips64el_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S)
+
 set(powerpc64le_SOURCES
   xray_powerpc64.cc
   xray_trampoline_powerpc64.cc
   xray_trampoline_powerpc64_asm.S)
-set(x86_64_SOURCES xray_x86_64.cc xray_trampoline_x86_64.S)
 
 # Now put it all together...
 include_directories(..)
Index: compiler-rt/lib/xray/tests/unit/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/tests/unit/CMakeLists.txt
+++ compiler-rt/lib/xray/tests/unit/CMakeLists.txt
@@ -2,3 +2,7 @@
   buffer_queue_test.cc xray_unit_test_main.cc)
 add_xray_unittest(XRayFDRLoggingTest SOURCES
   fdr_logging_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRayAllocatorTest SOURCES
+  allocator_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRaySegmentedArrayTest SOURCES
+  segmented_array_test.cc xray_unit_test_main.cc)
Index: compiler-rt/lib/xray/tests/unit/allocator_test.cc
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/tests/unit/allocator_test.cc
@@ -0,0 +1,42 @@
+//===-- allocator_test.cc ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_allocator.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+  s64 First;
+  s64 Second;
+};
+
+TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
+
+TEST(AllocatorTest, Allocate) {
+  Allocator<sizeof(TestData)> A(2 << 11, 0);
+  auto B = A.Allocate();
+  ASSERT_NE(B.Data, nullptr);
+}
+
+TEST(AllocatorTest, OverAllocate) {
+  Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
+  auto B1 = A.Allocate();
+  (void)B1;
+  auto B2 = A.Allocate();
+  ASSERT_EQ(B2.Data, nullptr);
+}
+
+} // namespace
+} // namespace __xray
Index: compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
@@ -0,0 +1,91 @@
+#include "xray_segmented_array.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+  s64 First;
+  s64 Second;
+
+  // Need a constructor for emplace operations.
+  TestData(s64 F, s64 S) : First(F), Second(S) {}
+};
+
+TEST(SegmentedArrayTest, Construction) {
+  Array<TestData> Data;
+  (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructWithAllocator) {
+  using AllocatorType = typename Array<TestData>::AllocatorType;
+  AllocatorType A(1 << 4, 0);
+  Array<TestData> Data(A);
+  (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructAndPopulate) {
+  Array<TestData> data;
+  ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
+  ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
+  ASSERT_EQ(data.size(), 2u);
+}
+
+TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
+  Array<TestData> data;
+  ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  ASSERT_EQ(data[0].First, 0);
+  ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, PopulateWithMoreElements) {
+  Array<TestData> data;
+  static const auto kMaxElements = 100u;
+  for (auto I = 0u; I < kMaxElements; ++I) {
+    ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
+  }
+  ASSERT_EQ(data.size(), kMaxElements);
+  for (auto I = 0u; I < kMaxElements; ++I) {
+    ASSERT_EQ(data[I].First, I);
+    ASSERT_EQ(data[I].Second, I + 1);
+  }
+}
+
+TEST(SegmentedArrayTest, AppendEmplace) {
+  Array<TestData> data;
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data[0].First, 1);
+  ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, AppendAndTrim) {
+  Array<TestData> data;
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  data.trim(1);
+  ASSERT_EQ(data.size(), 0u);
+  ASSERT_TRUE(data.empty());
+}
+
+TEST(SegmentedArrayTest, Iterators) {
+  Array<TestData> data;
+  ASSERT_TRUE(data.empty());
+  ASSERT_EQ(data.begin(), data.end());
+  auto I0 = data.begin();
+  ASSERT_EQ(I0++, data.begin());
+  ASSERT_NE(I0, data.begin());
+  for (const auto &D : data) {
+    (void)D;
+    FAIL();
+  }
+  ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+  ASSERT_EQ(data.size(), 1u);
+  ASSERT_NE(data.begin(), data.end());
+  auto &D0 = *data.begin();
+  ASSERT_EQ(D0.First, 1);
+  ASSERT_EQ(D0.Second, 1);
+}
+
+} // namespace
+} // namespace __xray
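A note on the arithmetic behind the OverAllocate expectation above: the allocator (defined in xray_allocator.h below) rounds the requested block size up to a multiple of the cache line size, so even a 16-byte TestData occupies a full cache line. With MaxMemory set to sizeof(TestData), the budget is exhausted after a single block, and the second Allocate() call returns a Block whose Data is null. A minimal sketch of the rounding, assuming a 64-byte cache line (roundUpToCacheLine is a hypothetical stand-in for the Block::Size expression):

  #include <cstddef>

  // Hypothetical mirror of Allocator<N>::Block::Size; assumes
  // kCacheLineSize == 64, as sanitizer_common defines on most targets.
  constexpr size_t roundUpToCacheLine(size_t N, size_t CacheLine = 64) {
    return CacheLine * ((N / CacheLine) + (N % CacheLine ? 1 : 0));
  }

  static_assert(roundUpToCacheLine(16) == 64,
                "a 16-byte request occupies one full cache line");
  static_assert(roundUpToCacheLine(64) == 64,
                "exact multiples are not padded");
  static_assert(roundUpToCacheLine(65) == 128,
                "one byte of spill-over costs a whole extra line");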
Index: compiler-rt/lib/xray/xray_allocator.h
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_allocator.h
@@ -0,0 +1,163 @@
+//===-- xray_allocator.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the allocator interface for an arena allocator, used primarily for
+// the profiling runtime.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_ALLOCATOR_H
+#define XRAY_ALLOCATOR_H
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <cstddef>
+#include <cstdint>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+/// The Allocator type hands out fixed-sized chunks of memory that are
+/// cache-line aligned and sized. This is useful for placement of
+/// performance-sensitive data in memory that's frequently accessed. The
+/// allocator also self-limits the peak memory usage to a dynamically defined
+/// maximum.
+///
+/// N is the lower-bound size of the block of memory to return from the
+/// allocation function. N is used to compute the size of a block, which is
+/// a cache-line-size multiple worth of memory. We compute the size of a block
+/// by determining how many cache lines' worth of memory is required to
+/// subsume N.
+template <size_t N> struct Allocator {
+  // The Allocator returns memory as Block instances.
+  struct Block {
+    /// Compute the minimum cache-line size multiple that is >= N.
+    static constexpr auto Size =
+        kCacheLineSize * ((N / kCacheLineSize) + (N % kCacheLineSize ? 1 : 0));
+    void *Data = nullptr;
+  };
+
+private:
+  // A BlockLink will contain a fixed number of blocks, each with an identifier
+  // to specify whether it's been handed out or not. We keep track of BlockLink
+  // iterators, which are basically a pointer to the link and an offset into
+  // the fixed set of blocks associated with a link. The iterators are
+  // bidirectional.
+  //
+  // We're calling it a "link" in the context of seeing these as a chain of
+  // block pointer containers (i.e. links in a chain).
+  struct BlockLink {
+    static_assert(kCacheLineSize % sizeof(void *) == 0,
+                  "Cache line size is not divisible by size of void*; none of "
+                  "the assumptions of the BlockLink will hold.");
+
+    // We compute the number of pointers to areas in memory that we consider
+    // individual blocks we've allocated. To ensure that instances of the
+    // BlockLink object are cache-line sized, we deduct one additional
+    // pointer's worth, representing the pointer to the previous link.
+    //
+    // This structure corresponds to the following layout:
+    //
+    //   Blocks [ 0, 1, 2, ..., BlockPtrCount - 1 ]
+    //
+    // Using a 16-bit bitmap will allow us to cover a really big cache line
+    // that might store up to 16 pointers.
+    static constexpr auto BlockPtrCount =
+        (kCacheLineSize / sizeof(Block *)) - 1;
+
+    // FIXME: Align this to cache-line address boundaries?
+    Block Blocks[BlockPtrCount]{};
+    BlockLink *Prev = nullptr;
+  };
+
+  static_assert(sizeof(BlockLink) == kCacheLineSize,
+                "BlockLink instances must be cache-line-sized.");
+
+  static BlockLink NullLink;
+
+  // FIXME: Implement a freelist, in case we actually do intend to return
+  // memory to the allocator, as opposed to just de-allocating everything in
+  // one go?
+
+  size_t MaxMemory;
+  SpinMutex Mutex{};
+  BlockLink *Tail = &NullLink;
+  size_t Counter = 0;
+
+  BlockLink *NewChainLink() {
+    auto NewChain = reinterpret_cast<BlockLink *>(
+        InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
+    auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
+        BlockLink::BlockPtrCount * Block::Size, nullptr, kCacheLineSize));
+    size_t Offset = 0;
+    DCHECK_NE(NewChain, nullptr);
+    DCHECK_NE(BackingStore, nullptr);
+    for (auto &B : NewChain->Blocks) {
+      B.Data = BackingStore + Offset;
+      Offset += Block::Size;
+    }
+    NewChain->Prev = Tail;
+    return NewChain;
+  }
+
+public:
+  Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
+    // FIXME: Implement PreAllocate support!
+  }
+
+  Block Allocate() {
+    SpinMutexLock Lock(&Mutex);
+    // Check whether we're over quota.
+    if (Counter * Block::Size >= MaxMemory)
+      return {};
+
+    size_t ChainOffset = Counter % BlockLink::BlockPtrCount;
+    if (UNLIKELY(Tail == &NullLink)) {
+      // FIXME: Maybe pre-allocate some BlockLink instances in the same page
+      // as the Block objects.
+      auto *NewLink = NewChainLink();
+
+      // Prepare to return the first block.
+      auto B = NewLink->Blocks[ChainOffset];
+      Tail = NewLink;
+      ++Counter;
+      return B;
+    }
+
+    // Common case: the current tail still has blocks we haven't handed out.
+    if (LIKELY(ChainOffset != 0)) {
+      auto B = Tail->Blocks[ChainOffset];
+      ++Counter;
+      return B;
+    }
+
+    // At this point we know that we've wrapped around, and the current chain
+    // no longer has free blocks. We then allocate a new chain link and make
+    // that the current tail.
+    DCHECK_EQ(ChainOffset, 0);
+    auto *NewLink = NewChainLink();
+    auto B = NewLink->Blocks[ChainOffset];
+    Tail = NewLink;
+    ++Counter;
+    return B;
+  }
+
+  ~Allocator() NOEXCEPT {
+    // We need to deallocate all the blocks, including the chain links.
+    for (auto *C = Tail; C != &NullLink;) {
+      // We know that each link's data blocks came from one contiguous
+      // allocation, so we deallocate them in one go.
+      InternalFree(C->Blocks[0].Data);
+      auto Prev = C->Prev;
+      InternalFree(C);
+      C = Prev;
+    }
+  }
+};
+
+// Storage for the NullLink sentinel.
+template <size_t N> typename Allocator<N>::BlockLink Allocator<N>::NullLink;
+
+} // namespace __xray
+
+#endif // XRAY_ALLOCATOR_H
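To make the allocator's contract concrete, here is a short usage sketch. It is illustrative only: Record is a hypothetical type and the 1 MiB budget is arbitrary. Blocks come back cache-line sized and aligned, so placement-constructing a small record into Block::Data is the expected pattern:

  #include "xray_allocator.h"
  #include <cstdint>
  #include <new>

  namespace {
  struct Record {
    std::int64_t A;
    std::int64_t B;
  };

  void allocatorSketch() {
    // Hand out cache-line-sized blocks, each able to hold one Record,
    // capped at a 1 MiB budget with no pre-allocation.
    __xray::Allocator<sizeof(Record)> A(1 << 20, 0);
    auto B = A.Allocate();
    if (B.Data == nullptr)
      return; // Over quota: the allocator hands back an empty Block.
    new (B.Data) Record{1, 2}; // Placement-construct into the block.
  }
  } // namespace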
Index: compiler-rt/lib/xray/xray_segmented_array.h
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_segmented_array.h
@@ -0,0 +1,292 @@
+//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size chunks
+// backing the segments.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_SEGMENTED_ARRAY_H
+#define XRAY_SEGMENTED_ARRAY_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "xray_allocator.h"
+#include <new>
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+/// The Array type provides an interface similar to std::vector<...> but does
+/// not shrink in size. Once constructed, elements can be appended but cannot
+/// be removed. The implementation is heavily dependent on the contract
+/// provided by the Allocator type, in that all memory will be released when
+/// the Allocator is destroyed. When an Array is destroyed, it will destroy
+/// elements in the backing store but will not free the memory. The parameter
+/// N defines how many elements of T there should be in a single block; by
+/// default we compute this number by packing as many T's as possible into a
+/// single cache line.
+template <class T, size_t N = kCacheLineSize / sizeof(T)> struct Array {
+  static constexpr size_t AllocatorChunkSize = sizeof(T) * N;
+  using AllocatorType = Allocator<AllocatorChunkSize>;
+  static_assert(std::is_trivially_destructible<T>::value,
+                "T must be trivially destructible.");
+
+private:
+  struct Chunk {
+    typename AllocatorType::Block Block;
+    static constexpr size_t Size = N;
+    Chunk *Prev = nullptr;
+    Chunk *Next = nullptr;
+  };
+
+  static Chunk SentinelChunk;
+
+  AllocatorType *Allocator;
+  Chunk *Head = &SentinelChunk;
+  Chunk *Tail = &SentinelChunk;
+  size_t Size = 0;
+
+  Chunk *NewChunk() {
+    auto Block = Allocator->Allocate();
+    if (Block.Data == nullptr)
+      return nullptr;
+    // TODO: Maybe use a separate managed allocator for Chunk instances?
+    auto C = reinterpret_cast<Chunk *>(InternalAlloc(sizeof(Chunk)));
+    if (C == nullptr)
+      return nullptr;
+    C->Block = Block;
+    return C;
+  }
+
+  static AllocatorType &GetGlobalAllocator() {
+    static AllocatorType *const GlobalAllocator = [] {
+      AllocatorType *A = reinterpret_cast<AllocatorType *>(
+          InternalAlloc(sizeof(AllocatorType)));
+      new (A) AllocatorType(2 << 10, 0);
+      return A;
+    }();
+
+    return *GlobalAllocator;
+  }
+
+  Chunk *InitHeadAndTail() {
+    DCHECK_EQ(Head, &SentinelChunk);
+    DCHECK_EQ(Tail, &SentinelChunk);
+    auto Chunk = NewChunk();
+    if (Chunk == nullptr)
+      return nullptr;
+    Chunk->Prev = &SentinelChunk;
+    Chunk->Next = &SentinelChunk;
+    Head = Chunk;
+    Tail = Chunk;
+    return Chunk;
+  }
+
+  Chunk *AppendNewChunk() {
+    auto Chunk = NewChunk();
+    if (Chunk == nullptr)
+      return nullptr;
+    Tail->Next = Chunk;
+    Chunk->Prev = Tail;
+    Chunk->Next = &SentinelChunk;
+    Tail = Chunk;
+    return Chunk;
+  }
+
+  class Iterator {
+    Chunk *C = nullptr;
+    size_t Offset = 0;
+
+  public:
+    Iterator(Chunk *IC, size_t O) : C(IC), Offset(O) {}
+
+    Iterator &operator++() {
+      DCHECK_NE(C, nullptr);
+      if (++Offset % N)
+        return *this;
+
+      // At this point, we know that Offset % N == 0, so we must advance the
+      // chunk pointer.
+      DCHECK_EQ(Offset % N, 0);
+      C = C->Next;
+      return *this;
+    }
+
+    Iterator &operator--() {
+      DCHECK_NE(C, nullptr);
+      DCHECK_GT(Offset, 0u);
+
+      // If the offset was on a chunk boundary before the decrement, we must
+      // retreat the chunk pointer.
+      if (Offset-- % N == 0)
+        C = C->Prev;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      Iterator Copy(*this);
+      if (++Offset % N == 0)
+        C = C->Next;
+      DCHECK_NE(Copy.Offset, this->Offset);
+      return Copy;
+    }
+
+    Iterator operator--(int) {
+      Iterator Copy(*this);
+      if (Offset-- % N == 0)
+        C = C->Prev;
+      DCHECK_NE(Copy.Offset, this->Offset);
+      return Copy;
+    }
+
+    friend bool operator==(const Iterator &L, const Iterator &R) {
+      return L.C == R.C && L.Offset == R.Offset;
+    }
+
+    friend bool operator!=(const Iterator &L, const Iterator &R) {
+      return !(L == R);
+    }
+
+    const T &operator*() const {
+      DCHECK_NE(C, &SentinelChunk);
+      return reinterpret_cast<T *>(C->Block.Data)[Offset % N];
+    }
+
+    T &operator*() {
+      DCHECK_NE(C, &SentinelChunk);
+      return reinterpret_cast<T *>(C->Block.Data)[Offset % N];
+    }
+  };
+
+public:
+  explicit Array(AllocatorType &A) : Allocator(&A) {}
+  Array() : Array(GetGlobalAllocator()) {}
+
+  Array(const Array &) = delete;
+  Array(Array &&O) NOEXCEPT : Allocator(O.Allocator),
+                              Head(O.Head),
+                              Tail(O.Tail),
+                              Size(O.Size) {
+    O.Head = &SentinelChunk;
+    O.Tail = &SentinelChunk;
+    O.Size = 0;
+  }
+
+  bool empty() const { return Size == 0; }
+
+  AllocatorType &allocator() const {
+    DCHECK_NE(Allocator, nullptr);
+    return *Allocator;
+  }
+
+  size_t size() const { return Size; }
+
+  T *Append(const T &E) {
+    if (UNLIKELY(Head == &SentinelChunk))
+      if (InitHeadAndTail() == nullptr)
+        return nullptr;
+
+    auto Offset = Size % N;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewChunk() == nullptr)
+        return nullptr;
+
+    auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+    *Position = E;
+    ++Size;
+    return Position;
+  }
+
+  template <class... Args> T *AppendEmplace(Args &&... args) {
+    if (UNLIKELY(Head == &SentinelChunk))
+      if (InitHeadAndTail() == nullptr)
+        return nullptr;
+
+    auto Offset = Size % N;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewChunk() == nullptr)
+        return nullptr;
+
+    auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+    // In-place construct at Position.
+    new (Position) T(std::forward<Args>(args)...);
+    ++Size;
+    return Position;
+  }
+
+  T &operator[](size_t Offset) const {
+    DCHECK_LT(Offset, Size);
+    // We need to traverse the array enough times to find the element at
+    // Offset.
+    auto C = Head;
+    while (Offset >= N) {
+      C = C->Next;
+      Offset -= N;
+      DCHECK_NE(C, &SentinelChunk);
+    }
+    auto Position = reinterpret_cast<T *>(C->Block.Data) + Offset;
+    return *Position;
+  }
+
+  T &front() const {
+    DCHECK_NE(Head, &SentinelChunk);
+    DCHECK_NE(Size, 0u);
+    return *reinterpret_cast<T *>(Head->Block.Data);
+  }
+
+  T &back() const {
+    DCHECK_NE(Tail, &SentinelChunk);
+    auto Offset = (Size - 1) % N;
+    return *(reinterpret_cast<T *>(Tail->Block.Data) + Offset);
+  }
+
+  template <class Predicate> T *find_element(Predicate P) const {
+    if (empty())
+      return nullptr;
+
+    auto C = Head;
+    auto E = Size;
+    while (E != 0) {
+      DCHECK(C != &SentinelChunk);
+      auto LE = N < E ? N : E;
+      auto end = reinterpret_cast<T *>(C->Block.Data) + LE;
+      for (auto It = reinterpret_cast<T *>(C->Block.Data); It != end; ++It)
+        if (P(*It))
+          return It;
+      C = C->Next;
+      E -= LE;
+    }
+    return nullptr;
+  }
+
+  /// Removes the last `Elements` elements from the end of the array.
+  void trim(size_t Elements) {
+    // TODO: Figure out whether we need to actually destroy the objects that
+    // are going to be trimmed -- given the static_assert above, trivially
+    // destructible objects should be safe to discard without running their
+    // destructors.
+    DCHECK_LE(Elements, Size);
+    Size -= Elements;
+  }
+
+  // Provide iterators.
+  Iterator begin() const { return Iterator(Head, 0); }
+  Iterator end() const { return Iterator(Tail, Size); }
+  Iterator cbegin() const { return Iterator(Head, 0); }
+  Iterator cend() const { return Iterator(Tail, Size); }
+};
+
+// Storage for the SentinelChunk sentinel.
+template <class T, size_t N>
+typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
+
+} // namespace __xray
+
+#endif // XRAY_SEGMENTED_ARRAY_H
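Finally, a hedged usage sketch tying the two headers together. Entry and the 4 KiB budget are hypothetical; the point is the append/iterate/trim workflow, and that trim() only adjusts the logical size without returning memory:

  #include "xray_segmented_array.h"
  #include <cstdint>

  namespace {
  struct Entry {
    std::int64_t Id;
    std::int64_t Value;
    Entry(std::int64_t I, std::int64_t V) : Id(I), Value(V) {}
  };

  void segmentedArraySketch() {
    // A dedicated allocator with a 4 KiB budget; a default-constructed
    // Array<Entry> would share the lazily-created global allocator instead.
    __xray::Array<Entry>::AllocatorType A(1 << 12, 0);
    __xray::Array<Entry> Entries(A);

    Entries.AppendEmplace(1, 10); // Constructs in place in the tail chunk.
    Entries.Append(Entry{2, 20}); // Copies into the next free slot.

    std::int64_t Sum = 0;
    for (const auto &E : Entries) // Iteration hops across chunks as needed.
      Sum += E.Value;
    (void)Sum;

    Entries.trim(1); // Drops the last element; no memory is freed.
  }
  } // namespace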