Index: lib/xray/tests/unit/segmented_array_test.cc
===================================================================
--- lib/xray/tests/unit/segmented_array_test.cc
+++ lib/xray/tests/unit/segmented_array_test.cc
@@ -15,16 +15,14 @@
 TEST(SegmentedArrayTest, ConstructWithAllocators) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> Data(A, CA);
+  Array<TestData> Data(A);
   (void)Data;
 }
 
 TEST(SegmentedArrayTest, ConstructAndPopulate) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
   ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
   ASSERT_EQ(data.size(), 2u);
@@ -33,8 +31,7 @@
 TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
   ASSERT_EQ(data.size(), 1u);
   ASSERT_EQ(data[0].First, 0);
@@ -44,8 +41,7 @@
 TEST(SegmentedArrayTest, PopulateWithMoreElements) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 24);
-  ChunkAllocator CA(1 << 20);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   static const auto kMaxElements = 100u;
   for (auto I = 0u; I < kMaxElements; ++I) {
     ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
@@ -60,8 +56,7 @@
 TEST(SegmentedArrayTest, AppendEmplace) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
   ASSERT_EQ(data[0].First, 1);
   ASSERT_EQ(data[0].Second, 1);
@@ -70,8 +65,7 @@
 TEST(SegmentedArrayTest, AppendAndTrim) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
   ASSERT_EQ(data.size(), 1u);
   data.trim(1);
@@ -82,8 +76,7 @@
 TEST(SegmentedArrayTest, IteratorAdvance) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_TRUE(data.empty());
   ASSERT_EQ(data.begin(), data.end());
   auto I0 = data.begin();
@@ -104,8 +97,7 @@
 TEST(SegmentedArrayTest, IteratorRetreat) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 4);
-  ChunkAllocator CA(1 << 4);
-  Array<TestData> data(A, CA);
+  Array<TestData> data(A);
   ASSERT_TRUE(data.empty());
   ASSERT_EQ(data.begin(), data.end());
   ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
@@ -126,17 +118,16 @@
 TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
   using AllocatorType = typename Array<TestData>::AllocatorType;
   AllocatorType A(1 << 20);
-  ChunkAllocator CA(1 << 10);
-  Array<TestData> Data(A, CA);
+  Array<TestData> Data(A);
   ASSERT_TRUE(Data.empty());
   auto I0Begin = Data.begin(), I0End = Data.end();
   // Add enough elements in Data to have more than one chunk.
-  constexpr auto Chunk = Array<TestData>::ChunkSize;
-  constexpr auto ChunkX2 = Chunk * 2;
-  for (auto i = ChunkX2; i > 0u; --i) {
+  constexpr auto Segment = Array<TestData>::SegmentSize;
+  constexpr auto SegmentX2 = Segment * 2;
+  for (auto i = SegmentX2; i > 0u; --i) {
     Data.AppendEmplace(static_cast<s64>(i), static_cast<s64>(i));
   }
-  ASSERT_EQ(Data.size(), ChunkX2);
+  ASSERT_EQ(Data.size(), SegmentX2);
   {
     auto &Back = Data.back();
     ASSERT_EQ(Back.First, 1);
@@ -144,18 +135,18 @@
   }
 
   // Trim one chunk's elements worth.
-  Data.trim(Chunk);
-  ASSERT_EQ(Data.size(), Chunk);
+  Data.trim(Segment);
+  ASSERT_EQ(Data.size(), Segment);
 
   // Check that we are still able to access 'back' properly.
   {
     auto &Back = Data.back();
-    ASSERT_EQ(Back.First, static_cast<s64>(Chunk + 1));
-    ASSERT_EQ(Back.Second, static_cast<s64>(Chunk + 1));
+    ASSERT_EQ(Back.First, static_cast<s64>(Segment + 1));
+    ASSERT_EQ(Back.Second, static_cast<s64>(Segment + 1));
   }
 
   // Then trim until it's empty.
-  Data.trim(Chunk);
+  Data.trim(Segment);
   ASSERT_TRUE(Data.empty());
 
   // Here our iterators should be the same.
@@ -164,10 +155,10 @@
   EXPECT_EQ(I0End, I1End);
 
   // Then we ensure that adding elements back works just fine.
-  for (auto i = ChunkX2; i > 0u; --i) {
+  for (auto i = SegmentX2; i > 0u; --i) {
     Data.AppendEmplace(static_cast<s64>(i), static_cast<s64>(i));
   }
-  EXPECT_EQ(Data.size(), ChunkX2);
+  EXPECT_EQ(Data.size(), SegmentX2);
 }
 
 struct ShadowStackEntry {
@@ -179,8 +170,7 @@
 TEST(SegmentedArrayTest, SimulateStackBehaviour) {
   using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType;
   AllocatorType A(1 << 10);
-  ChunkAllocator CA(1 << 10);
-  Array<ShadowStackEntry> Data(A, CA);
+  Array<ShadowStackEntry> Data(A);
   static uint64_t Dummy = 0;
   constexpr uint64_t Max = 9;
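The tests above capture the user-visible change: Array<T> now takes a single
allocator, sized in bytes for whole segments, instead of an
(Allocator, ChunkAllocator) pair. A minimal sketch of the updated construction
pattern, reusing TestData and the 1 << 4 byte budget from the tests above
(sketch only, not a standalone program):

    using namespace __xray;
    using TestArray = Array<TestData>;
    TestArray::AllocatorType A(1 << 4); // byte budget for segment storage
    TestArray Data(A);                  // single-allocator constructor
    Data.AppendEmplace(1, 1);           // forwards to the TestData constructor
    Data.trim(1);                       // trimmed segments go to the freelist
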
Index: lib/xray/xray_function_call_trie.h
===================================================================
--- lib/xray/xray_function_call_trie.h
+++ lib/xray/xray_function_call_trie.h
@@ -119,9 +119,9 @@
 
     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
-    Node(Node *P, NodeIdPairAllocatorType &A, ChunkAllocator &CA, int64_t CC,
-         int64_t CLT, int32_t F)
-        : Parent(P), Callees(A, CA), CallCount(CC), CumulativeLocalTime(CLT),
+    Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
+         int32_t F)
+        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
           FId(F) {}
 
     // TODO: Include the compact histogram.
@@ -153,7 +153,6 @@
     RootAllocatorType *RootAllocator = nullptr;
     ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
     NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
-    ChunkAllocator *ChunkAlloc = nullptr;
 
     Allocators() {}
     Allocators(const Allocators &) = delete;
@@ -162,12 +161,11 @@
     Allocators(Allocators &&O)
         : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
           ShadowStackAllocator(O.ShadowStackAllocator),
-          NodeIdPairAllocator(O.NodeIdPairAllocator), ChunkAlloc(O.ChunkAlloc) {
+          NodeIdPairAllocator(O.NodeIdPairAllocator) {
       O.NodeAllocator = nullptr;
       O.RootAllocator = nullptr;
       O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
-      O.ChunkAlloc = nullptr;
     }
 
     Allocators &operator=(Allocators &&O) {
@@ -191,11 +189,6 @@
         O.NodeIdPairAllocator = this->NodeIdPairAllocator;
         this->NodeIdPairAllocator = Tmp;
       }
-      {
-        auto Tmp = O.ChunkAlloc;
-        O.ChunkAlloc = this->ChunkAlloc;
-        this->ChunkAlloc = Tmp;
-      }
       return *this;
     }
 
@@ -223,11 +216,6 @@
         InternalFree(NodeIdPairAllocator);
         NodeIdPairAllocator = nullptr;
       }
-      if (ChunkAlloc != nullptr) {
-        ChunkAlloc->~ChunkAllocator();
-        InternalFree(ChunkAlloc);
-        ChunkAlloc = nullptr;
-      }
     }
   };
 
@@ -258,11 +246,6 @@
         InternalAlloc(sizeof(NodeIdPairAllocatorType)));
     new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
     A.NodeIdPairAllocator = NodeIdPairAllocator;
-
-    auto ChunkAlloc = reinterpret_cast<ChunkAllocator *>(
-        InternalAlloc(sizeof(ChunkAllocator)));
-    new (ChunkAlloc) ChunkAllocator(Max);
-    A.ChunkAlloc = ChunkAlloc;
     return A;
   }
 
@@ -271,22 +254,20 @@
   RootArray Roots;
   ShadowStackArray ShadowStack;
   NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
-  ChunkAllocator *ChunkAlloc = nullptr;
 
 public:
   explicit FunctionCallTrie(const Allocators &A)
-      : Nodes(*A.NodeAllocator, *A.ChunkAlloc),
-        Roots(*A.RootAllocator, *A.ChunkAlloc),
-        ShadowStack(*A.ShadowStackAllocator, *A.ChunkAlloc),
-        NodeIdPairAllocator(A.NodeIdPairAllocator), ChunkAlloc(A.ChunkAlloc) {}
+      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
+        ShadowStack(*A.ShadowStackAllocator),
+        NodeIdPairAllocator(A.NodeIdPairAllocator) {}
 
   void enterFunction(const int32_t FId, uint64_t TSC) {
     DCHECK_NE(FId, 0);
     // This function primarily deals with ensuring that the ShadowStack is
     // consistent and ready for when an exit event is encountered.
     if (UNLIKELY(ShadowStack.empty())) {
-      auto NewRoot = Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator,
-                                         *ChunkAlloc, 0, 0, FId);
+      auto NewRoot =
+          Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
       if (UNLIKELY(NewRoot == nullptr))
         return;
       Roots.Append(NewRoot);
@@ -309,8 +290,8 @@
     }
 
     // This means we've never seen this stack before, create a new node here.
-    auto NewNode = Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator,
-                                       *ChunkAlloc, 0, 0, FId);
+    auto NewNode =
+        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
     if (UNLIKELY(NewNode == nullptr))
       return;
     DCHECK_NE(NewNode, nullptr);
@@ -370,13 +351,12 @@
 
     typename Stack::AllocatorType StackAllocator(
         profilingFlags()->stack_allocator_max);
-    ChunkAllocator StackChunkAllocator(profilingFlags()->stack_allocator_max);
-    Stack DFSStack(StackAllocator, StackChunkAllocator);
+    Stack DFSStack(StackAllocator);
 
     for (const auto Root : getRoots()) {
       // Add a node in O for this root.
       auto NewRoot = O.Nodes.AppendEmplace(
-          nullptr, *O.NodeIdPairAllocator, *O.ChunkAlloc, Root->CallCount,
+          nullptr, *O.NodeIdPairAllocator, Root->CallCount,
           Root->CumulativeLocalTime, Root->FId);
 
       // Because we cannot allocate more memory we should bail out right away.
@@ -395,9 +375,8 @@
       DFSStack.trim(1);
       for (const auto Callee : NP.Node->Callees) {
         auto NewNode = O.Nodes.AppendEmplace(
-            NP.NewNode, *O.NodeIdPairAllocator, *O.ChunkAlloc,
-            Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
-            Callee.FId);
+            NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
+            Callee.NodePtr->CumulativeLocalTime, Callee.FId);
         if (UNLIKELY(NewNode == nullptr))
           return;
         NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
@@ -423,16 +402,15 @@
     using Stack = Array<NodeAndTarget>;
     typename Stack::AllocatorType StackAllocator(
         profilingFlags()->stack_allocator_max);
-    ChunkAllocator CA(profilingFlags()->stack_allocator_max);
-    Stack DFSStack(StackAllocator, CA);
+    Stack DFSStack(StackAllocator);
 
     for (const auto Root : getRoots()) {
       Node *TargetRoot = nullptr;
       auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
       if (R == nullptr) {
-        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator,
-                                           *O.ChunkAlloc, 0, 0, Root->FId);
+        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
+                                           0, Root->FId);
         if (UNLIKELY(TargetRoot == nullptr))
           return;
 
@@ -456,9 +434,8 @@
               return C.FId == Callee.FId;
             });
         if (TargetCallee == nullptr) {
-          auto NewTargetNode =
-              O.Nodes.AppendEmplace(NT.TargetNode, *O.NodeIdPairAllocator,
-                                    *O.ChunkAlloc, 0, 0, Callee.FId);
+          auto NewTargetNode = O.Nodes.AppendEmplace(
+              NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
 
           if (UNLIKELY(NewTargetNode == nullptr))
             return;
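The trie-side changes are mechanical: Node's emplace-constructor and every
Nodes.AppendEmplace(...) call site drop the ChunkAllocator argument, and the
Allocators struct loses one owned pointer from its move operations and
destructor. A reduced model of that ownership pattern, with hypothetical
*Model names and malloc/free standing in for InternalAlloc/InternalFree:

    #include <cstddef>
    #include <cstdlib>
    #include <new>

    struct NodeAllocatorModel {
      explicit NodeAllocatorModel(size_t /*MaxBytes*/) {}
    };

    // Raw pointer, pointer-stealing move, explicit destroy-and-free:
    // the same shape as FunctionCallTrie::Allocators after this change.
    struct AllocatorsModel {
      NodeAllocatorModel *NodeAllocator = nullptr;

      AllocatorsModel() = default;
      AllocatorsModel(const AllocatorsModel &) = delete;
      AllocatorsModel(AllocatorsModel &&O) : NodeAllocator(O.NodeAllocator) {
        O.NodeAllocator = nullptr; // leave the moved-from object empty
      }
      ~AllocatorsModel() {
        if (NodeAllocator != nullptr) {
          NodeAllocator->~NodeAllocatorModel();
          std::free(NodeAllocator);
        }
      }
    };

    AllocatorsModel makeAllocatorsModel(size_t Max) {
      AllocatorsModel A;
      auto *P = static_cast<NodeAllocatorModel *>(
          std::malloc(sizeof(NodeAllocatorModel)));
      new (P) NodeAllocatorModel(Max); // placement-construct into raw storage
      A.NodeAllocator = P;
      return A;
    }
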
Index: lib/xray/xray_profile_collector.cc
===================================================================
--- lib/xray/xray_profile_collector.cc
+++ lib/xray/xray_profile_collector.cc
@@ -118,12 +118,11 @@
   const FunctionCallTrie::Node *Node = nullptr;
 
   // Constructor for in-place construction.
-  ProfileRecord(PathAllocator &A, ChunkAllocator &CA,
-                const FunctionCallTrie::Node *N)
+  ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
       : Path([&] {
          auto P =
              reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray)));
-          new (P) PathArray(A, CA);
+          new (P) PathArray(A);
          return P;
        }()),
        Node(N) {}
@@ -137,18 +136,17 @@
 // the path(s) and the data associated with the path.
 static void populateRecords(ProfileRecordArray &PRs,
                             ProfileRecord::PathAllocator &PA,
-                            ChunkAllocator &CA, const FunctionCallTrie &Trie) {
+                            const FunctionCallTrie &Trie) {
   using StackArray = Array<const FunctionCallTrie::Node *>;
   using StackAllocator = typename StackArray::AllocatorType;
   StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
-  ChunkAllocator StackChunkAlloc(profilingFlags()->stack_allocator_max);
-  StackArray DFSStack(StackAlloc, StackChunkAlloc);
+  StackArray DFSStack(StackAlloc);
   for (const auto R : Trie.getRoots()) {
     DFSStack.Append(R);
     while (!DFSStack.empty()) {
       auto Node = DFSStack.back();
       DFSStack.trim(1);
-      auto Record = PRs.AppendEmplace(PA, CA, Node);
+      auto Record = PRs.AppendEmplace(PA, Node);
       if (Record == nullptr)
         return;
       DCHECK_NE(Record, nullptr);
@@ -214,10 +212,9 @@
   for (u32 I = 0; I < ThreadTries->Size(); ++I) {
     using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
     ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max);
-    ChunkAllocator CA(profilingFlags()->global_allocator_max);
     ProfileRecord::PathAllocator PathAlloc(
         profilingFlags()->global_allocator_max);
-    ProfileRecordArray ProfileRecords(PRAlloc, CA);
+    ProfileRecordArray ProfileRecords(PRAlloc);
 
     // First, we want to compute the amount of space we're going to need. We'll
     // use a local allocator and an __xray::Array<...> to store the intermediary
@@ -226,7 +223,7 @@
     const auto &Trie = *(*ThreadTries)[I].Trie;
     if (Trie.getRoots().empty())
       continue;
-    populateRecords(ProfileRecords, PathAlloc, CA, Trie);
+    populateRecords(ProfileRecords, PathAlloc, Trie);
     DCHECK(!Trie.getRoots().empty());
     DCHECK(!ProfileRecords.empty());
 
Index: lib/xray/xray_profiling.cc
===================================================================
--- lib/xray/xray_profiling.cc
+++ lib/xray/xray_profiling.cc
@@ -57,12 +57,13 @@
 
 static pthread_key_t ProfilingKey;
 
 thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
-
 static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
-  if (pthread_getspecific(ProfilingKey) == NULL) {
+  thread_local auto ThreadOnce = [] {
     new (&ThreadStorage) ProfilingData{};
     pthread_setspecific(ProfilingKey, &ThreadStorage);
-  }
+    return false;
+  }();
+  (void)ThreadOnce;
 
   auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
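The xray_profiling.cc hunk swaps a pthread_getspecific() check for a
function-local thread_local whose initialiser runs exactly once per thread;
pthread_setspecific() is kept only so the TSD destructor fires at thread exit.
A self-contained sketch of the idiom, with hypothetical ProfilingDataModel and
Key stand-ins, and the key assumed to be created elsewhere with
pthread_key_create:

    #include <new>
    #include <pthread.h>
    #include <type_traits>

    struct ProfilingDataModel {
      int Counter = 0;
    };

    static pthread_key_t Key; // assumed initialised via pthread_key_create
    static thread_local
        std::aligned_storage<sizeof(ProfilingDataModel)>::type Storage;

    static ProfilingDataModel &getThreadLocalModel() {
      // The lambda body runs once per thread, the first time control
      // reaches this declaration, replacing the NULL check on the TSD slot.
      thread_local bool Once = [] {
        new (&Storage) ProfilingDataModel{};
        pthread_setspecific(Key, &Storage); // registers thread-exit cleanup
        return true;
      }();
      (void)Once;
      return *reinterpret_cast<ProfilingDataModel *>(&Storage);
    }
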
Index: lib/xray/xray_segmented_array.h
===================================================================
--- lib/xray/xray_segmented_array.h
+++ lib/xray/xray_segmented_array.h
@@ -9,7 +9,7 @@
 //
 // This file is a part of XRay, a dynamic runtime instrumentation system.
 //
-// Defines the implementation of a segmented array, with fixed-size chunks
+// Defines the implementation of a segmented array, with fixed-size segments
 // backing the segments.
 //
 //===----------------------------------------------------------------------===//
@@ -24,113 +24,128 @@
 #include <utility>
 
 namespace __xray {
-struct Chunk {
-  void *Data;
-  Chunk *Prev;
-  Chunk *Next;
-};
-
-using ChunkAllocator = Allocator<next_pow2(sizeof(Chunk)) * 8>;
 
 /// The Array type provides an interface similar to std::vector<...> but does
 /// not shrink in size. Once constructed, elements can be appended but cannot be
 /// removed. The implementation is heavily dependent on the contract provided by
 /// the Allocator type, in that all memory will be released when the Allocator
 /// is destroyed. When an Array is destroyed, it will destroy elements in the
-/// backing store but will not free the memory. The parameter N defines how many
-/// elements of T there should be in a single block.
-///
-/// We compute the least common multiple of the size of T and the cache line
-/// size, to allow us to maximise the number of T objects we can place in
-/// cache-line multiple sized blocks. To get back the number of T's, we divide
-/// this least common multiple by the size of T.
-template <class T, size_t N>
-struct Array {
-  static constexpr size_t ChunkSize = N;
-  static constexpr size_t ElementStorageSize = next_pow2(sizeof(T));
-  static constexpr size_t AllocatorChunkSize = ElementStorageSize * ChunkSize;
-  using AllocatorType = Allocator<AllocatorChunkSize>;
-  static_assert(std::is_trivially_destructible<T>::value,
-                "T must be trivially destructible.");
+/// backing store but will not free the memory.
+template <class T> class Array {
+  struct SegmentBase {
+    SegmentBase *Prev;
+    SegmentBase *Next;
+  };
 
-private:
-  // TODO: Consider co-locating the chunk information with the data in the
-  // Block, as in an intrusive list -- i.e. putting the next and previous
-  // pointer values inside the Block storage.
-  static Chunk SentinelChunk;
+  // We want each segment of the array to be cache-line aligned, and elements of
+  // the array be offset from the beginning of the segment.
+  struct Segment : SegmentBase {
+    char Data[];
+  };
+
+public:
+  // Each segment of the array will be laid out with the following assumptions:
+  //
+  //   - Each segment will be on a cache-line address boundary (kCacheLineSize
+  //     aligned).
+  //
+  //   - The elements will be accessed through an aligned pointer, dependent on
+  //     the alignment of T.
+  //
+  //   - Each element is at least two-pointers worth from the beginning of the
+  //     Segment, aligned properly, and the rest of the elements are accessed
+  //     through appropriate alignment.
+  //
+  // We then compute the size of the segment to follow this logic:
+  //
+  //   - Compute the number of elements that can fit within
+  //     kCacheLineSize-multiple segments, minus the size of two pointers.
+  //
+  //   - Request cacheline-multiple sized elements from the allocator.
+  static constexpr size_t AlignedElementStorageSize =
+      sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type);
+
+  static constexpr size_t SegmentSize =
+      nearest_boundary(sizeof(Segment) + next_pow2(sizeof(T)), kCacheLineSize);
+
+  using AllocatorType = Allocator<SegmentSize>;
+
+  static constexpr size_t ElementsPerSegment =
+      (SegmentSize - sizeof(Segment)) / next_pow2(sizeof(T));
+
+  static_assert(ElementsPerSegment > 0,
+                "Must have at least 1 element per segment.");
+
+  static SegmentBase SentinelSegment;
 
+private:
   AllocatorType *Alloc;
-  ChunkAllocator *ChunkAlloc;
-  Chunk *Head = &SentinelChunk;
-  Chunk *Tail = &SentinelChunk;
+  SegmentBase *Head = &SentinelSegment;
+  SegmentBase *Tail = &SentinelSegment;
   size_t Size = 0;
 
-  // Here we keep track of chunks in the freelist, to allow us to re-use chunks
-  // when elements are trimmed off the end.
-  Chunk *Freelist = &SentinelChunk;
+  // Here we keep track of segments in the freelist, to allow us to re-use
+  // segments when elements are trimmed off the end.
+  SegmentBase *Freelist = &SentinelSegment;
 
-  Chunk *NewChunk() {
+  Segment *NewSegment() {
     // We need to handle the case in which enough elements have been trimmed to
-    // allow us to re-use chunks we've allocated before. For this we look into
+    // allow us to re-use segments we've allocated before. For this we look into
     // the Freelist, to see whether we need to actually allocate new blocks or
     // just re-use blocks we've already seen before.
-    if (Freelist != &SentinelChunk) {
-      auto *FreeChunk = Freelist;
-      Freelist = FreeChunk->Next;
-      FreeChunk->Next = &SentinelChunk;
-      Freelist->Prev = &SentinelChunk;
-      return FreeChunk;
+    if (Freelist != &SentinelSegment) {
+      auto *FreeSegment = Freelist;
+      Freelist = FreeSegment->Next;
+      FreeSegment->Next = &SentinelSegment;
+      Freelist->Prev = &SentinelSegment;
+      return static_cast<Segment *>(FreeSegment);
     }
 
-    auto Block = Alloc->Allocate();
-    if (Block.Data == nullptr)
-      return nullptr;
-
-    auto ChunkElement = ChunkAlloc->Allocate();
-    if (ChunkElement.Data == nullptr)
+    auto SegmentBlock = Alloc->Allocate();
+    if (SegmentBlock.Data == nullptr)
       return nullptr;
 
-    // Placement-new the Chunk element at the appropriate location.
-    auto C = reinterpret_cast<Chunk *>(ChunkElement.Data);
-    Chunk LocalC{Block.Data, &SentinelChunk, &SentinelChunk};
-    internal_memcpy(C, &LocalC, sizeof(LocalC));
-    return C;
+    // Placement-new the Segment element at the beginning of the SegmentBlock.
+    auto S = reinterpret_cast<Segment *>(SegmentBlock.Data);
+    new (S) SegmentBase{&SentinelSegment, &SentinelSegment};
+    return S;
   }
 
-  Chunk *InitHeadAndTail() {
-    DCHECK_EQ(Head, &SentinelChunk);
-    DCHECK_EQ(Tail, &SentinelChunk);
-    auto Chunk = NewChunk();
-    if (Chunk == nullptr)
+  Segment *InitHeadAndTail() {
+    DCHECK_EQ(Head, &SentinelSegment);
+    DCHECK_EQ(Tail, &SentinelSegment);
+    auto Segment = NewSegment();
+    if (Segment == nullptr)
       return nullptr;
-    DCHECK_EQ(Chunk->Next, &SentinelChunk);
-    DCHECK_EQ(Chunk->Prev, &SentinelChunk);
-    return Head = Tail = Chunk;
+    DCHECK_EQ(Segment->Next, &SentinelSegment);
+    DCHECK_EQ(Segment->Prev, &SentinelSegment);
+    Head = Tail = static_cast<SegmentBase *>(Segment);
+    return Segment;
   }
 
-  Chunk *AppendNewChunk() {
-    auto Chunk = NewChunk();
-    if (Chunk == nullptr)
+  Segment *AppendNewSegment() {
+    auto S = NewSegment();
+    if (S == nullptr)
       return nullptr;
-    DCHECK_NE(Tail, &SentinelChunk);
-    DCHECK_EQ(Tail->Next, &SentinelChunk);
-    DCHECK_EQ(Chunk->Prev, &SentinelChunk);
-    DCHECK_EQ(Chunk->Next, &SentinelChunk);
-    Tail->Next = Chunk;
-    Chunk->Prev = Tail;
-    Tail = Chunk;
-    return Tail;
+    DCHECK_NE(Tail, &SentinelSegment);
+    DCHECK_EQ(Tail->Next, &SentinelSegment);
+    DCHECK_EQ(S->Prev, &SentinelSegment);
+    DCHECK_EQ(S->Next, &SentinelSegment);
+    Tail->Next = S;
+    S->Prev = Tail;
+    Tail = S;
+    return static_cast<Segment *>(Tail);
   }
 
   // This Iterator models a BidirectionalIterator.
   template <class U> class Iterator {
-    Chunk *C = &SentinelChunk;
+    SegmentBase *S = &SentinelSegment;
     size_t Offset = 0;
     size_t Size = 0;
 
   public:
-    Iterator(Chunk *IC, size_t Off, size_t S) : C(IC), Offset(Off), Size(S) {}
+    Iterator(SegmentBase *IS, size_t Off, size_t S)
+        : S(IS), Offset(Off), Size(S) {}
     Iterator(const Iterator &) noexcept = default;
     Iterator() noexcept = default;
     Iterator(Iterator &&) noexcept = default;
@@ -139,28 +154,28 @@
     ~Iterator() = default;
 
     Iterator &operator++() {
-      if (++Offset % N || Offset == Size)
+      if (++Offset % ElementsPerSegment || Offset == Size)
        return *this;
 
       // At this point, we know that Offset % N == 0, so we must advance the
-      // chunk pointer.
-      DCHECK_EQ(Offset % N, 0);
+      // segment pointer.
+      DCHECK_EQ(Offset % ElementsPerSegment, 0);
       DCHECK_NE(Offset, Size);
-      DCHECK_NE(C, &SentinelChunk);
-      DCHECK_NE(C->Next, &SentinelChunk);
-      C = C->Next;
-      DCHECK_NE(C, &SentinelChunk);
+      DCHECK_NE(S, &SentinelSegment);
+      DCHECK_NE(S->Next, &SentinelSegment);
+      S = S->Next;
+      DCHECK_NE(S, &SentinelSegment);
       return *this;
     }
 
     Iterator &operator--() {
-      DCHECK_NE(C, &SentinelChunk);
+      DCHECK_NE(S, &SentinelSegment);
       DCHECK_GT(Offset, 0);
 
       auto PreviousOffset = Offset--;
-      if (PreviousOffset != Size && PreviousOffset % N == 0) {
-        DCHECK_NE(C->Prev, &SentinelChunk);
-        C = C->Prev;
+      if (PreviousOffset != Size && PreviousOffset % ElementsPerSegment == 0) {
+        DCHECK_NE(S->Prev, &SentinelSegment);
+        S = S->Prev;
       }
 
       return *this;
@@ -180,7 +195,7 @@
 
     template <class V, class W>
     friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) {
-      return L.C == R.C && L.Offset == R.Offset;
+      return L.S == R.S && L.Offset == R.Offset;
     }
 
     template <class V, class W>
@@ -189,31 +204,29 @@
     }
 
     U &operator*() const {
-      DCHECK_NE(C, &SentinelChunk);
-      auto RelOff = Offset % N;
-      return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
-                                    (RelOff * ElementStorageSize));
-    }
+      DCHECK_NE(S, &SentinelSegment);
+      auto RelOff = Offset % ElementsPerSegment;
 
-    U *operator->() const {
-      DCHECK_NE(C, &SentinelChunk);
-      auto RelOff = Offset % N;
-      return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
-                                   (RelOff * ElementStorageSize));
+      // We need to compute the character-aligned pointer, offset from the
+      // segment's Data location to get the element in the position of Offset.
+      auto Base = static_cast<Segment *>(S)->Data;
+      auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize);
+      return *reinterpret_cast<U *>(AlignedOffset);
     }
+
+    U *operator->() const { return &(**this); }
   };
 
 public:
-  explicit Array(AllocatorType &A, ChunkAllocator &CA)
-      : Alloc(&A), ChunkAlloc(&CA) {}
+  explicit Array(AllocatorType &A) : Alloc(&A) {}
 
   Array(const Array &) = delete;
   Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
                               Head(O.Head),
                               Tail(O.Tail),
                               Size(O.Size) {
-    O.Head = &SentinelChunk;
-    O.Tail = &SentinelChunk;
+    O.Head = &SentinelSegment;
+    O.Tail = &SentinelSegment;
     O.Size = 0;
   }
 
@@ -227,39 +240,41 @@
   size_t size() const { return Size; }
 
   T *Append(const T &E) {
-    if (UNLIKELY(Head == &SentinelChunk))
+    if (UNLIKELY(Head == &SentinelSegment))
       if (InitHeadAndTail() == nullptr)
         return nullptr;
 
-    auto Offset = Size % N;
+    auto Offset = Size % ElementsPerSegment;
     if (UNLIKELY(Size != 0 && Offset == 0))
-      if (AppendNewChunk() == nullptr)
+      if (AppendNewSegment() == nullptr)
        return nullptr;
 
-    auto Position = reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Data) +
-                                          (Offset * ElementStorageSize));
+    auto Base = static_cast<Segment *>(Tail)->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    auto Position = reinterpret_cast<T *>(AlignedOffset);
     *Position = E;
     ++Size;
     return Position;
   }
 
   template <class... Args> T *AppendEmplace(Args &&... args) {
-    if (UNLIKELY(Head == &SentinelChunk))
+    if (UNLIKELY(Head == &SentinelSegment))
       if (InitHeadAndTail() == nullptr)
        return nullptr;
 
-    auto Offset = Size % N;
-    auto *LatestChunk = Tail;
+    auto Offset = Size % ElementsPerSegment;
+    auto *LatestSegment = Tail;
     if (UNLIKELY(Size != 0 && Offset == 0)) {
-      LatestChunk = AppendNewChunk();
-      if (LatestChunk == nullptr)
+      LatestSegment = AppendNewSegment();
+      if (LatestSegment == nullptr)
        return nullptr;
     }
 
-    DCHECK_NE(Tail, &SentinelChunk);
-    auto Position = reinterpret_cast<T *>(LatestChunk->Data) +
-                    (Offset * ElementStorageSize);
-    DCHECK_EQ(reinterpret_cast<uintptr_t>(Position) % ElementStorageSize, 0);
+    DCHECK_NE(Tail, &SentinelSegment);
+    auto Base = static_cast<Segment *>(LatestSegment)->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    auto Position = reinterpret_cast<T *>(AlignedOffset);
+
     // In-place construct at Position.
     new (Position) T{std::forward<Args>(args)...};
     ++Size;
@@ -269,24 +284,26 @@
   T &operator[](size_t Offset) const {
     DCHECK_LE(Offset, Size);
     // We need to traverse the array enough times to find the element at Offset.
-    auto C = Head;
-    while (Offset >= N) {
-      C = C->Next;
-      Offset -= N;
-      DCHECK_NE(C, &SentinelChunk);
-    }
-    return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Data) +
-                                  (Offset * ElementStorageSize));
+    auto S = Head;
+    while (Offset >= ElementsPerSegment) {
+      S = S->Next;
+      Offset -= ElementsPerSegment;
+      DCHECK_NE(S, &SentinelSegment);
+    }
+    auto Base = static_cast<Segment *>(S)->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    auto Position = reinterpret_cast<T *>(AlignedOffset);
+    return *reinterpret_cast<T *>(Position);
   }
 
   T &front() const {
-    DCHECK_NE(Head, &SentinelChunk);
+    DCHECK_NE(Head, &SentinelSegment);
     DCHECK_NE(Size, 0u);
     return *begin();
   }
 
   T &back() const {
-    DCHECK_NE(Tail, &SentinelChunk);
+    DCHECK_NE(Tail, &SentinelSegment);
     DCHECK_NE(Size, 0u);
     auto It = end();
     --It;
@@ -313,28 +330,29 @@
     auto OldSize = Size;
     Size -= Elements;
 
-    DCHECK_NE(Head, &SentinelChunk);
-    DCHECK_NE(Tail, &SentinelChunk);
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Tail, &SentinelSegment);
 
-    for (auto ChunksToTrim =
-             (nearest_boundary(OldSize, N) - nearest_boundary(Size, N)) / N;
-         ChunksToTrim > 0; --ChunksToTrim) {
-      DCHECK_NE(Head, &SentinelChunk);
-      DCHECK_NE(Tail, &SentinelChunk);
+    for (auto SegmentsToTrim = (nearest_boundary(OldSize, ElementsPerSegment) -
+                                nearest_boundary(Size, ElementsPerSegment)) /
+                               ElementsPerSegment;
+         SegmentsToTrim > 0; --SegmentsToTrim) {
+      DCHECK_NE(Head, &SentinelSegment);
+      DCHECK_NE(Tail, &SentinelSegment);
       // Put the tail into the Freelist.
-      auto *FreeChunk = Tail;
+      auto *FreeSegment = Tail;
       Tail = Tail->Prev;
-      if (Tail == &SentinelChunk)
+      if (Tail == &SentinelSegment)
         Head = Tail;
       else
-        Tail->Next = &SentinelChunk;
+        Tail->Next = &SentinelSegment;
 
-      DCHECK_EQ(Tail->Next, &SentinelChunk);
-      FreeChunk->Next = Freelist;
-      FreeChunk->Prev = &SentinelChunk;
-      if (Freelist != &SentinelChunk)
-        Freelist->Prev = FreeChunk;
-      Freelist = FreeChunk;
+      DCHECK_EQ(Tail->Next, &SentinelSegment);
+      FreeSegment->Next = Freelist;
+      FreeSegment->Prev = &SentinelSegment;
+      if (Freelist != &SentinelSegment)
+        Freelist->Prev = FreeSegment;
+      Freelist = FreeSegment;
     }
   }
 
@@ -346,11 +364,11 @@
 };
 
 // We need to have this storage definition out-of-line so that the compiler can
-// ensure that storage for the SentinelChunk is defined and has a single
+// ensure that storage for the SentinelSegment is defined and has a single
 // address.
-template <class T, size_t N>
-Chunk Array<T, N>::SentinelChunk{nullptr, &Array<T, N>::SentinelChunk,
-                                 &Array<T, N>::SentinelChunk};
+template <class T>
+typename Array<T>::SegmentBase Array<T>::SentinelSegment{
+    &Array<T>::SentinelSegment, &Array<T>::SentinelSegment};
 
 } // namespace __xray
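
The layout constants introduced above are worth working through once:
sizeof(Segment) covers just the Prev/Next pointers, each element occupies a
next_pow2(sizeof(T)) slot, and the whole segment is rounded up to a cache-line
multiple. A standalone check of that arithmetic, using simplified stand-ins
for the __xray next_pow2/nearest_boundary helpers, and assuming kCacheLineSize
is 64 and an LP64 target:

    #include <cstddef>
    #include <cstdio>

    constexpr size_t kCacheLineSize = 64;
    constexpr size_t next_pow2(size_t N, size_t P = 1) {
      return P >= N ? P : next_pow2(N, P * 2);
    }
    constexpr size_t nearest_boundary(size_t N, size_t M) {
      return N % M == 0 ? N : N + (M - N % M);
    }

    struct TestData { long long First, Second; }; // 16 bytes, as in the tests
    constexpr size_t SegmentHeader = 2 * sizeof(void *); // Prev + Next
    constexpr size_t SegmentSize = nearest_boundary(
        SegmentHeader + next_pow2(sizeof(TestData)), kCacheLineSize);
    constexpr size_t ElementsPerSegment =
        (SegmentSize - SegmentHeader) / next_pow2(sizeof(TestData));

    int main() {
      // Prints SegmentSize=64 ElementsPerSegment=3: three 16-byte TestData
      // slots plus the two list pointers fit in a single cache line.
      std::printf("SegmentSize=%zu ElementsPerSegment=%zu\n", SegmentSize,
                  ElementsPerSegment);
      return 0;
    }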