Index: lib/xray/xray_allocator.h
===================================================================
--- lib/xray/xray_allocator.h
+++ lib/xray/xray_allocator.h
@@ -20,6 +20,7 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_posix.h"
+#include "xray_defs.h"
 #include "xray_utils.h"
 #include <cstddef>
 #include <cstdint>
@@ -27,6 +28,45 @@
 
 namespace __xray {
 
+// We implement our own memory allocation routine which will bypass the
+// internal allocator. This allows us to manage the memory directly, using
+// mmap'ed memory to back the allocators.
+template <class T> T *allocate() XRAY_NEVER_INSTRUMENT {
+  auto B = reinterpret_cast<void *>(
+      internal_mmap(NULL, sizeof(T), PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (B == MAP_FAILED) {
+    if (Verbosity())
+      Report("XRay Profiling: Failed to allocate memory of size %zu.\n",
+             sizeof(T));
+    return nullptr;
+  }
+  return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  internal_munmap(B, sizeof(T));
+}
+
+inline void *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+  auto B = reinterpret_cast<void *>(internal_mmap(
+      NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (B == MAP_FAILED) {
+    if (Verbosity())
+      Report("XRay Profiling: Failed to allocate memory of size %zu.\n", S);
+    return nullptr;
+  }
+  return B;
+}
+
+inline void deallocateBuffer(void *B, size_t S) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  internal_munmap(B, S);
+}
+
 /// The Allocator type hands out fixed-sized chunks of memory that are
 /// cache-line aligned and sized. This is useful for placement of
 /// performance-sensitive data in memory that's frequently accessed. The
@@ -59,12 +99,12 @@
   size_t AllocatedBlocks = 0;
   SpinMutex Mutex{};
 
-  void *Alloc() {
+  void *Alloc() XRAY_NEVER_INSTRUMENT {
     SpinMutexLock Lock(&Mutex);
     if (UNLIKELY(BackingStore == nullptr)) {
       BackingStore = reinterpret_cast<void *>(
           internal_mmap(NULL, MaxMemory, PROT_READ | PROT_WRITE,
-                        MAP_PRIVATE | MAP_ANONYMOUS, 0, 0));
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
       if (BackingStore == MAP_FAILED) {
         BackingStore = nullptr;
         if (Verbosity())
@@ -107,12 +147,12 @@
   }
 
 public:
-  explicit Allocator(size_t M)
+  explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT
       : MaxMemory(nearest_boundary(M, kCacheLineSize)) {}
 
-  Block Allocate() { return {Alloc()}; }
+  Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }
 
-  ~Allocator() NOEXCEPT {
+  ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT {
     if (BackingStore != nullptr) {
       internal_munmap(BackingStore, MaxMemory);
     }
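
Note: the new allocate<T>() helper hands back raw zero-filled pages and never
runs T's constructor; callers are expected to placement-new into the result,
which is exactly what InitAllocatorsCustom does in the next file. A minimal
standalone sketch of the same pattern, using plain POSIX mmap/munmap in place
of sanitizer_common's internal_mmap (the Counters type is illustrative only):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdio>

    // Hand out a zero-filled, page-backed T without running its constructor,
    // mirroring allocate<T>() above but against the raw POSIX API.
    template <class T> T *allocate() {
      void *B = mmap(nullptr, sizeof(T), PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (B == MAP_FAILED) {
        std::fprintf(stderr, "mmap of %zu bytes failed\n", sizeof(T));
        return nullptr;
      }
      return reinterpret_cast<T *>(B);
    }

    template <class T> void deallocate(T *B) {
      if (B != nullptr)
        munmap(B, sizeof(T));
    }

    struct Counters { long Hits, Misses; }; // illustrative payload

    int main() {
      auto *C = allocate<Counters>(); // pages arrive zeroed from the kernel
      if (C == nullptr)
        return 1;
      C->Hits = 1;
      deallocate(C);
    }

For a T with a non-trivial constructor you would follow the allocation with
`new (C) T(...)`, and pair the munmap with an explicit destructor call.
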
Index: lib/xray/xray_function_call_trie.h
===================================================================
--- lib/xray/xray_function_call_trie.h
+++ lib/xray/xray_function_call_trie.h
@@ -15,7 +15,7 @@
 #ifndef XRAY_FUNCTION_CALL_TRIE_H
 #define XRAY_FUNCTION_CALL_TRIE_H
 
-#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "xray_defs.h"
 #include "xray_profiling_flags.h"
 #include "xray_segmented_array.h"
 #include <memory> // For placement new.
@@ -120,9 +120,11 @@
     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
     Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
-         int32_t F)
-        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
-          FId(F) {}
+         int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P),
+                                            Callees(A),
+                                            CallCount(CC),
+                                            CumulativeLocalTime(CLT),
+                                            FId(F) {}
 
     // TODO: Include the compact histogram.
   };
@@ -134,7 +136,8 @@
 
     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
-    ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
+    ShadowStackEntry(uint64_t T, Node *N) XRAY_NEVER_INSTRUMENT : EntryTSC{T},
+                                                                  NodePtr{N} {}
   };
 
   using NodeArray = Array<Node>;
@@ -158,8 +161,9 @@
     Allocators(const Allocators &) = delete;
     Allocators &operator=(const Allocators &) = delete;
 
-    Allocators(Allocators &&O)
-        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
+    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT
+        : NodeAllocator(O.NodeAllocator),
+          RootAllocator(O.RootAllocator),
           ShadowStackAllocator(O.ShadowStackAllocator),
           NodeIdPairAllocator(O.NodeIdPairAllocator) {
       O.NodeAllocator = nullptr;
@@ -168,7 +172,7 @@
       O.NodeIdPairAllocator = nullptr;
     }
 
-    Allocators &operator=(Allocators &&O) {
+    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
       {
         auto Tmp = O.NodeAllocator;
         O.NodeAllocator = this->NodeAllocator;
@@ -192,58 +196,54 @@
       return *this;
     }
 
-    ~Allocators() {
+    ~Allocators() XRAY_NEVER_INSTRUMENT {
       // Note that we cannot use delete on these pointers, as they need to be
      // returned to the sanitizer_common library's internal memory tracking
      // system.
      if (NodeAllocator != nullptr) {
        NodeAllocator->~NodeAllocatorType();
-        InternalFree(NodeAllocator);
+        deallocate(NodeAllocator);
        NodeAllocator = nullptr;
      }
      if (RootAllocator != nullptr) {
        RootAllocator->~RootAllocatorType();
-        InternalFree(RootAllocator);
+        deallocate(RootAllocator);
        RootAllocator = nullptr;
      }
      if (ShadowStackAllocator != nullptr) {
        ShadowStackAllocator->~ShadowStackAllocatorType();
-        InternalFree(ShadowStackAllocator);
+        deallocate(ShadowStackAllocator);
        ShadowStackAllocator = nullptr;
      }
      if (NodeIdPairAllocator != nullptr) {
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
-        InternalFree(NodeIdPairAllocator);
+        deallocate(NodeIdPairAllocator);
        NodeIdPairAllocator = nullptr;
      }
    }
  };
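
Note: Allocators is move-only, and both the move constructor and the move
assignment leave the source's pointers nulled (or swapped away) so that the
destructor can run unconditionally on either object. A minimal sketch of that
ownership protocol, with an illustrative Owner type and plain new/delete
standing in for allocate/deallocate:

    #include <utility>

    struct Owner {
      int *P = nullptr;
      Owner() = default;
      Owner(const Owner &) = delete;
      Owner &operator=(const Owner &) = delete;
      Owner(Owner &&O) : P(O.P) { O.P = nullptr; } // source no longer owns P
      Owner &operator=(Owner &&O) {
        std::swap(P, O.P); // mirrors the member-wise exchange via Tmp above
        return *this;
      }
      ~Owner() { delete P; } // safe even after a move: deleting null is a no-op
    };

    int main() {
      Owner A;
      A.P = new int(42);
      Owner B = std::move(A); // A.P is now nullptr; only B frees the int
    }
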
 
   // TODO: Support configuration of options through the arguments.
-  static Allocators InitAllocators() {
+  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
     return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
   }
 
-  static Allocators InitAllocatorsCustom(uptr Max) {
+  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
     Allocators A;
-    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
-        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
+    auto NodeAllocator = allocate<Allocators::NodeAllocatorType>();
     new (NodeAllocator) Allocators::NodeAllocatorType(Max);
     A.NodeAllocator = NodeAllocator;
 
-    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
-        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
+    auto RootAllocator = allocate<Allocators::RootAllocatorType>();
     new (RootAllocator) Allocators::RootAllocatorType(Max);
     A.RootAllocator = RootAllocator;
 
     auto ShadowStackAllocator =
-        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
-            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
+        allocate<Allocators::ShadowStackAllocatorType>();
     new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
     A.ShadowStackAllocator = ShadowStackAllocator;
 
-    auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
-        InternalAlloc(sizeof(NodeIdPairAllocatorType)));
+    auto NodeIdPairAllocator = allocate<NodeIdPairAllocatorType>();
     new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
     A.NodeIdPairAllocator = NodeIdPairAllocator;
     return A;
@@ -256,12 +256,13 @@
   NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
 
 public:
-  explicit FunctionCallTrie(const Allocators &A)
-      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
+  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
+      : Nodes(*A.NodeAllocator),
+        Roots(*A.RootAllocator),
         ShadowStack(*A.ShadowStackAllocator),
         NodeIdPairAllocator(A.NodeIdPairAllocator) {}
 
-  void enterFunction(const int32_t FId, uint64_t TSC) {
+  void enterFunction(const int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
     DCHECK_NE(FId, 0);
     // This function primarily deals with ensuring that the ShadowStack is
     // consistent and ready for when an exit event is encountered.
@@ -301,7 +302,7 @@
     return;
   }
 
-  void exitFunction(int32_t FId, uint64_t TSC) {
+  void exitFunction(int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
     // When we exit a function, we look up the ShadowStack to see whether we've
     // entered this function before. We do as little processing here as we can,
     // since most of the hard work would have already been done at function
@@ -323,7 +324,7 @@
     }
   }
 
-  const RootArray &getRoots() const { return Roots; }
+  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }
 
   // The deepCopyInto operation will update the provided FunctionCallTrie by
   // re-creating the contents of this particular FunctionCallTrie in the other
@@ -338,7 +339,7 @@
   // synchronisation of both "this" and |O|.
   //
   // This function must *not* be called with a non-empty FunctionCallTrie |O|.
-  void deepCopyInto(FunctionCallTrie &O) const {
+  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
     DCHECK(O.getRoots().empty());
 
     // We then push the root into a stack, to use as the parent marker for new
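
Note: enterFunction/exitFunction above maintain a shadow stack of
(entry TSC, node) pairs, and an exit pops entries until it finds the matching
function id, which also accounts for frames that never saw their own exit
event (for example through tail calls). A much-reduced sketch of that
unwinding, using std::vector and a flat table rather than the real trie types
(all names here are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Entry {
      uint64_t EntryTSC;
      int32_t FId;
    };

    static std::vector<Entry> ShadowStack;
    static uint64_t LocalTime[16] = {}; // per-function time, FId < 16 here

    void enterFunction(int32_t FId, uint64_t TSC) {
      ShadowStack.push_back({TSC, FId});
    }

    void exitFunction(int32_t FId, uint64_t TSC) {
      // Unwind until the matching entry; intermediate frames are charged
      // as if they exited at the same TSC.
      while (!ShadowStack.empty()) {
        Entry Top = ShadowStack.back();
        ShadowStack.pop_back();
        LocalTime[Top.FId] += TSC - Top.EntryTSC;
        if (Top.FId == FId)
          break;
      }
    }

    int main() {
      enterFunction(1, 100);
      enterFunction(2, 120);
      exitFunction(1, 200); // pops frame 2, then frame 1
      std::printf("f1=%llu f2=%llu\n", (unsigned long long)LocalTime[1],
                  (unsigned long long)LocalTime[2]);
    }

The real implementation attributes cumulative and local time separately per
trie node; the sketch only shows the stack discipline.
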
@@ -394,7 +395,7 @@
   //
   // This function is *not* thread-safe, and may require external
   // synchronisation of both "this" and |O|.
-  void mergeInto(FunctionCallTrie &O) const {
+  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
     struct NodeAndTarget {
       FunctionCallTrie::Node *OrigNode;
       FunctionCallTrie::Node *TargetNode;
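
Note: mergeInto accumulates one trie into another, creating target nodes on
demand and summing call counts and cumulative local times. A sketch of the
same merge over a std::map-based trie; plain recursion stands in for the
explicit NodeAndTarget worklist the real implementation uses:

    #include <cstdint>
    #include <map>

    struct Node {
      int64_t CallCount = 0;
      int64_t CumulativeLocalTime = 0;
      std::map<int32_t, Node> Callees; // keyed by function id
    };

    // Accumulate counts from Src into Dst, creating target nodes on demand.
    void mergeInto(const Node &Src, Node &Dst) {
      Dst.CallCount += Src.CallCount;
      Dst.CumulativeLocalTime += Src.CumulativeLocalTime;
      for (const auto &KV : Src.Callees)
        mergeInto(KV.second, Dst.Callees[KV.first]); // [] default-constructs
    }

    int main() {
      Node PerThread, Global;
      PerThread.Callees[42].CallCount = 3;
      mergeInto(PerThread, Global);
      return Global.Callees[42].CallCount == 3 ? 0 : 1;
    }
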
Index: lib/xray/xray_profile_collector.cc
===================================================================
--- lib/xray/xray_profile_collector.cc
+++ lib/xray/xray_profile_collector.cc
@@ -15,6 +15,7 @@
 #include "xray_profile_collector.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray_allocator.h"
+#include "xray_defs.h"
 #include "xray_profiling_flags.h"
 #include "xray_segmented_array.h"
 #include <memory>
@@ -81,26 +82,9 @@
 static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
 static FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
 
-static void *allocateBuffer(size_t S) {
-  auto B = reinterpret_cast<void *>(internal_mmap(
-      NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
-  if (B == MAP_FAILED) {
-    if (Verbosity())
-      Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
-    return nullptr;
-  }
-  return B;
-}
-
-static void deallocateBuffer(void *B, size_t S) {
-  if (B == nullptr)
-    return;
-  internal_munmap(B, S);
-}
-
 } // namespace
 
-void post(const FunctionCallTrie &T, tid_t TId) {
+void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
   static pthread_once_t Once = PTHREAD_ONCE_INIT;
   pthread_once(&Once, +[] { reset(); });
 
@@ -134,8 +118,10 @@
   const FunctionCallTrie::Node *Node = nullptr;
 
   // Constructor for in-place construction.
-  ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
-      : Path(A), Node(N) {}
+  ProfileRecord(PathAllocator &A,
+                const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT
+      : Path(A),
+        Node(N) {}
 };
 
 namespace {
@@ -144,9 +130,9 @@
 
 // Walk a depth-first traversal of each root of the FunctionCallTrie to generate
 // the path(s) and the data associated with the path.
-static void populateRecords(ProfileRecordArray &PRs,
-                            ProfileRecord::PathAllocator &PA,
-                            const FunctionCallTrie &Trie) {
+static void
+populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
+                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
   using StackArray = Array<const FunctionCallTrie::Node *>;
   using StackAllocator = typename StackArray::AllocatorType;
   StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
@@ -174,7 +160,8 @@
 }
 
 static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
-                             const ProfileRecordArray &ProfileRecords) {
+                             const ProfileRecordArray &ProfileRecords)
+    XRAY_NEVER_INSTRUMENT {
   auto NextPtr = static_cast<char *>(
                      internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                  sizeof(Header);
@@ -207,7 +194,7 @@
 
 } // namespace
 
-void serialize() {
+void serialize() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);
 
   if (GlobalAllocators == nullptr || ThreadTries == nullptr ||
@@ -266,7 +253,7 @@
   }
 }
 
-void reset() {
+void reset() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);
 
   if (ProfileBuffers != nullptr) {
@@ -316,7 +303,7 @@
   new (ProfileBuffers) ProfileBufferArray(*ProfileBuffersAllocator);
 }
 
-XRayBuffer nextBuffer(XRayBuffer B) {
+XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);
 
   if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
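
Note: serializeRecords emits a flat byte stream, a copied BlockHeader followed
by back-to-back records. A sketch of that memcpy-and-advance pattern with
illustrative header and record fields (the real XRay profile format differs):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct BlockHeader {
      uint32_t Size;   // payload bytes that follow the header
      uint32_t Number; // block index
      uint64_t Thread; // originating thread id
    };

    struct Record {
      int64_t CallCount;
      int64_t CumulativeLocalTime;
      int32_t FId;
    };

    std::vector<char> serialize(const BlockHeader &H,
                                const std::vector<Record> &Rs) {
      std::vector<char> Buffer(sizeof(H) + Rs.size() * sizeof(Record));
      // memcpy returns its destination, so we can advance past each write.
      char *Next =
          static_cast<char *>(memcpy(Buffer.data(), &H, sizeof(H))) + sizeof(H);
      for (const auto &R : Rs)
        Next = static_cast<char *>(memcpy(Next, &R, sizeof(R))) + sizeof(R);
      return Buffer;
    }

    int main() {
      BlockHeader H{static_cast<uint32_t>(sizeof(Record)), 0, 1234};
      std::vector<Record> Rs = {{5, 100, 1}};
      auto Bytes = serialize(H, Rs);
      return Bytes.size() == sizeof(H) + sizeof(Record) ? 0 : 1;
    }
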
Index: lib/xray/xray_profiling.cc
===================================================================
--- lib/xray/xray_profiling.cc
+++ lib/xray/xray_profiling.cc
@@ -19,7 +19,6 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_log_interface.h"
-
 #include "xray_flags.h"
 #include "xray_profile_collector.h"
 #include "xray_profiling_flags.h"
@@ -69,7 +68,7 @@
   }();
   (void)ThreadOnce;
 
-  auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
+  auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
 
   if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) {
     auto *Allocators =
@@ -167,11 +166,13 @@
 
 thread_local atomic_uint8_t ReentranceGuard{0};
 
-static void postCurrentThreadFCT(ProfilingData &TLD) {
+static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT {
   if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
     return;
 
-  profileCollectorService::post(*TLD.FCT, GetTid());
+  if (!TLD.FCT->getRoots().empty())
+    profileCollectorService::post(*TLD.FCT, GetTid());
+
   cleanupTLD();
 }
Index: lib/xray/xray_segmented_array.h
===================================================================
--- lib/xray/xray_segmented_array.h
+++ lib/xray/xray_segmented_array.h
@@ -88,7 +88,7 @@
   // segments when elements are trimmed off the end.
   SegmentBase *Freelist = &SentinelSegment;
 
-  Segment *NewSegment() {
+  Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
     // We need to handle the case in which enough elements have been trimmed to
     // allow us to re-use segments we've allocated before. For this we look into
     // the Freelist, to see whether we need to actually allocate new blocks or
@@ -111,7 +111,7 @@
     return S;
   }
 
-  Segment *InitHeadAndTail() {
+  Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
     DCHECK_EQ(Head, &SentinelSegment);
     DCHECK_EQ(Tail, &SentinelSegment);
     auto Segment = NewSegment();
@@ -123,7 +123,7 @@
     return Segment;
   }
 
-  Segment *AppendNewSegment() {
+  Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
    auto S = NewSegment();
    if (S == nullptr)
      return nullptr;
@@ -144,16 +144,18 @@
     size_t Size = 0;
 
   public:
-    Iterator(SegmentBase *IS, size_t Off, size_t S)
-        : S(IS), Offset(Off), Size(S) {}
-    Iterator(const Iterator &) noexcept = default;
-    Iterator() noexcept = default;
-    Iterator(Iterator &&) noexcept = default;
-    Iterator &operator=(const Iterator &) = default;
-    Iterator &operator=(Iterator &&) = default;
-    ~Iterator() = default;
+    Iterator(SegmentBase *IS, size_t Off, size_t S) XRAY_NEVER_INSTRUMENT
+        : S(IS),
+          Offset(Off),
+          Size(S) {}
+    Iterator(const Iterator &) NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+    Iterator() NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+    Iterator(Iterator &&) NOEXCEPT XRAY_NEVER_INSTRUMENT = default;
+    Iterator &operator=(const Iterator &) XRAY_NEVER_INSTRUMENT = default;
+    Iterator &operator=(Iterator &&) XRAY_NEVER_INSTRUMENT = default;
+    ~Iterator() XRAY_NEVER_INSTRUMENT = default;
 
-    Iterator &operator++() {
+    Iterator &operator++() XRAY_NEVER_INSTRUMENT {
       if (++Offset % ElementsPerSegment || Offset == Size)
         return *this;
 
@@ -168,7 +170,7 @@
       return *this;
     }
 
-    Iterator &operator--() {
+    Iterator &operator--() XRAY_NEVER_INSTRUMENT {
       DCHECK_NE(S, &SentinelSegment);
       DCHECK_GT(Offset, 0);
 
@@ -181,29 +183,31 @@
       return *this;
     }
 
-    Iterator operator++(int) {
+    Iterator operator++(int) XRAY_NEVER_INSTRUMENT {
       Iterator Copy(*this);
       ++(*this);
       return Copy;
     }
 
-    Iterator operator--(int) {
+    Iterator operator--(int) XRAY_NEVER_INSTRUMENT {
       Iterator Copy(*this);
       --(*this);
       return Copy;
     }
 
     template <class V, class W>
-    friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) {
+    friend bool operator==(const Iterator<V> &L,
+                           const Iterator<W> &R) XRAY_NEVER_INSTRUMENT {
       return L.S == R.S && L.Offset == R.Offset;
     }
 
     template <class V, class W>
-    friend bool operator!=(const Iterator<V> &L, const Iterator<W> &R) {
+    friend bool operator!=(const Iterator<V> &L,
+                           const Iterator<W> &R) XRAY_NEVER_INSTRUMENT {
       return !(L == R);
     }
 
-    U &operator*() const {
+    U &operator*() const XRAY_NEVER_INSTRUMENT {
       DCHECK_NE(S, &SentinelSegment);
       auto RelOff = Offset % ElementsPerSegment;
@@ -214,11 +218,11 @@
       return *reinterpret_cast<U *>(AlignedOffset);
     }
 
-    U *operator->() const { return &(**this); }
+    U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); }
   };
 
 public:
-  explicit Array(AllocatorType &A) : Alloc(&A) {}
+  explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT : Alloc(&A) {}
 
   Array(const Array &) = delete;
   Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
@@ -230,16 +234,16 @@
     O.Size = 0;
   }
 
-  bool empty() const { return Size == 0; }
+  bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; }
 
-  AllocatorType &allocator() const {
+  AllocatorType &allocator() const XRAY_NEVER_INSTRUMENT {
     DCHECK_NE(Alloc, nullptr);
     return *Alloc;
   }
 
-  size_t size() const { return Size; }
+  size_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
 
-  T *Append(const T &E) {
+  T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
     if (UNLIKELY(Head == &SentinelSegment))
       if (InitHeadAndTail() == nullptr)
         return nullptr;
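
Note: NewSegment above consults the Freelist before touching the allocator,
so segments recycled by trim() are reused ahead of any fresh allocation. A
sketch of that freelist discipline, with calloc standing in for the
cache-aligned block allocator and an illustrative Segment layout:

    #include <cstdlib>

    struct Segment {
      Segment *Next = nullptr;
      char Data[256];
    };

    static Segment *Freelist = nullptr;

    Segment *NewSegment() {
      if (Freelist != nullptr) { // reuse a recycled segment first
        Segment *S = Freelist;
        Freelist = S->Next;
        S->Next = nullptr;
        return S;
      }
      return static_cast<Segment *>(calloc(1, sizeof(Segment)));
    }

    void RecycleSegment(Segment *S) { // what trim() does with surplus segments
      S->Next = Freelist;
      Freelist = S;
    }

    int main() {
      Segment *A = NewSegment(); // fresh allocation
      RecycleSegment(A);         // parked on the freelist
      Segment *B = NewSegment(); // reused without allocating
      return B == A ? 0 : 1;
    }
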
@@ -257,7 +261,8 @@
     return Position;
   }
 
-  template <class... Args> T *AppendEmplace(Args &&... args) {
+  template <class... Args>
+  T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
     if (UNLIKELY(Head == &SentinelSegment))
       if (InitHeadAndTail() == nullptr)
         return nullptr;
@@ -281,7 +286,7 @@
     return reinterpret_cast<T *>(Position);
   }
 
-  T &operator[](size_t Offset) const {
+  T &operator[](size_t Offset) const XRAY_NEVER_INSTRUMENT {
     DCHECK_LE(Offset, Size);
     // We need to traverse the array enough times to find the element at Offset.
     auto S = Head;
@@ -296,13 +301,13 @@
     return *reinterpret_cast<T *>(Position);
   }
 
-  T &front() const {
+  T &front() const XRAY_NEVER_INSTRUMENT {
     DCHECK_NE(Head, &SentinelSegment);
     DCHECK_NE(Size, 0u);
     return *begin();
   }
 
-  T &back() const {
+  T &back() const XRAY_NEVER_INSTRUMENT {
     DCHECK_NE(Tail, &SentinelSegment);
     DCHECK_NE(Size, 0u);
     auto It = end();
@@ -310,7 +315,8 @@
     return *It;
   }
 
-  template <class Predicate> T *find_element(Predicate P) const {
+  template <class Predicate>
+  T *find_element(Predicate P) const XRAY_NEVER_INSTRUMENT {
     if (empty())
       return nullptr;
 
@@ -324,7 +330,7 @@
 
   /// Remove N Elements from the end. This leaves the blocks behind, and not
   /// require allocation of new blocks for new elements added after trimming.
-  void trim(size_t Elements) {
+  void trim(size_t Elements) XRAY_NEVER_INSTRUMENT {
     if (Elements == 0)
       return;
 
@@ -360,10 +366,18 @@
   }
 
   // Provide iterators.
-  Iterator<T> begin() const { return Iterator<T>(Head, 0, Size); }
-  Iterator<T> end() const { return Iterator<T>(Tail, Size, Size); }
-  Iterator<const T> cbegin() const { return Iterator<const T>(Head, 0, Size); }
-  Iterator<const T> cend() const { return Iterator<const T>(Tail, Size, Size); }
+  Iterator<T> begin() const XRAY_NEVER_INSTRUMENT {
+    return Iterator<T>(Head, 0, Size);
+  }
+  Iterator<T> end() const XRAY_NEVER_INSTRUMENT {
+    return Iterator<T>(Tail, Size, Size);
+  }
+  Iterator<const T> cbegin() const XRAY_NEVER_INSTRUMENT {
+    return Iterator<const T>(Head, 0, Size);
+  }
+  Iterator<const T> cend() const XRAY_NEVER_INSTRUMENT {
+    return Iterator<const T>(Tail, Size, Size);
+  }
 };
 
 // We need to have this storage definition out-of-line so that the compiler can
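
Note: operator[] above costs one link hop per full segment skipped, i.e.
O(Offset / ElementsPerSegment), which is why the hot paths stick to Append,
back, and trim rather than random indexing. A sketch of that traversal with a
simplified segment layout (the real code re-derives an aligned element address
inside the mmap'ed block instead of a plain member array):

    #include <cstddef>

    template <class T, size_t ElementsPerSegment> struct Seg {
      Seg *Next;
      T Data[ElementsPerSegment];
    };

    // Hop whole segments first, then index within the final segment,
    // mirroring the Offset / ElementsPerSegment arithmetic in operator[].
    template <class T, size_t N> T &at(Seg<T, N> *Head, size_t Offset) {
      Seg<T, N> *S = Head;
      while (Offset >= N) {
        S = S->Next;
        Offset -= N;
      }
      return S->Data[Offset];
    }

    int main() {
      Seg<int, 4> S2{nullptr, {40, 50, 60, 70}};
      Seg<int, 4> S1{&S2, {0, 10, 20, 30}};
      return at(&S1, 5) == 50 ? 0 : 1; // element 5 lives in the second segment
    }
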