Index: sanitizer_common/sanitizer_stackdepot.h =================================================================== --- sanitizer_common/sanitizer_stackdepot.h +++ sanitizer_common/sanitizer_stackdepot.h @@ -20,19 +20,163 @@ // StackDepot efficiently stores huge amounts of stack traces. -// Maps stack trace to an unique id. -u32 StackDepotPut(const uptr *stack, uptr size); -// Retrieves a stored stack trace by the id. -const uptr *StackDepotGet(u32 id, uptr *size); - struct StackDepotStats { uptr n_uniq_ids; uptr mapped; }; -StackDepotStats *StackDepotGetStats(); +struct StackDepotDesc { + const uptr *stack; + uptr size; + u32 hash() const { + // murmur2 + const u32 m = 0x5bd1e995; + const u32 seed = 0x9747b28c; + const u32 r = 24; + u32 h = seed ^ (size * sizeof(uptr)); + for (uptr i = 0; i < size; i++) { + u32 k = stack[i]; + k *= m; + k ^= k >> r; + k *= m; + h *= m; + h ^= k; + } + h ^= h >> 13; + h *= m; + h ^= h >> 15; + return h; + } + bool is_valid() { return size > 0 && stack; } +}; + +struct StackDepotNode { + StackDepotNode *link; + u32 id; + u32 hash_bits : 12; + u32 use_count : 20; + static const u32 MAX_USE_COUNT = 1 << 18; + uptr size; + uptr stack[1]; // [size] + typedef StackDepotDesc args_type; + bool eq(u32 hash, const args_type &args) const { + if ((hash >> 20) != hash_bits || args.size != size) return false; + uptr i = 0; + for (; i < size; i++) { + if (stack[i] != args.stack[i]) return false; + } + return true; + } + static uptr storage_size(const args_type &args) { + return sizeof(StackDepotNode) + (args.size - 1) * sizeof(uptr); + } + void store(const args_type &args, u32 hash) { + hash_bits = hash >> 20; + size = args.size; + internal_memcpy(stack, args.stack, size * sizeof(uptr)); + } + args_type load() const { + args_type ret = {&stack[0], size}; + return ret; + } + struct Handle { + StackDepotNode *node_; + Handle() : node_(0) {} + explicit Handle(StackDepotNode *node) : node_(node) {} + bool valid() { return node_; } + u32 id() { return node_->id; } + int inc_use_count() { + if (node_->use_count >= MAX_USE_COUNT) node_->use_count++; + return node_->use_count; + } + uptr size() { return node_->size; } + uptr *stack() { return &node_->stack[0]; } + }; + Handle get_handle() { return Handle(this); } +}; + +struct ChainedOriginDepotDesc { + u32 here_id; + u32 prev_id; + u32 hash() const { return 0; } + bool is_valid() { return true; } +}; + +struct ChainedOriginDepotNode { + StackDepotNode *link; + u32 id; + u32 here_id; + u32 prev_id; + typedef ChainedOriginDepotDesc args_type; + bool eq(u32 hash, const args_type &args) const { + return here_id == args.here_id && prev_id == args.prev_id; + } + static uptr storage_size(const args_type &args) { + return sizeof(ChainedOriginDepotNode); + } + void store(const args_type &args, u32 other_hash) { + here_id = args.here_id; + prev_id = args.prev_id; + } + args_type load() const { + args_type ret = {here_id, prev_id}; + return ret; + } + struct Handle { + ChainedOriginDepotNode *node_; + Handle() : node_(0) {} + explicit Handle(ChainedOriginDepotNode *node) : node_(node) {} + bool valid() { return node_; } + u32 id() { return node_->id; } + int here_id() { return node_->here_id; } + int prev_id() { return node_->prev_id; } + }; + Handle get_handle() { return Handle(this); } +}; + +template +class StackDepot { + public: + typedef typename Node::args_type args_type; + typedef typename Node::Handle handle_type; + // Maps stack trace to an unique id. + handle_type Put(args_type args); + // Retrieves a stored stack trace by the id. + args_type Get(u32 id); + + + StackDepotStats *GetStats() { return &stats; } + + private: + Node *tryallocDesc(uptr memsz); + Node *allocDesc(uptr size); + static Node *find(Node *s, args_type args, u32 hash); + static Node *lock(atomic_uintptr_t *p); + static void unlock(atomic_uintptr_t *p, Node *s); + + static const int kTabSize = 1024 * 1024; // Hash table size. + static const int kPartBits = 8; + static const int kPartShift = sizeof(u32) * 8 - kPartBits - 1; + static const int kPartCount = + 1 << kPartBits; // Number of subparts in the table. + static const int kPartSize = kTabSize / kPartCount; + static const int kMaxId = 1 << kPartShift; + + StaticSpinMutex mtx; // Protects alloc of new blocks for region allocator. + atomic_uintptr_t region_pos; // Region allocator for Node's. + atomic_uintptr_t region_end; + atomic_uintptr_t tab[kTabSize]; // Hash table of Node's. + atomic_uint32_t seq[kPartCount]; // Unique id generators. + + StackDepotStats stats; -struct StackDesc; + friend class StackDepotReverseMap; +}; + +StackDepotStats *StackDepotGetStats(); +u32 StackDepotPut(const uptr *stack, uptr size); +// Retrieves a stored stack trace by the id. +const uptr *StackDepotGet(u32 id, uptr *size); // Instantiating this class creates a snapshot of StackDepot which can be // efficiently queried with StackDepotGet(). You can use it concurrently with @@ -46,7 +190,7 @@ private: struct IdDescPair { u32 id; - StackDesc *desc; + StackDepotNode *desc; static bool IdComparator(const IdDescPair &a, const IdDescPair &b); }; @@ -57,6 +201,139 @@ StackDepotReverseMap(const StackDepotReverseMap&); void operator=(const StackDepotReverseMap&); }; + + +template +Node *StackDepot::tryallocDesc(uptr memsz) { + // Optimisic lock-free allocation, essentially try to bump the region ptr. + for (;;) { + uptr cmp = atomic_load(®ion_pos, memory_order_acquire); + uptr end = atomic_load(®ion_end, memory_order_acquire); + if (cmp == 0 || cmp + memsz > end) + return 0; + if (atomic_compare_exchange_weak( + ®ion_pos, &cmp, cmp + memsz, + memory_order_acquire)) + return (StackDepotNode*)cmp; + } +} + +template +Node *StackDepot::allocDesc(uptr memsz) { + // First, try to allocate optimisitically. + StackDepotNode *s = tryallocDesc(memsz); + if (s) + return s; + // If failed, lock, retry and alloc new superblock. + SpinMutexLock l(&mtx); + for (;;) { + s = tryallocDesc(memsz); + if (s) + return s; + atomic_store(®ion_pos, 0, memory_order_relaxed); + uptr allocsz = 64 * 1024; + if (allocsz < memsz) + allocsz = memsz; + uptr mem = (uptr)MmapOrDie(allocsz, "stack depot"); + stats.mapped += allocsz; + atomic_store(®ion_end, mem + allocsz, memory_order_release); + atomic_store(®ion_pos, mem, memory_order_release); + } +} + +template +Node *StackDepot::find(Node *s, args_type args, u32 hash) { + // Searches linked list s for the stack, returns its id. + for (; s; s = s->link) { + if (s->eq(hash, args)) { + return s; + } + } + return 0; +} + +template +Node *StackDepot::lock(atomic_uintptr_t *p) { + // Uses the pointer lsb as mutex. + for (int i = 0;; i++) { + uptr cmp = atomic_load(p, memory_order_relaxed); + if ((cmp & 1) == 0 + && atomic_compare_exchange_weak(p, &cmp, cmp | 1, + memory_order_acquire)) + return (Node*)cmp; + if (i < 10) + proc_yield(10); + else + internal_sched_yield(); + } +} + +template +void StackDepot::unlock(atomic_uintptr_t *p, Node *s) { + DCHECK_EQ((uptr)s & 1, 0); + atomic_store(p, (uptr)s, memory_order_release); +} + +template +typename StackDepot::handle_type StackDepot::Put(args_type args) { + if (!args.is_valid()) + return handle_type(); + uptr h = args.hash(); + atomic_uintptr_t *p = &tab[h % kTabSize]; + uptr v = atomic_load(p, memory_order_consume); + Node *s = (Node*)(v & ~1); + // First, try to find the existing stack. + Node *node = find(s, args, h); + if (node) + return node->get_handle(); + // If failed, lock, retry and insert new. + Node *s2 = lock(p); + if (s2 != s) { + node = find(s2, args, h); + if (node) { + unlock(p, s2); + return node->get_handle(); + } + } + uptr part = (h % kTabSize) / kPartSize; + u32 id = atomic_fetch_add(&seq[part], 1, memory_order_relaxed) + 1; + stats.n_uniq_ids++; + CHECK_LT(id, kMaxId); + id |= part << kPartShift; + CHECK_NE(id, 0); + CHECK_EQ(id & (1u << 31), 0); + uptr memsz = Node::storage_size(args); + s = allocDesc(memsz); + s->id = id; + s->store(args, h); + s->link = s2; + unlock(p, s); + return s->get_handle(); +} + +template +typename StackDepot::args_type StackDepot::Get(u32 id) { + if (id == 0) { + return args_type(); + } + CHECK_EQ(id & (1u << 31), 0); + // High kPartBits contain part id, so we need to scan at most kPartSize lists. + uptr part = id >> kPartShift; + for (int i = 0; i != kPartSize; i++) { + uptr idx = part * kPartSize + i; + CHECK_LT(idx, kTabSize); + atomic_uintptr_t *p = &tab[idx]; + uptr v = atomic_load(p, memory_order_consume); + Node *s = (Node*)(v & ~1); + for (; s; s = s->link) { + if (s->id == id) { + return s->load(); + } + } + } + return args_type(); +} + } // namespace __sanitizer #endif // SANITIZER_STACKDEPOT_H Index: sanitizer_common/sanitizer_stackdepot.cc =================================================================== --- sanitizer_common/sanitizer_stackdepot.cc +++ sanitizer_common/sanitizer_stackdepot.cc @@ -19,186 +19,23 @@ namespace __sanitizer { -const int kTabSize = 1024 * 1024; // Hash table size. -const int kPartBits = 8; -const int kPartShift = sizeof(u32) * 8 - kPartBits - 1; -const int kPartCount = 1 << kPartBits; // Number of subparts in the table. -const int kPartSize = kTabSize / kPartCount; -const int kMaxId = 1 << kPartShift; - -struct StackDesc { - StackDesc *link; - u32 id; - u32 hash; - uptr size; - uptr stack[1]; // [size] -}; - -static struct { - StaticSpinMutex mtx; // Protects alloc of new blocks for region allocator. - atomic_uintptr_t region_pos; // Region allocator for StackDesc's. - atomic_uintptr_t region_end; - atomic_uintptr_t tab[kTabSize]; // Hash table of StackDesc's. - atomic_uint32_t seq[kPartCount]; // Unique id generators. -} depot; - -static StackDepotStats stats; +static StackDepot theDepot; +static StackDepot chainedOriginDepot; StackDepotStats *StackDepotGetStats() { - return &stats; -} - -static u32 hash(const uptr *stack, uptr size) { - // murmur2 - const u32 m = 0x5bd1e995; - const u32 seed = 0x9747b28c; - const u32 r = 24; - u32 h = seed ^ (size * sizeof(uptr)); - for (uptr i = 0; i < size; i++) { - u32 k = stack[i]; - k *= m; - k ^= k >> r; - k *= m; - h *= m; - h ^= k; - } - h ^= h >> 13; - h *= m; - h ^= h >> 15; - return h; -} - -static StackDesc *tryallocDesc(uptr memsz) { - // Optimisic lock-free allocation, essentially try to bump the region ptr. - for (;;) { - uptr cmp = atomic_load(&depot.region_pos, memory_order_acquire); - uptr end = atomic_load(&depot.region_end, memory_order_acquire); - if (cmp == 0 || cmp + memsz > end) - return 0; - if (atomic_compare_exchange_weak( - &depot.region_pos, &cmp, cmp + memsz, - memory_order_acquire)) - return (StackDesc*)cmp; - } -} - -static StackDesc *allocDesc(uptr size) { - // First, try to allocate optimisitically. - uptr memsz = sizeof(StackDesc) + (size - 1) * sizeof(uptr); - StackDesc *s = tryallocDesc(memsz); - if (s) - return s; - // If failed, lock, retry and alloc new superblock. - SpinMutexLock l(&depot.mtx); - for (;;) { - s = tryallocDesc(memsz); - if (s) - return s; - atomic_store(&depot.region_pos, 0, memory_order_relaxed); - uptr allocsz = 64 * 1024; - if (allocsz < memsz) - allocsz = memsz; - uptr mem = (uptr)MmapOrDie(allocsz, "stack depot"); - stats.mapped += allocsz; - atomic_store(&depot.region_end, mem + allocsz, memory_order_release); - atomic_store(&depot.region_pos, mem, memory_order_release); - } -} - -static u32 find(StackDesc *s, const uptr *stack, uptr size, u32 hash) { - // Searches linked list s for the stack, returns its id. - for (; s; s = s->link) { - if (s->hash == hash && s->size == size) { - uptr i = 0; - for (; i < size; i++) { - if (stack[i] != s->stack[i]) - break; - } - if (i == size) - return s->id; - } - } - return 0; -} - -static StackDesc *lock(atomic_uintptr_t *p) { - // Uses the pointer lsb as mutex. - for (int i = 0;; i++) { - uptr cmp = atomic_load(p, memory_order_relaxed); - if ((cmp & 1) == 0 - && atomic_compare_exchange_weak(p, &cmp, cmp | 1, - memory_order_acquire)) - return (StackDesc*)cmp; - if (i < 10) - proc_yield(10); - else - internal_sched_yield(); - } -} - -static void unlock(atomic_uintptr_t *p, StackDesc *s) { - DCHECK_EQ((uptr)s & 1, 0); - atomic_store(p, (uptr)s, memory_order_release); + return theDepot.GetStats(); } u32 StackDepotPut(const uptr *stack, uptr size) { - if (stack == 0 || size == 0) - return 0; - uptr h = hash(stack, size); - atomic_uintptr_t *p = &depot.tab[h % kTabSize]; - uptr v = atomic_load(p, memory_order_consume); - StackDesc *s = (StackDesc*)(v & ~1); - // First, try to find the existing stack. - u32 id = find(s, stack, size, h); - if (id) - return id; - // If failed, lock, retry and insert new. - StackDesc *s2 = lock(p); - if (s2 != s) { - id = find(s2, stack, size, h); - if (id) { - unlock(p, s2); - return id; - } - } - uptr part = (h % kTabSize) / kPartSize; - id = atomic_fetch_add(&depot.seq[part], 1, memory_order_relaxed) + 1; - stats.n_uniq_ids++; - CHECK_LT(id, kMaxId); - id |= part << kPartShift; - CHECK_NE(id, 0); - CHECK_EQ(id & (1u << 31), 0); - s = allocDesc(size); - s->id = id; - s->hash = h; - s->size = size; - internal_memcpy(s->stack, stack, size * sizeof(uptr)); - s->link = s2; - unlock(p, s); - return id; + StackDepotDesc desc = {stack, size}; + StackDepotNode::Handle h = theDepot.Put(desc); + return h.valid() ? h.id() : 0; } const uptr *StackDepotGet(u32 id, uptr *size) { - if (id == 0) - return 0; - CHECK_EQ(id & (1u << 31), 0); - // High kPartBits contain part id, so we need to scan at most kPartSize lists. - uptr part = id >> kPartShift; - for (int i = 0; i != kPartSize; i++) { - uptr idx = part * kPartSize + i; - CHECK_LT(idx, kTabSize); - atomic_uintptr_t *p = &depot.tab[idx]; - uptr v = atomic_load(p, memory_order_consume); - StackDesc *s = (StackDesc*)(v & ~1); - for (; s; s = s->link) { - if (s->id == id) { - *size = s->size; - return s->stack; - } - } - } - *size = 0; - return 0; + StackDepotDesc desc = theDepot.Get(id); + *size = desc.size; + return desc.stack; } bool StackDepotReverseMap::IdDescPair::IdComparator( @@ -209,10 +46,10 @@ StackDepotReverseMap::StackDepotReverseMap() : map_(StackDepotGetStats()->n_uniq_ids + 100) { - for (int idx = 0; idx < kTabSize; idx++) { - atomic_uintptr_t *p = &depot.tab[idx]; + for (int idx = 0; idx < StackDepot::kTabSize; idx++) { + atomic_uintptr_t *p = &theDepot.tab[idx]; uptr v = atomic_load(p, memory_order_consume); - StackDesc *s = (StackDesc*)(v & ~1); + StackDepotNode *s = (StackDepotNode*)(v & ~1); for (; s; s = s->link) { IdDescPair pair = {s->id, s}; map_.push_back(pair); @@ -230,7 +67,7 @@ *size = 0; return 0; } - StackDesc *desc = map_[idx].desc; + StackDepotNode *desc = map_[idx].desc; *size = desc->size; return desc->stack; }