Index: lib/xray/CMakeLists.txt =================================================================== --- lib/xray/CMakeLists.txt +++ lib/xray/CMakeLists.txt @@ -6,6 +6,7 @@ xray_flags.cc xray_interface.cc xray_log_interface.cc + xray_unique_string_table.cc xray_utils.cc) # Implementation files for all XRay modes. Index: lib/xray/tests/unit/CMakeLists.txt =================================================================== --- lib/xray/tests/unit/CMakeLists.txt +++ lib/xray/tests/unit/CMakeLists.txt @@ -2,3 +2,5 @@ buffer_queue_test.cc xray_unit_test_main.cc) add_xray_unittest(XRayFDRLoggingTest SOURCES fdr_logging_test.cc xray_unit_test_main.cc) +add_xray_unittest(XRayUniqueStringTableTest SOURCES + unique_string_table_test.cc xray_unit_test_main.cc) Index: lib/xray/tests/unit/unique_string_table_test.cc =================================================================== --- /dev/null +++ lib/xray/tests/unit/unique_string_table_test.cc @@ -0,0 +1,50 @@ +//===-- unique_string_table_test.cc ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// + +#include "xray/xray_interface.h" +#include "xray_unique_string_table.h" +#include "gtest/gtest.h" + +static __xray::UniqueStringTable *Strings = new __xray::UniqueStringTable(); + +static constexpr char HelloHello[] = "Hello Hello"; + +TEST(UniqueStringTableTest, Register) { + uint16_t Hello = Strings->AddOrLookupValue("Hello", 5); + uint16_t Different = Strings->AddOrLookupValue("Goodbye", 7); + EXPECT_NE(Hello, Different) << "Different strings should be different"; + uint16_t HelloAgain = Strings->AddOrLookupValue("Hello", 5); + EXPECT_EQ(HelloAgain, Hello) << "Should be same"; + uint16_t FromCharArray = Strings->AddOrLookupValue(HelloHello, 5); + EXPECT_EQ(FromCharArray, Hello) << "Should be same"; + uint16_t FromCharArraySubstring = Strings->AddOrLookupValue(HelloHello, 6); + EXPECT_NE(FromCharArraySubstring, Hello) << "Should be different"; + uint16_t FromCharArrayOffset = Strings->AddOrLookupValue(HelloHello + 6, 5); + EXPECT_EQ(FromCharArrayOffset, Hello) << "Should be same"; + uint16_t Aloha = Strings->AddOrLookupValue("Aloha", 5); + EXPECT_NE(Hello, Aloha) << "Should be different"; + EXPECT_NE(Different, Aloha) << "Should be different"; +} + +TEST(UniqueStringTableTest, ThroughPublicInterface) { + uint16_t One = __xray_register_event_type("One"); + uint16_t Two = __xray_register_event_type("2"); + uint16_t Million = __xray_register_event_type("One Million"); + uint16_t OneAgain = __xray_register_event_type("One"); + uint16_t MillionAgain = __xray_register_event_type("One Million"); + EXPECT_EQ(One, OneAgain) << "Equal types give different tags"; + EXPECT_EQ(Million, MillionAgain) << "Bad Accounting"; + EXPECT_NE(One, Two) << "1 != 2"; + EXPECT_NE(One, Million) << "1 != 1000000"; + EXPECT_NE(Two, Million) << "2 != 1000000"; +} Index: lib/xray/xray_interface.cc =================================================================== --- lib/xray/xray_interface.cc +++ lib/xray/xray_interface.cc @@ -22,11 +22,12 @@ #include #include -#include "sanitizer_common/sanitizer_addrhashmap.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_libc.h" #include "xray_defs.h" #include "xray_flags.h" +#include "xray_unique_string_table.h" extern __sanitizer::SpinMutex XRayInstrMapMutex; extern __sanitizer::atomic_uint8_t XRayInitialized; @@ -66,16 +67,8 @@ // patching/unpatching. __sanitizer::atomic_uint8_t XRayPatching{0}; -struct TypeDescription { - uint32_t type_id; - std::size_t description_string_length; -}; - -using TypeDescriptorMapType = __sanitizer::AddrHashMap; -// An address map from immutable descriptors to type ids. -TypeDescriptorMapType TypeDescriptorAddressMap{}; - -__sanitizer::atomic_uint32_t TypeEventDescriptorCounter{0}; +// Keep track of unique strings registered as type ids. +::__xray::UniqueStringTable *const TypeEventTable = new UniqueStringTable(); // MProtectHelper is an RAII wrapper for calls to mprotect(...) that will // undo any successful mprotect(...) changes. This is used to make a page @@ -387,13 +380,11 @@ uint16_t __xray_register_event_type( const char *const event_type) noexcept XRAY_NEVER_INSTRUMENT { - TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type); - if (h.created()) { - h->type_id = __sanitizer::atomic_fetch_add( - &TypeEventDescriptorCounter, 1, __sanitizer::memory_order_acq_rel); - h->description_string_length = strnlen(event_type, 1024); - } - return h->type_id; + auto length = __sanitizer::internal_strnlen(event_type, 1024); + // TODO: The current API contract is that the pointer registered must be + // valid for the length of the program. This may change with DSO support. + // We can copy the string to a buffer if we decide to pay that cost. + return TypeEventTable->AddOrLookupValue(event_type, length); } XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { Index: lib/xray/xray_unique_string_table.h =================================================================== --- /dev/null +++ lib/xray/xray_unique_string_table.h @@ -0,0 +1,41 @@ +//===-- xray_unique_string_table.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines a table of deduped strings and their unique identifiers. +// +//===----------------------------------------------------------------------===// + +#ifndef XRAY_UNIQUE_STRING_TABLE_H +#define XRAY_UNIQUE_STRING_TABLE_H + +#include +#include +namespace __xray { + +// Forward declare storage +struct UniqueStringTableImpl; + +class UniqueStringTable { +public: + UniqueStringTable(); + uint16_t AddOrLookupValue(const char *const content, std::size_t length); + // Meant to be constructed once as a static storage duration. + ~UniqueStringTable() = delete; + UniqueStringTable(const UniqueStringTable &) = delete; + UniqueStringTable &operator=(const UniqueStringTable &) = delete; + +private: + UniqueStringTableImpl *impl_; +}; + +} // namespace __xray + +#endif // XRAY_UNIQUE_STRING_TABLE_H Index: lib/xray/xray_unique_string_table.cc =================================================================== --- /dev/null +++ lib/xray/xray_unique_string_table.cc @@ -0,0 +1,126 @@ +//===-- xray_unique_string_table.cc -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines a table of deduped strings and their unique identifiers. +// +//===----------------------------------------------------------------------===// + +#include "xray_unique_string_table.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "sanitizer_common/sanitizer_vector.h" + +using __sanitizer::internal_strncmp; +using __sanitizer::SpinMutex; +using __sanitizer::SpinMutexLock; +using __sanitizer::Vector; + +namespace __xray { + +namespace { + +struct HashNode { + const char *const content; + const std::size_t length; + const uint32_t hash; +}; + +// Murmur3's 32 bit variant. strncmp for each node in linear search felt wrong, +// even though the intended use cases are not high cardinality. +uint32_t Murmur3_32(const char *key, std::size_t length) { + static constexpr uint32_t seed = 0x31415926; + static constexpr uint32_t c1 = 0xcc9e2d51; + static constexpr uint32_t c2 = 0x1b873593; + static constexpr uint32_t m = 5; + static constexpr uint32_t n = 0xe6546b64; + + uint32_t hash = seed; + const uint8_t *key_width_8 = reinterpret_cast(key); + if (length > 3) { + const uint32_t *key_width_32 = reinterpret_cast(key); + std::size_t len_width_32 = length >> 2; + do { + uint32_t k = *key_width_32++; + k *= c1; + k = (k << 15) | (k >> 17); + k *= c2; + hash ^= k; + hash = (hash << 13) | (hash >> 19); + hash = (hash * m) + n; + } while (--len_width_32); + key_width_8 = reinterpret_cast(key_width_32); + } + if (length & 3) { + std::size_t i = length & 3; + uint32_t remaining_bytes = 0; + // shift values into a uint32 + switch (i) { + case 3: + remaining_bytes |= key_width_8[2]; + remaining_bytes <<= 8; + case 2: + remaining_bytes |= key_width_8[1]; + remaining_bytes <<= 8; + case 1: + remaining_bytes |= key_width_8[0]; + } + // multiply, rotate, multiply the final block. + remaining_bytes *= c1; + remaining_bytes = (remaining_bytes << 15) | (remaining_bytes >> 17); + remaining_bytes *= c2; + hash ^= remaining_bytes; + } + + // Final avalanche stage. + hash ^= length; + hash ^= (hash >> 16); + hash *= 0x85ebca6b; + hash ^= (hash >> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >> 16); + return hash; +} + +} // namespace + +struct UniqueStringTableImpl { + Vector Storage{}; + SpinMutex insertion_mutex{}; +}; + +UniqueStringTable::UniqueStringTable() : impl_(new UniqueStringTableImpl()) {} + +uint16_t UniqueStringTable::AddOrLookupValue(const char *const content, + std::size_t length) { + uint32_t HashValue = Murmur3_32(content, length); + // Linear search storage. + std::size_t PreInsertSize = 0; + for (; PreInsertSize < impl_->Storage.Size(); ++PreInsertSize) { + const HashNode &val = impl_->Storage[PreInsertSize]; + if (val.hash == HashValue && val.length == length && + internal_strncmp(content, val.content, length) == 0) + return PreInsertSize; + } + SpinMutexLock Lock(&impl_->insertion_mutex); + std::size_t SizeAfterLock = impl_->Storage.Size(); + // Search anything that may have been added after we acquire the lock. + for (std::size_t i = PreInsertSize; i < SizeAfterLock; ++i) { + const HashNode &Val = impl_->Storage[i]; + if (Val.hash == HashValue && Val.length == length && + internal_strncmp(content, Val.content, length) == 0) + return i; + } + HashNode Node{content, length, HashValue}; + impl_->Storage.PushBack(Node); + return impl_->Storage.Size() - 1; +} + +} // namespace __xray