diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -200,3 +200,4 @@
 add_subdirectory(threads)
 add_subdirectory(File)
+add_subdirectory(swisstable)
diff --git a/libc/src/__support/swisstable/CMakeLists.txt b/libc/src/__support/swisstable/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_header_library(
+  dispatch
+  HDRS
+    dispatch.h
+  DEPENDS
+    .common
+    libc.src.__support.common
+)
+
+add_header_library(
+  common
+  HDRS
+    common.h
+  DEPENDS
+    libc.src.__support.builtin_wrappers
+)
\ No newline at end of file
diff --git a/libc/src/__support/swisstable/asimd.h b/libc/src/__support/swisstable/asimd.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/asimd.h
@@ -0,0 +1,89 @@
+//===-- SwissTable ASIMD Specialization -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SUPPORT_SWISSTABLE_ASIMD_H
+#define LLVM_LIBC_SUPPORT_SWISSTABLE_ASIMD_H
+
+#include "src/__support/swisstable/common.h"
+
+#include <arm_neon.h>
+
+namespace __llvm_libc::internal::swisstable {
+
+// According to abseil-cpp, ARM's 16-byte ASIMD operations may introduce
+// too much latency, so we work on 8-byte groups instead.
+// Reference:
+// https://github.com/abseil/abseil-cpp/commit/6481443560a92d0a3a55a31807de0cd712cd4f88
+// With ASIMD, some bitmasks are not iteratable: we do not want to clear
+// the lower bits in each stride with an extra `AND` operation.
+using BitMask = BitMaskAdaptor<uint64_t, 0x8080808080808080ULL, 8>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  int8x8_t data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    return {vld1_s8(static_cast<const int8_t *>(addr))};
+  }
+
+  // Load a group of control words from an aligned address.
+  // Notice that aligned and unaligned loads are identical in ASIMD.
+  static Group aligned_load(const void *__restrict addr) {
+    return {vld1_s8(static_cast<const int8_t *>(addr))};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    vst1_s8(static_cast<int8_t *>(addr), data);
+  }
+
+  // Find the lanes equal to the given byte and return the bitmask
+  // with the corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    auto duplicated = vdup_n_s8(static_cast<int8_t>(byte));
+    auto cmp = vceq_s8(data, duplicated);
+    auto converted = vget_lane_u64(vreinterpret_u64_u8(cmp), 0);
+    return {converted & BitMask::MASK};
+  }
+
+  // Find the lanes equal to EMPTY and return the bitmask with the
+  // corresponding bits set.
+  BitMask mask_empty() const {
+    auto duplicated = vdup_n_s8(static_cast<int8_t>(EMPTY));
+    auto cmp = vceq_s8(data, duplicated);
+    auto converted = vget_lane_u64(vreinterpret_u64_u8(cmp), 0);
+    return {converted};
+  }
+
+  // Find the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with the corresponding bits set.
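+  // For example, a group of [ EMPTY | DELETED | 0x0F | ... ] is viewed
+  // as the lane bytes [ 0xFF | 0x80 | 0x0F | ... ]; masking each lane
+  // with 0x80 keeps exactly one marker bit per special (non-full) lane.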
+  BitMask mask_empty_or_deleted() const {
+    auto converted = vget_lane_u64(vreinterpret_u64_s8(data), 0);
+    return {converted & BitMask::MASK};
+  }
+
+  // Find the lanes representing full cells (highest bit unset) and
+  // return the bitmask with the corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    auto dup = vdup_n_s8(static_cast<int8_t>(0x80));
+    auto zero = vdup_n_s8(0x00);
+    auto special = vcgt_s8(zero, data);
+    return {vorr_s8(dup, vreinterpret_s8_u8(special))};
+  }
+};
+
+} // namespace __llvm_libc::internal::swisstable
+
+#endif // LLVM_LIBC_SUPPORT_SWISSTABLE_ASIMD_H
diff --git a/libc/src/__support/swisstable/common.h b/libc/src/__support/swisstable/common.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/common.h
@@ -0,0 +1,94 @@
+//===-- SwissTable Common Definitions ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SUPPORT_SWISSTABLE_COMMON_H
+#define LLVM_LIBC_SUPPORT_SWISSTABLE_COMMON_H
+
+#include "src/__support/builtin_wrappers.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace __llvm_libc::internal::swisstable {
+
+// Special values of the control byte.
+using CtrlWord = uint8_t;
+using HashWord = uint64_t;
+constexpr static inline CtrlWord EMPTY = 0b11111111u;
+constexpr static inline CtrlWord DELETED = 0b10000000u;
+
+// Implementations of the bitmask.
+// The backend word type may vary depending on the microarchitecture.
+// For example, with x86 SSE2, the bitmask is just the 16-bit unsigned
+// integer whose bits correspond to lanes in a SIMD register.
+template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
+  // A mask constant whose significant bits are all set.
+  constexpr static inline T MASK = WORD_MASK;
+  // A stride in the bitmask may use multiple bits.
+  constexpr static inline size_t STRIDE = WORD_STRIDE;
+
+  T word;
+
+  // Invert zeros and ones inside the word.
+  BitMaskAdaptor invert() const { return {static_cast<T>(word ^ WORD_MASK)}; }
+
+  // Operator helper to do bit manipulations.
+  BitMaskAdaptor operator^(T value) const {
+    return {static_cast<T>(this->word ^ value)};
+  }
+
+  // Check if any bit is set inside the word.
+  bool any_bit_set() const { return word != 0; }
+
+  // Count trailing zeros with respect to stride.
+  size_t trailing_zeros() const { return safe_ctz(word) / WORD_STRIDE; }
+
+  // Count trailing zeros with respect to stride, assuming the bitmask
+  // is nonzero.
+  size_t lowest_set_bit_nonzero() const {
+    return unsafe_ctz(word) / WORD_STRIDE;
+  }
+
+  // Count leading zeros with respect to stride.
+  size_t leading_zeros() const { return safe_clz(word) / WORD_STRIDE; }
+};
+
+// Use the bitmask as an iterator: each step returns the current lowest
+// set bit and then clears it. To make the bitmask iteratable, each
+// stride must contain 0 or exactly 1 set bit.
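+// For example, with a stride of 1, the mask 0b0101 yields position 0
+// and then position 2 before it is exhausted.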
+template <typename BitMask>
+struct IteratableBitMaskAdaptor : public BitMask {
+  void remove_lowest_bit() {
+    // Remove the lowest set bit inside the word:
+    //   word              = 011110100 (original value)
+    //   word - 1          = 011110011 (flip all bits up to the lowest set bit)
+    //   word & (word - 1) = 011110000 (value with the lowest set bit cleared)
+    this->word = this->word & (this->word - 1);
+  }
+  using value_type = size_t;
+  using iterator = BitMask;
+  using const_iterator = BitMask;
+  size_t operator*() { return this->lowest_set_bit_nonzero(); }
+  IteratableBitMaskAdaptor &operator++() {
+    this->remove_lowest_bit();
+    return *this;
+  }
+  IteratableBitMaskAdaptor begin() { return *this; }
+  IteratableBitMaskAdaptor end() { return {0}; }
+  bool operator==(const IteratableBitMaskAdaptor &other) const {
+    return this->word == other.word;
+  }
+  bool operator!=(const IteratableBitMaskAdaptor &other) const {
+    return this->word != other.word;
+  }
+};
+
+} // namespace __llvm_libc::internal::swisstable
+
+#endif // LLVM_LIBC_SUPPORT_SWISSTABLE_COMMON_H
diff --git a/libc/src/__support/swisstable/dispatch.h b/libc/src/__support/swisstable/dispatch.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/dispatch.h
@@ -0,0 +1,19 @@
+//===-- SwissTable Platform Dispatch ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SUPPORT_SWISSTABLE_DISPATCH_H
+#define LLVM_LIBC_SUPPORT_SWISSTABLE_DISPATCH_H
+
+#include "src/__support/architectures.h"
+
+#if defined(LLVM_LIBC_ARCH_X86_64) && defined(__SSE2__)
+#include "src/__support/swisstable/sse2.h"
+#elif defined(LLVM_LIBC_ARCH_ANY_ARM) && defined(__ARM_NEON) &&               \
+    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "src/__support/swisstable/asimd.h"
+#else
+#include "src/__support/swisstable/generic.h"
+#endif
+
+#endif // LLVM_LIBC_SUPPORT_SWISSTABLE_DISPATCH_H
diff --git a/libc/src/__support/swisstable/generic.h b/libc/src/__support/swisstable/generic.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/generic.h
@@ -0,0 +1,153 @@
+//===-- SwissTable Generic Fallback -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SUPPORT_SWISSTABLE_GENERIC_H
+#define LLVM_LIBC_SUPPORT_SWISSTABLE_GENERIC_H
+
+#include "src/__support/endian.h"
+#include "src/__support/swisstable/common.h"
+#include "src/string/memory_utils/memcpy_implementations.h"
+
+namespace __llvm_libc::internal::swisstable {
+
+// Helper function to spread a byte across the whole word.
+// Cumulatively, the procedure looks like:
+//   byte                = 0x00000000000000ff
+//   byte | (byte << 8)  = 0x000000000000ffff
+//   byte | (byte << 16) = 0x00000000ffffffff
+//   byte | (byte << 32) = 0xffffffffffffffff
+constexpr static inline uintptr_t repeat(uintptr_t byte) {
+  size_t shift_amount = 8;
+  while (shift_amount < sizeof(uintptr_t) * 8) {
+    byte |= byte << shift_amount;
+    shift_amount <<= 1;
+  }
+  return byte;
+}
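+
+// A small compile-time sanity check: spreading 0x01 must set the lowest
+// bit of every byte lane, whatever the width of uintptr_t is.
+static_assert(repeat(0x01) == static_cast<uintptr_t>(-1) / 0xff,
+              "repeat() should broadcast the byte to every lane");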
+
+using BitMask = BitMaskAdaptor<uintptr_t, repeat(0x80), 8>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  uintptr_t data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    uintptr_t data;
+    inline_memcpy(reinterpret_cast<char *>(&data),
+                  static_cast<const char *>(addr), sizeof(data));
+    return {data};
+  }
+
+  // Load a group of control words from an aligned address.
+  static Group aligned_load(const void *__restrict addr) {
+    uintptr_t data = *static_cast<const uintptr_t *>(addr);
+    return {data};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    *static_cast<uintptr_t *>(addr) = data;
+  }
+
+  // Find the lanes equal to the given byte and return the bitmask
+  // with the corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    // Given byte = 0x10, suppose the data is:
+    //
+    //       data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
+    //
+    // First, we compare the byte using the XOR operation:
+    //
+    //        [ 0x10 | 0x10 | 0x10 | 0x10 | ... ]      (0)
+    //      ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]      (1)
+    //      = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]      (2)
+    //
+    // Notice that the equal positions are now 0x00, so if we subtract
+    // 0x01 from every byte, those zero bytes must borrow from their
+    // upper bits (assuming no borrow comes in from the hidden parts):
+    //
+    //        [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]      (2)
+    //      - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ]      (3)
+    //      = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]      (4)
+    //
+    // But there may be some bytes whose highest bit is already set after
+    // the XOR operation. To rule out these positions, we AND the result
+    // with the NOT of the XOR result:
+    //
+    //        [ 0xFF | 0xFF | 0xEF | 0x1E | ... ]      (5, NOT (2))
+    //      & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]      (4)
+    //      = [ 0xFE | 0xFF | 0x0F | 0x00 | ... ]      (6)
+    //
+    // To make the bitmask iteratable, only one bit may be set in each
+    // stride, so we AND each byte with 0x80 and keep only the highest
+    // bit:
+    //
+    //        [ 0xFE | 0xFF | 0x0F | 0x00 | ... ]      (6)
+    //      & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ]      (7)
+    //      = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ]      (8)
+    //
+    // However, false positives are possible. For example, consider the
+    // data [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]: this only happens when
+    // a stored control byte differs from the searched one in nothing
+    // but the lowest bit. The claims are:
+    //
+    // - This never happens for `EMPTY` and `DELETED`, only for full
+    //   entries.
+    // - The check for key equality will catch these.
+    // - This only happens if there is at least 1 true match.
+    // - The chance of this happening is very low (< 1% chance per byte).
+    auto cmp = data ^ repeat(byte);
+    auto result = __llvm_libc::Endian::to_little_endian((cmp - repeat(0x01)) &
+                                                        ~cmp & repeat(0x80));
+    return {result};
+  }
+
+  // Find the lanes equal to EMPTY and return the bitmask with the
+  // corresponding bits set.
+  BitMask mask_empty() const {
+    // If the high bit is set, the byte must be either
+    // 1111_1111 (EMPTY) or 1000_0000 (DELETED), so we can just check
+    // whether the top two bits are both 1 by ANDing them.
+    return {__llvm_libc::Endian::to_little_endian(data & (data << 1) &
+                                                  repeat(0x80))};
+  }
+
+  // Find the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with the corresponding bits set.
+  BitMask mask_empty_or_deleted() const {
+    return {__llvm_libc::Endian::to_little_endian(data) & repeat(0x80)};
+  }
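+
+  // Note: the conversion to little-endian keeps bit order consistent
+  // with lane order, so trailing_zeros() of a mask always refers to the
+  // group's first matching byte in memory.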
+
+  // Find the lanes representing full cells (highest bit unset) and
+  // return the bitmask with the corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    // Set the highest bit only for positions whose highest bit was not
+    // set before:
+    //
+    //   data  = [ 00000000 | 11111111 | 10000000 | ... ]
+    //   ~data = [ 11111111 | 00000000 | 01111111 | ... ]
+    //   full  = [ 10000000 | 00000000 | 00000000 | ... ]
+    auto full = (~data) & repeat(0x80);
+
+    // Invert the marker bit and convert `01111111` to `10000000` by
+    // adding `1` at that bit. The carry will not propagate outside the
+    // byte:
+    //
+    //   ~full     = [ 01111111 | 11111111 | 11111111 | ... ]
+    //   full >> 7 = [ 00000001 | 00000000 | 00000000 | ... ]
+    //   result    = [ 10000000 | 11111111 | 11111111 | ... ]
+    return {~full + (full >> 7)};
+  }
+};
+
+} // namespace __llvm_libc::internal::swisstable
+
+#endif // LLVM_LIBC_SUPPORT_SWISSTABLE_GENERIC_H
diff --git a/libc/src/__support/swisstable/sse2.h b/libc/src/__support/swisstable/sse2.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/swisstable/sse2.h
@@ -0,0 +1,81 @@
+//===-- SwissTable SSE2 Specialization --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SUPPORT_SWISSTABLE_SSE2_H
+#define LLVM_LIBC_SUPPORT_SWISSTABLE_SSE2_H
+
+#include "src/__support/swisstable/common.h"
+
+#include <emmintrin.h>
+
+namespace __llvm_libc::internal::swisstable {
+
+// With SSE2, every bitmask is iteratable, because each lane is encoded
+// with a single bit.
+using BitMask = BitMaskAdaptor<uint16_t, 0xffffu, 1>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  __m128i data;
+
+  // Load a group of control words from an arbitrary address.
+  static Group load(const void *__restrict addr) {
+    return {_mm_loadu_si128(static_cast<const __m128i *>(addr))};
+  }
+
+  // Load a group of control words from an aligned address.
+  static Group aligned_load(const void *__restrict addr) {
+    return {_mm_load_si128(static_cast<const __m128i *>(addr))};
+  }
+
+  // Store a group of control words to an aligned address.
+  void aligned_store(void *addr) const {
+    _mm_store_si128(static_cast<__m128i *>(addr), data);
+  }
+
+  // Find the lanes equal to the given byte and return the bitmask
+  // with the corresponding bits set.
+  IteratableBitMask match_byte(uint8_t byte) const {
+    auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(static_cast<char>(byte)));
+    auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));
+    return {bitmask};
+  }
+
+  // Find the lanes equal to EMPTY and return the bitmask with the
+  // corresponding bits set.
+  BitMask mask_empty() const { return match_byte(EMPTY); }
+
+  // Find the lanes equal to EMPTY or DELETED (highest bit set) and
+  // return the bitmask with the corresponding bits set.
+  BitMask mask_empty_or_deleted() const {
+    auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(data));
+    return {bitmask};
+  }
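+
+  // Note: _mm_movemask_epi8 packs the sign bit of each of the 16 lanes
+  // into the low 16 bits of its result, which is why the SSE2 bitmask
+  // can use a stride of a single bit per lane.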
+
+  // Find the lanes representing full cells (highest bit unset) and
+  // return the bitmask with the corresponding bits set.
+  BitMask mask_full() const { return mask_empty_or_deleted().invert(); }
+
+  // Performs the following transformation on all bytes in the group:
+  // - `EMPTY => EMPTY`
+  // - `DELETED => EMPTY`
+  // - `FULL => DELETED`
+  Group convert_special_to_empty_and_full_to_deleted() const {
+    // Recall that EMPTY and DELETED are distinguished from full cells
+    // by their highest bit, which makes them negative when viewed as
+    // signed integers; for full cells the highest bit is zero. So we
+    // first identify the lanes that are less than zero and then convert
+    // them by setting their highest bit.
+    __m128i zero = _mm_setzero_si128();
+    __m128i special = _mm_cmpgt_epi8(zero, data);
+    __m128i converted =
+        _mm_or_si128(special, _mm_set1_epi8(static_cast<char>(0x80)));
+    return {converted};
+  }
+};
+
+} // namespace __llvm_libc::internal::swisstable
+
+#endif // LLVM_LIBC_SUPPORT_SWISSTABLE_SSE2_H
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -136,3 +136,4 @@
 add_subdirectory(File)
 add_subdirectory(OSUtil)
 add_subdirectory(FPUtil)
+add_subdirectory(swisstable)
diff --git a/libc/test/src/__support/swisstable/CMakeLists.txt b/libc/test/src/__support/swisstable/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/test/src/__support/swisstable/CMakeLists.txt
@@ -0,0 +1,35 @@
+add_libc_unittest(
+  bitmask_test
+  SUITE
+    libc_support_unittests
+  SRCS
+    bitmask_test.cpp
+  DEPENDS
+    libc.src.__support.swisstable.dispatch
+)
+
+add_libc_unittest(
+  group_test
+  SUITE
+    libc_support_unittests
+  SRCS
+    group_test.cpp
+  COMPILE_OPTIONS
+    -DSWISSTABLE_TEST_USE_GENERIC_GROUP=0
+  DEPENDS
+    libc.src.string.memcmp
+    libc.src.__support.swisstable.dispatch
+)
+
+add_libc_unittest(
+  group_generic_test
+  SUITE
+    libc_support_unittests
+  SRCS
+    group_test.cpp
+  COMPILE_OPTIONS
+    -DSWISSTABLE_TEST_USE_GENERIC_GROUP=1
+  DEPENDS
+    libc.src.string.memcmp
+    libc.src.__support.swisstable.dispatch
+)
diff --git a/libc/test/src/__support/swisstable/bitmask_test.cpp b/libc/test/src/__support/swisstable/bitmask_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/__support/swisstable/bitmask_test.cpp
@@ -0,0 +1,141 @@
+#include "src/__support/swisstable/dispatch.h"
+#include "utils/UnitTest/LibcTest.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace __llvm_libc::internal::swisstable {
+
+using ShortBitMask = BitMaskAdaptor<uint16_t, 0xffffu, 1>;
+using LargeBitMask = BitMaskAdaptor<uint64_t, 0x8080808080808080ULL, 8>;
+
+TEST(LlvmLibcSwissTableBitMask, Invert) {
+  {
+    auto x = ShortBitMask{0b10101010'10101010};
+    ASSERT_EQ(x.invert().word, static_cast<uint16_t>(0b01010101'01010101));
+  }
+
+  {
+    auto x = LargeBitMask{0x80808080'00000000};
+    ASSERT_EQ(x.invert().word, static_cast<uint64_t>(0x00000000'80808080));
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, SingleBitStrideLeadingZeros) {
+  ASSERT_EQ(ShortBitMask{0x8808}.leading_zeros(), size_t{0});
+  ASSERT_EQ(ShortBitMask{0x0808}.leading_zeros(), size_t{4});
+  ASSERT_EQ(ShortBitMask{0x0408}.leading_zeros(), size_t{5});
+  ASSERT_EQ(ShortBitMask{0x0208}.leading_zeros(), size_t{6});
+  ASSERT_EQ(ShortBitMask{0x0108}.leading_zeros(), size_t{7});
+  ASSERT_EQ(ShortBitMask{0x0008}.leading_zeros(), size_t{12});
+
+  uint16_t data = 0xffff;
+  for (size_t i = 0; i < 16; ++i) {
+    ASSERT_EQ(ShortBitMask{data}.leading_zeros(), i);
+    data >>= 1;
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, MultiBitStrideLeadingZeros) {
+  ASSERT_EQ(LargeBitMask{0x80808080'80808080}.leading_zeros(), size_t{0});
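+  // Each stride is 8 bits wide, so every cleared marker byte at the top
+  // advances leading_zeros() by exactly one.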
+  ASSERT_EQ(LargeBitMask{0x00808080'80808080}.leading_zeros(), size_t{1});
+  ASSERT_EQ(LargeBitMask{0x00000080'80808080}.leading_zeros(), size_t{3});
+
+  ASSERT_EQ(LargeBitMask{0x80808080'80808080}.leading_zeros(), size_t{0});
+  ASSERT_EQ(LargeBitMask{0x01808080'80808080}.leading_zeros(), size_t{0});
+  ASSERT_EQ(LargeBitMask{0x10000080'80808080}.leading_zeros(), size_t{0});
+  ASSERT_EQ(LargeBitMask{0x0F808080'80808080}.leading_zeros(), size_t{0});
+
+  uint64_t data = 0xffff'ffff'ffff'ffff;
+  for (size_t i = 0; i < 8; ++i) {
+    for (size_t j = 0; j < 8; ++j) {
+      ASSERT_EQ(LargeBitMask{data}.leading_zeros(), i);
+      data >>= 1;
+    }
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, SingleBitStrideTrailingZeros) {
+  ASSERT_EQ(ShortBitMask{0x8808}.trailing_zeros(), size_t{3});
+  ASSERT_EQ(ShortBitMask{0x0804}.trailing_zeros(), size_t{2});
+  ASSERT_EQ(ShortBitMask{0x0802}.trailing_zeros(), size_t{1});
+  ASSERT_EQ(ShortBitMask{0x0801}.trailing_zeros(), size_t{0});
+  ASSERT_EQ(ShortBitMask{0x0800}.trailing_zeros(), size_t{11});
+  ASSERT_EQ(ShortBitMask{0x1000}.trailing_zeros(), size_t{12});
+
+  uint16_t data = 0xffff;
+  for (size_t i = 0; i < 16; ++i) {
+    ASSERT_EQ(ShortBitMask{data}.trailing_zeros(), i);
+    data <<= 1;
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, MultiBitStrideTrailingZeros) {
+  ASSERT_EQ(LargeBitMask{0x80808080'80808080}.trailing_zeros(), size_t{0});
+  ASSERT_EQ(LargeBitMask{0x80808080'80808000}.trailing_zeros(), size_t{1});
+  ASSERT_EQ(LargeBitMask{0x80808080'80804000}.trailing_zeros(), size_t{1});
+
+  ASSERT_EQ(LargeBitMask{0x80808080'80800000}.trailing_zeros(), size_t{2});
+  ASSERT_EQ(LargeBitMask{0x80808080'80000000}.trailing_zeros(), size_t{3});
+  ASSERT_EQ(LargeBitMask{0x80808080'00000000}.trailing_zeros(), size_t{4});
+  ASSERT_EQ(LargeBitMask{0x80808000'00000000}.trailing_zeros(), size_t{5});
+
+  uint64_t data = 0xffff'ffff'ffff'ffff;
+  for (size_t i = 0; i < 8; ++i) {
+    for (size_t j = 0; j < 8; ++j) {
+      ASSERT_EQ(LargeBitMask{data}.trailing_zeros(), i);
+      data <<= 1;
+    }
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, SingleBitStrideLowestSetBit) {
+  uint16_t data = 0xffff;
+  for (size_t i = 0; i < 16; ++i) {
+    if (ShortBitMask{data}.any_bit_set()) {
+      ASSERT_EQ(ShortBitMask{data}.lowest_set_bit_nonzero(), i);
+      data <<= 1;
+    }
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, MultiBitStrideLowestSetBit) {
+  uint64_t data = 0xffff'ffff'ffff'ffff;
+  for (size_t i = 0; i < 8; ++i) {
+    for (size_t j = 0; j < 8; ++j) {
+      if (LargeBitMask{data}.any_bit_set()) {
+        ASSERT_EQ(LargeBitMask{data}.lowest_set_bit_nonzero(), i);
+        data <<= 1;
+      }
+    }
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, SingleBitStrideIteration) {
+  using Iter = IteratableBitMaskAdaptor<ShortBitMask>;
+  uint16_t data = 0xffff;
+  for (size_t i = 0; i < 16; ++i) {
+    Iter iter = {data};
+    size_t j = i;
+    for (auto x : iter) {
+      ASSERT_EQ(x, j);
+      j++;
+    }
+    ASSERT_EQ(j, size_t{16});
+    data <<= 1;
+  }
+}
+
+TEST(LlvmLibcSwissTableBitMask, MultiBitStrideIteration) {
+  using Iter = IteratableBitMaskAdaptor<LargeBitMask>;
+  uint64_t data = Iter::MASK;
+  for (size_t i = 0; i < 8; ++i) {
+    Iter iter = {data};
+    size_t j = i;
+    for (auto x : iter) {
+      ASSERT_EQ(x, j);
+      j++;
+    }
+    ASSERT_EQ(j, size_t{8});
+    data <<= Iter::STRIDE;
+  }
+}
+
+} // namespace __llvm_libc::internal::swisstable
diff --git a/libc/test/src/__support/swisstable/group_test.cpp b/libc/test/src/__support/swisstable/group_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/__support/swisstable/group_test.cpp
@@ -0,0 +1,120 @@
+#include <stdlib.h>
+
+#if SWISSTABLE_TEST_USE_GENERIC_GROUP
"src/__support/swisstable/generic.h" +#define SWISSTABLE_TEST_SUITE(X) TEST(LlvmLibcSwissTableGroupGeneric, X) +#else +#include "src/__support/swisstable/dispatch.h" +#define SWISSTABLE_TEST_SUITE(X) TEST(LlvmLibcSwissTableGroup, X) +#endif +#include "src/string/memcmp.h" +#include "utils/UnitTest/LibcTest.h" +#include + +namespace __llvm_libc::internal::swisstable { + +struct ByteArray { + alignas(Group) uint8_t data[sizeof(Group) + 1]{}; +}; + +SWISSTABLE_TEST_SUITE(LoadStore) { + ByteArray array{}; + ByteArray compare{}; + for (size_t i = 0; i < sizeof(array.data); ++i) { + array.data[i] = 0xff; + auto group = Group::aligned_load(array.data); + group.aligned_store(compare.data); + EXPECT_EQ(__llvm_libc::memcmp(compare.data, array.data, sizeof(Group)), 0); + group = Group::load(&array.data[1]); + group.aligned_store(compare.data); + EXPECT_EQ(__llvm_libc::memcmp(compare.data, &array.data[1], sizeof(Group)), + 0); + array.data[i] = 0; + } +} + +SWISSTABLE_TEST_SUITE(Match) { + // Any pair of targets have bit differences not only at the lowest bit. + // No False positive. + uint8_t targets[4] = {0x00, 0x11, 0xFF, 0x0F}; + size_t count[4] = {0, 0, 0, 0}; + size_t appearance[4][sizeof(Group)]; + ByteArray array{}; + + uintptr_t random = reinterpret_cast(&array) ^ + reinterpret_cast(aligned_alloc); + + for (size_t i = 0; i < sizeof(Group); ++i) { + size_t choice = random % 4; + random /= 4; + array.data[i] = targets[choice]; + appearance[choice][count[choice]++] = i; + } + + for (size_t t = 0; t < sizeof(targets); ++t) { + auto bitmask = Group::aligned_load(array.data).match_byte(targets[t]); + for (size_t i = 0; i < count[t]; ++i) { + size_t iterated = 0; + for (size_t position : bitmask) { + ASSERT_EQ(appearance[t][iterated], position); + iterated++; + } + ASSERT_EQ(count[t], iterated); + } + } +} + +SWISSTABLE_TEST_SUITE(MaskEmpty) { + uint8_t values[4] = {0x00, 0x0F, DELETED, EMPTY}; + + for (size_t i = 0; i < sizeof(Group); ++i) { + ByteArray array{}; + + intptr_t random = reinterpret_cast(&array) ^ + reinterpret_cast(aligned_alloc); + ASSERT_FALSE(Group::aligned_load(array.data).mask_empty().any_bit_set()); + + array.data[i] = EMPTY; + for (size_t j = 0; j < sizeof(Group); ++j) { + if (i == j) + continue; + size_t sample_space = 3 + (j > i); + size_t choice = random % sample_space; + random /= sizeof(values); + array.data[j] = values[choice]; + } + + auto mask = Group::aligned_load(array.data).mask_empty(); + ASSERT_TRUE(mask.any_bit_set()); + ASSERT_EQ(mask.lowest_set_bit_nonzero(), i); + } +} + +SWISSTABLE_TEST_SUITE(MaskEmptyOrDeleted) { + uint8_t values[4] = {0x00, 0x0F, DELETED, EMPTY}; + for (size_t t = 2; t <= 3; ++t) { + for (size_t i = 0; i < sizeof(Group); ++i) { + ByteArray array{}; + + intptr_t random = reinterpret_cast(&array) ^ + reinterpret_cast(aligned_alloc); + ASSERT_FALSE(Group::aligned_load(array.data) + .mask_empty_or_deleted() + .any_bit_set()); + + array.data[i] = values[t]; + for (size_t j = 0; j < sizeof(Group); ++j) { + if (i == j) + continue; + size_t sample_space = 2 + 2 * (j > i); + size_t choice = random % sample_space; + random /= sizeof(values); + array.data[j] = values[choice]; + } + + auto mask = Group::aligned_load(array.data).mask_empty_or_deleted(); + ASSERT_TRUE(mask.any_bit_set()); + ASSERT_EQ(mask.lowest_set_bit_nonzero(), i); + } + } +} +} // namespace __llvm_libc::internal::swisstable