diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Support/HashBuilder.h @@ -0,0 +1,333 @@ +//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an interface allowing to conveniently build hashes of +// various data types, without relying on the underlying hasher type to know +// about hashed data types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HASHBUILDER_H +#define LLVM_SUPPORT_HASHBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/type_traits.h" + +#include +#include + +namespace llvm { + +/// Interface to help hash various types through a hasher type. +/// +/// Via provided specializations of `update` and `updateRange` functions, +/// various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed without +/// requiring any knowledge of hashed types from the hasher type. +/// +/// The only methods expected from the templated hasher type `HasherT` are: +/// * a default constructor +/// * void update(ArrayRef Data) +/// +/// From a user point of view, the interface provides the following: +/// * `template update(const T &Value)` +/// The `update` function implements hashing of various types. +/// * `template void updateRange(ItT First, ItT Last)` +/// The `updateRange` function is designed to aid hashing a range of values. +/// +/// User-defined `struct` types can participate in this interface by providing +/// an `updateHash` templated function. See the associated template +/// specialization for details. +/// +/// This interface does not impose requirements on the hasher +/// `update(const uint8_t *Ptr, size_t Size)` method. +/// We want to avoid collisions for variable-size types; for example for +/// ``` +/// builder.update({1}); +/// builder.update({2, 3}); +/// ``` +/// and +/// ``` +/// builder.update({1, 2}); +/// builder.update({3}); +/// ``` +/// . Thus, specializations of `update` and `updateHash` for variable-size types +/// must not assume that the hasher type considers the size as part of the +/// hash; they must explicitly update the hash with the size. See for example +/// specializations for `ArrayRef` and `StringRef`. +/// +/// Additionally, since types are eventually forwarded to the hasher's +/// `void update(ArrayRef)` method, endianness plays a role in the hash +/// computation (for example when computing `update((int)123)`). +/// Specifying endianness via the `Endianness` template parameter allows to +/// compute stable hash across platforms with different endianness. +template +class HashBuilder { +private: + /// Trait to indicate whether a type's bits can be hashed directly (after + /// endianness correction). + template + struct IsHashableData + : std::integral_constant::value || + std::is_floating_point::value> {}; + +public: + Optional OptionalHasher; + HasherT &Hasher; + + explicit HashBuilder(HasherT &Hasher) : Hasher(Hasher) {} + template + explicit HashBuilder(ArgTypes &&...Args) + : OptionalHasher(in_place, std::forward(Args)...), + Hasher(*OptionalHasher) {} + + template + using has_final_t = decltype(std::declval().final()); + template < + typename E = std::enable_if_t::value>> + auto final() { + return Hasher.final(); + } + + /// Implement hashing for hashable data types, e.g. integral, enum, or + /// floating-point values. + template + std::enable_if_t::value, HashBuilder &> update(T Value) { + Value = support::endian::byte_swap(Value, Endianness); + return updateBytes( + makeArrayRef(reinterpret_cast(&Value), sizeof(Value))); + } + + /// Support hashing `ArrayRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.update({1}); + /// builder.update({2, 3}); + /// ``` + /// and + /// ``` + /// builder.update({1, 2}); + /// builder.update({3}); + /// ``` + /// do not collide. + template HashBuilder &update(ArrayRef Value) { + return updateRange(Value); + } + + /// Support hashing `StringRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.update("a"); + /// builder.update("bc"); + /// ``` + /// and + /// ``` + /// builder.update("ab"); + /// builder.update("c"); + /// ``` + /// do not collide. + HashBuilder &update(StringRef Value) { return updateRange(Value); } + + /// Implement hashing for user-defined `struct`s. + /// + /// Any user-define `struct` can participate in hashing via `HashBuilder` by + /// providing a `updateHash` templated function. + /// + /// ``` + /// template + /// void updateHash(HashBuilder &HBuilder, + /// const UserDefinedStruct &Value); + /// ``` + /// + /// For example: + /// ``` + /// struct SimpleStruct { + /// char c; + /// int i; + /// }; + /// + /// template + /// void updateHash(HashBuilder &HBuilder, + /// const SimpleStruct &Value) { + /// HBuilder.update(Value.c); + /// HBuilder.update(Value.i); + /// } + /// ``` + /// + /// To avoid endianness issues, specializations of `updateHash` should + /// generally rely on exising `update` and `updateRange` functions. If + /// directly using `updateBytes`, an implementation must correctly handle + /// endianness. + /// + /// ``` + /// struct __attribute__ ((packed)) StructWithFastHash { + /// int I; + /// char C; + /// + /// // If possible, we want to hash both `I` and `C` in a single + /// `updateBytes` + /// // call for performance concerns. + /// template + /// friend void updateHash(HashBuilder &HBuilder, + /// const StructWithFastHash &Value) { + /// if (Endianness == support::endianness::native || + /// Endianness == support::endian::system_endianness()) { + /// HBuilder.updateBytes(makeArrayRef( + /// reinterpret_cast(&Value), sizeof(Value))); + /// } else { + /// // Rely on existing `update` methods to handle endianness. + /// HBuilder.update(Value.I); + /// HBuilder.update(Value.C); + /// } + /// } + /// }; + /// ``` + /// + /// To avoid collisions, specialization of `updateHash` for variable-size + /// types must take the size into account. + /// + /// For example: + /// ``` + /// struct CustomContainer { + /// private: + /// size_t Size; + /// int Elements[100]; + /// + /// public: + /// CustomContainer(size_t Size) : Size(Size) { + /// for (size_t I = 0; I != Size; ++I) + /// Elements[I] = I; + /// } + /// template + /// friend void updateHash(HashBuilder &HBuilder, + /// const CustomContainer &Value) { + /// if (Endianness == support::endianness::native || + /// Endianness == support::endian::system_endianness()) { + /// HBuilder.updateBytes(makeArrayRef( + /// reinterpret_cast(&Value.Size), + /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); + /// } else { + /// // `updateRange` will take care of encoding the size. + /// HBuilder.updateRange(&Value.Elements[0], &Value.Elements[0] + + /// Value.Size); + /// } + /// } + /// }; + /// ``` + /// + template + using has_update_hash_t = + decltype(updateHash(std::declval(), std::declval())); + template + std::enable_if_t::value, HashBuilder &> + update(const T &Value) { + updateHash(*this, Value); + return *this; + } + + template + HashBuilder &update(const std::pair &Value) { + update(Value.first); + update(Value.second); + return *this; + } + + template + typename std::enable_if<(sizeof...(Ts) > 1), HashBuilder &>::type + update(const std::tuple &Arg) { + return updateTupleHelper(Arg, typename std::index_sequence_for()); + } + + /// A convenenience variadic helper. + /// It simply iterates over its arguments, in order. + /// ``` + /// update(Arg1, Arg2); + /// ``` + /// is equivalent to + /// ``` + /// update(Arg1) + /// update(Arg2) + /// ``` + template + typename std::enable_if<(sizeof...(Ts) > 1), HashBuilder &>::type + update(const Ts &...Args) { + std::tuple{(update(Args), Args)...}; + return *this; + } + + template + HashBuilder &updateRange(ForwardIteratorT First, ForwardIteratorT Last) { + return updateRangeImpl( + First, Last, + typename std::iterator_traits::iterator_category()); + } + + template HashBuilder &updateRange(const RangeT &Range) { + return updateRange(adl_begin(Range), adl_end(Range)); + } + + /// Update the hash with `Data`. This does not rely on `HasherT` to + /// take the size of `Data` into account. + /// + /// Users of this function should pay attention to respect endianness + /// contraints. + HashBuilder &updateBytes(ArrayRef Data) { + Hasher.update(Data); + return *this; + } + + /// Update the hash with `Data`. This does not rely on `HasherT` to + /// take the size of `Data` into account. + /// + /// Users of this function should pay attention to respect endianness + /// contraints. + HashBuilder &updateBytes(StringRef Data) { + return updateBytes(makeArrayRef( + reinterpret_cast(Data.data()), Data.size())); + } + +private: + template + HashBuilder &updateTupleHelper(const std::tuple &Arg, + std::index_sequence) { + std::tuple{ + (update(std::get(Arg)), std::get(Arg))...}; + return *this; + } + + // FIXME: Once available, specialize this function for `contiguous_iterator`s, + // and use it for `ArrayRef` and `StringRef`. + template + HashBuilder &updateRangeImpl(ForwardIteratorT First, ForwardIteratorT Last, + std::forward_iterator_tag) { + update(std::distance(First, Last)); + for (auto It = First; It != Last; ++It) + update(*It); + return *this; + } + + template + std::enable_if_t::value && + Endianness == support::endian::system_endianness(), + HashBuilder &> + updateRangeImpl(T *First, T *Last, std::forward_iterator_tag) { + update(std::distance(First, Last)); + updateBytes(makeArrayRef(reinterpret_cast(First), + (Last - First) * sizeof(T))); + return *this; + } +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HASHBUILDER_H diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -39,6 +39,7 @@ FormatVariadicTest.cpp FSUniqueIDTest.cpp GlobPatternTest.cpp + HashBuilderTest.cpp Host.cpp IndexedAccessorTest.cpp InstructionCostTest.cpp diff --git a/llvm/unittests/Support/HashBuilderTest.cpp b/llvm/unittests/Support/HashBuilderTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Support/HashBuilderTest.cpp @@ -0,0 +1,288 @@ +//===- llvm/unittest/Support/HashBuilderTest.cpp - HashBuilder unit tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/HashBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/SHA256.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include +#include + +// gtest utilities and macros rely on using a single type. So wrap both the +// hasher type and endianness. +template +struct HasherTAndEndianness { + using HasherT = _HasherT; + static constexpr llvm::support::endianness Endianness = _Endianness; +}; +using HasherTAndEndiannessToTest = + ::testing::Types, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness, + HasherTAndEndianness>; +template class HashBuilderTest : public testing::Test {}; +TYPED_TEST_SUITE(HashBuilderTest, HasherTAndEndiannessToTest); + +template +using HashBuilder = llvm::HashBuilder; + +template +static auto computeHash(const Ts &...Args) { + return static_cast( + HashBuilder().update(Args...).final()); +} + +template +static auto computeHashForRange(const Ts &...Args) { + return static_cast( + HashBuilder().updateRange(Args...).final()); +} + +// All the test infrastructure relies on the variadic helpers. Test them first. +TYPED_TEST(HashBuilderTest, VariadicHelpers) { + { + HashBuilder HBuilder; + + HBuilder.update(100); + HBuilder.update(2.7); + HBuilder.update("string"); + + EXPECT_EQ(HBuilder.final(), computeHash(100, 2.7, "string")); + } + + { + HashBuilder HBuilder; + + std::vector Vec{100, 101, 102}; + HBuilder.updateRange(Vec); + + EXPECT_EQ(HBuilder.final(), computeHashForRange(Vec)); + } + + { + HashBuilder HBuilder; + + std::vector Vec{200, 201, 202}; + HBuilder.updateRange(Vec.begin(), Vec.end()); + + EXPECT_EQ(HBuilder.final(), + computeHashForRange(Vec.begin(), Vec.end())); + } +} + +template std::string hashRawData(T Data) { + using H = typename HE::HasherT; + constexpr auto E = HE::Endianness; + H Hasher; + T SwappedData = llvm::support::endian::byte_swap(Data, E); + Hasher.update(llvm::makeArrayRef( + reinterpret_cast(&SwappedData), sizeof(Data))); + return static_cast(Hasher.final()); +} + +TYPED_TEST(HashBuilderTest, ReferenceHashCheck) { + using HE = TypeParam; + + char C = 'c'; + int32_t I = 0x12345678; + uint64_t UI64 = static_cast(1) << 50; + enum TestEnumeration : uint16_t { TE_One = 1, TE_Two = 2 }; + TestEnumeration Enum = TE_Two; + double D = 123.0; + + EXPECT_EQ(hashRawData(C), computeHash(C)); + EXPECT_EQ(hashRawData(I), computeHash(I)); + EXPECT_EQ(hashRawData(UI64), computeHash(UI64)); + EXPECT_EQ(hashRawData(Enum), computeHash(Enum)); + EXPECT_EQ(hashRawData(D), computeHash(D)); +} + +struct SimpleStruct { + char C; + int I; +}; + +template +void updateHash(llvm::HashBuilder &HBuilder, + const SimpleStruct &Value) { + HBuilder.update(Value.C); + HBuilder.update(Value.I); +} + +struct StructWithoutCopyOrMove { + int I; + StructWithoutCopyOrMove() = default; + StructWithoutCopyOrMove(const StructWithoutCopyOrMove &) = delete; + StructWithoutCopyOrMove &operator=(const StructWithoutCopyOrMove &) = delete; + + template + friend void updateHash(llvm::HashBuilder &HBuilder, + const StructWithoutCopyOrMove &Value) { + HBuilder.update(Value.I); + } +}; + +struct __attribute__((packed)) StructWithFastHash { + int I; + char C; + + // If possible, we want to hash both `I` and `C` in a single `updateBytes` + // call for performance concerns. + template + friend void updateHash(llvm::HashBuilder &HBuilder, + const StructWithFastHash &Value) { + if (Endianness == llvm::support::endianness::native || + Endianness == llvm::support::endian::system_endianness()) { + HBuilder.updateBytes(llvm::makeArrayRef( + reinterpret_cast(&Value), sizeof(Value))); + } else { + // Rely on existing `update` methods to handle endianness. + HBuilder.update(Value.I); + HBuilder.update(Value.C); + } + } +}; + +struct CustomContainer { +private: + size_t Size; + int Elements[100]; + +public: + CustomContainer(size_t Size) : Size(Size) { + for (size_t I = 0; I != Size; ++I) + Elements[I] = I; + } + template + friend void updateHash(llvm::HashBuilder &HBuilder, + const CustomContainer &Value) { + if (Endianness == llvm::support::endianness::native || + Endianness == llvm::support::endian::system_endianness()) { + HBuilder.updateBytes(llvm::makeArrayRef( + reinterpret_cast(&Value.Size), + sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); + } else { + HBuilder.updateRange(&Value.Elements[0], &Value.Elements[0] + Value.Size); + } + } +}; + +TYPED_TEST(HashBuilderTest, HashUserDefinedStruct) { + using HE = TypeParam; + EXPECT_EQ(computeHash(SimpleStruct{'c', 123}), computeHash('c', 123)); + EXPECT_EQ(computeHash(StructWithoutCopyOrMove{1}), computeHash(1)); + EXPECT_EQ(computeHash(StructWithFastHash{123, 'c'}), + computeHash(123, 'c')); + EXPECT_EQ(computeHash(CustomContainer(3)), + computeHash(static_cast(3), 0, 1, 2)); +} + +TYPED_TEST(HashBuilderTest, HashArrayRefHashableDataTypes) { + using HE = TypeParam; + llvm::ArrayRef Array{1, 20, 0x12345678}; + EXPECT_NE(computeHash(Array), computeHash(1, 20, 0x12345678)); + EXPECT_EQ(computeHash(Array), + computeHashForRange(Array.begin(), Array.end())); + EXPECT_EQ(computeHash(Array), + computeHashForRange(Array.data(), Array.data() + Array.size())); +} + +TYPED_TEST(HashBuilderTest, HashArrayRef) { + using HE = TypeParam; + llvm::ArrayRef Array123{1, 2, 3}; + llvm::ArrayRef Array12{1, 2}; + llvm::ArrayRef Array1{1}; + llvm::ArrayRef Array23{2, 3}; + llvm::ArrayRef Array3{3}; + llvm::ArrayRef ArrayEmpty{}; + + auto Hash123andEmpty = computeHash(Array123, ArrayEmpty); + auto Hash12And3 = computeHash(Array12, Array3); + auto Hash1And23 = computeHash(Array1, Array23); + auto HashEmptyAnd123 = computeHash(ArrayEmpty, Array123); + + EXPECT_NE(Hash123andEmpty, Hash12And3); + EXPECT_NE(Hash123andEmpty, Hash1And23); + EXPECT_NE(Hash123andEmpty, HashEmptyAnd123); + EXPECT_NE(Hash12And3, Hash1And23); + EXPECT_NE(Hash12And3, HashEmptyAnd123); + EXPECT_NE(Hash1And23, HashEmptyAnd123); +} + +TYPED_TEST(HashBuilderTest, HashArrayRefNonHashableDataTypes) { + using HE = TypeParam; + llvm::ArrayRef Array{{'a', 100}, {'b', 200}}; + EXPECT_NE(computeHash(Array), + computeHash(SimpleStruct{'a', 100}, SimpleStruct{'b', 200})); +} + +TYPED_TEST(HashBuilderTest, HashStringRef) { + using HE = TypeParam; + llvm::StringRef SEmpty(""); + llvm::StringRef S1("1"); + llvm::StringRef S12("12"); + llvm::StringRef S123("123"); + llvm::StringRef S23("23"); + llvm::StringRef S3("3"); + + auto Hash123andEmpty = computeHash(S123, SEmpty); + auto Hash12And3 = computeHash(S12, S3); + auto Hash1And23 = computeHash(S1, S23); + auto HashEmptyAnd123 = computeHash(SEmpty, S123); + + EXPECT_NE(Hash123andEmpty, Hash12And3); + EXPECT_NE(Hash123andEmpty, Hash1And23); + EXPECT_NE(Hash123andEmpty, HashEmptyAnd123); + EXPECT_NE(Hash12And3, Hash1And23); + EXPECT_NE(Hash12And3, HashEmptyAnd123); + EXPECT_NE(Hash1And23, HashEmptyAnd123); +} + +TYPED_TEST(HashBuilderTest, HashStdString) { + using HE = TypeParam; + EXPECT_EQ(computeHash(std::string("123")), + computeHash(llvm::StringRef("123"))); +} + +TYPED_TEST(HashBuilderTest, HashStdPairTuple) { + using HE = TypeParam; + EXPECT_EQ(computeHash(std::make_pair(1, "string")), + computeHash(1, "string")); + EXPECT_EQ(computeHash(std::make_tuple(1, "string", 3.0f)), + computeHash(1, "string", 3.0f)); + + std::pair Pair; + Pair.first.I = 1; + Pair.second = "string"; + std::tuple Tuple; + std::get<0>(Tuple).I = 1; + std::get<1>(Tuple) = "string"; + + EXPECT_EQ(computeHash(Pair), computeHash(Tuple)); +} + +TYPED_TEST(HashBuilderTest, HashRangeWithForwardIterator) { + using HE = TypeParam; + std::list List; + List.push_back(1); + List.push_back(2); + List.push_back(3); + EXPECT_NE(computeHashForRange(List), computeHash(1, 2, 3)); +}