diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Support/HashBuilder.h @@ -0,0 +1,255 @@ +//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an interface allowing to conveniently build hashes of +// various data types, without relying on the underlying hasher type to know +// about hashed data types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HASHBUILDER_H +#define LLVM_SUPPORT_HASHBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/type_traits.h" + +#include +#include + +namespace llvm { + +namespace hash_builder { +namespace detail { +/// Trait to indicate whether a type's bits can be hashed directly. +/// A type for which this trait is true will be treated like a bag of bytes by +/// `HashBuilder`. +template +struct IsHashableData + : std::integral_constant::value || + std::is_floating_point::value> {}; +} // namespace detail +} // namespace hash_builder + +/// Interface to help hash various types through a hasher type. +/// +/// Via provided specializations of `update` and `updateRange` functions, +/// various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed without +/// requiring any knowledge of hashed types from the hasher type. +/// +/// The only methods expected from the templated hasher type `HasherT` are: +/// * a default constructor +/// * void update(ArrayRef Data) +/// +/// From a user point of view, the interface provides the following: +/// * `template update(const T &Value)` +/// The `update` function implements hashing of various types. +/// * `template void updateRange(ItT First, ItT Last)` +/// The `updateRange` function is designed to aid hashing a range of values. +/// +/// User-defined `struct` types can participate in this interface by providing +/// an `updateHash` templated function. See the associated template +/// specialization for details. +/// +/// This interface does not impose requirements on the hasher +/// `update(const uint8_t *Ptr, size_t Size)` method. +/// We want to avoid collisions for variable-size types; for example for +/// ``` +/// builder.update({1}); +/// builder.update({2, 3}); +/// ``` +/// and +/// ``` +/// builder.update({1, 2}); +/// builder.update({3}); +/// ``` +/// . Thus, specializations of `update` and `updateHash` for variable-size types +/// should not assume that the hasher type considers the size as part of the +/// hash; they should explicitly update the hash with the size. See for example +/// specializations for `ArrayRef` and `StringRef`. +template class HashBuilder { +public: + HasherT &Hasher; + + explicit HashBuilder(HasherT &Hasher) : Hasher(Hasher) {} + + template + using has_update_hash_t = + decltype(updateHash(std::declval(), std::declval())); + + /// Implement hashing for hashable data types, e.g. integral, enum, or + /// floating-point values. + /// + /// The value is simply treated like a bag of bytes. + template + std::enable_if_t::value> + update(T Value) { + updateBytes(reinterpret_cast(&Value), sizeof(Value)); + } + + /// Support hashing `ArrayRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.update({1}); + /// builder.update({2, 3}); + /// ``` + /// and + /// ``` + /// builder.update({1, 2}); + /// builder.update({3}); + /// ``` + /// do not collide. + template void update(ArrayRef Value) { updateRange(Value); } + + /// Support hashing `StringRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.update("a"); + /// builder.update("bc"); + /// ``` + /// and + /// ``` + /// builder.update("ab"); + /// builder.update("c"); + /// ``` + /// do not collide. + void update(StringRef Value) { updateRange(Value); } + + /// Implement hashing for user-defined `struct`s. + /// + /// Any user-define `struct` can participate in hashing via `HashBuilder` by + /// providing a `updateHash` templated function: + // + /// ``` + /// template + /// void updateHash(HashBuilder &HBuilder, + /// const UserDefinedStruct &Value); + /// ``` + /// + /// For example: + /// + /// ``` + /// struct SimpleStruct { char c; int i; }; + // + /// template + /// void updateHash(HashBuilder &HBuilder, + /// const SimpleStruct &Value) { + /// HBuilder.update(Value.c); + /// HBuilder.update(Value.i); + /// } + /// + /// struct StructWithPrivateMember { + /// public: + /// explicit StructWithPrivateMember(int i, float f) : i(i), f(f) {} + /// + /// int i; + /// private: + /// float f; + /// + /// template + /// friend void updateHash(HashBuilder &HBuilder, + /// const StructWithPrivateMember& Value) { + /// HBuilder.update(Value.i); + /// HBuilder.update(Value.f); + /// } + /// }; + /// ``` + template + std::enable_if_t::value> + update(const T &Value) { + updateHash(*this, Value); + } + + template + void update(const std::pair &Value) { + update(Value.first); + update(Value.second); + } + + template + typename std::enable_if<(sizeof...(Ts) > 1)>::type + update(const std::tuple &Arg) { + updateTupleHelper(Arg, typename std::index_sequence_for()); + } + + /// A convenenience variadic helper. + /// It simply iterates over its arguments, in order. + /// ``` + /// update(Arg1, Arg2); + /// ``` + /// is equivalent to + /// ``` + /// update(Arg1) + /// update(Arg2) + /// ``` + template + typename std::enable_if<(sizeof...(Ts) > 1)>::type update(const Ts &...Args) { + std::tuple Unused{(update(Args), Args)...}; + } + + template + void updateRange(ForwardIteratorT First, ForwardIteratorT Last) { + updateRangeImpl( + First, Last, + typename std::iterator_traits::iterator_category()); + } + + template void updateRange(const RangeT &Range) { + updateRange(adl_begin(Range), adl_end(Range)); + } + + /// Update the hash with `Data`. This does not rely on `HasherT` to + /// take the size of `Data` into account. + void updateBytes(ArrayRef Data) { Hasher.update(Data); } + + /// Update the hash with `Data`. This does not rely on `HasherT` to + /// take the size of `Data` into account. + void updateBytes(StringRef Data) { + updateBytes(ArrayRef( + reinterpret_cast(Data.data()), Data.size())); + } + + /// Update the hash with a "bag of bytes". This does not rely on `HasherT` to + /// take `Size` into account. + void updateBytes(const uint8_t *Ptr, size_t Size) { + updateBytes(ArrayRef(Ptr, Size)); + } + +private: + template + void updateTupleHelper(const std::tuple &Arg, + std::index_sequence) { + std::tuple{ + (update(std::get(Arg)), std::get(Arg))...}; + } + + // FIXME: Once available, specialize this function for `contiguous_iterator`s, + // and use it for `ArrayRef` and `StringRef`. + template + void updateRangeImpl(ForwardIteratorT First, ForwardIteratorT Last, + std::forward_iterator_tag) { + update(std::distance(First, Last)); + for (auto It = First; It != Last; ++It) + update(*It); + } + + template + std::enable_if_t::value> + updateRangeImpl(T *First, T *Last, std::forward_iterator_tag) { + update(std::distance(First, Last)); + updateBytes(reinterpret_cast(First), + (Last - First) * sizeof(T)); + } +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HASHBUILDER_H diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -39,6 +39,7 @@ FormatVariadicTest.cpp FSUniqueIDTest.cpp GlobPatternTest.cpp + HashBuilderTest.cpp Host.cpp IndexedAccessorTest.cpp InstructionCostTest.cpp diff --git a/llvm/unittests/Support/HashBuilderTest.cpp b/llvm/unittests/Support/HashBuilderTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Support/HashBuilderTest.cpp @@ -0,0 +1,262 @@ +//===- llvm/unittest/Support/HashBuilderTest.cpp - HashBuilder unit tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/HashBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/SHA256.h" +#include "gtest/gtest.h" + +#include +#include +#include + +using namespace llvm; + +// Convenience helpers to concisely compute hashes. + +template static auto finalHash(HasherT &Hasher); + +template <> auto finalHash(MD5 &Hasher) { + MD5::MD5Result MD5Res; + Hasher.final(MD5Res); + return MD5Res.words(); +} + +template <> auto finalHash(SHA1 &Hasher) { return Hasher.final().str(); } + +template <> auto finalHash(SHA256 &Hasher) { + return Hasher.final().str(); +} + +template +static auto computeHash(const Ts &...Args) { + HasherT Hasher; + HashBuilder HBuilder(Hasher); + HBuilder.update(Args...); + return finalHash(HBuilder.Hasher); +} + +template +static auto computeHashForRange(const Ts &...Args) { + HasherT Hasher; + HashBuilder HBuilder(Hasher); + HBuilder.updateRange(Args...); + return finalHash(HBuilder.Hasher); +} + +template class BasicTest : public ::testing::Test { +public: + BasicTest() : HBuilder(Hasher) { + char C = 'c'; + int I = 1; + uint64_t UI64 = static_cast(1) << 50; + enum TestEnumeration { TE_One = 1, TE_Two = 2 }; + volatile int VI = 71; + const volatile int CVI = 72; + double D = 123.0; + + HBuilder.update(C); + HBuilder.update(I); + HBuilder.update(UI64); + HBuilder.update(TE_One); + HBuilder.update(VI); + HBuilder.update(CVI); + HBuilder.update(D); + } + + HasherT Hasher; + HashBuilder HBuilder; +}; + +using HashBuilderTestBasicMD5 = BasicTest; +TEST_F(HashBuilderTestBasicMD5, BasicMD5Check) { + using MD5Words = std::pair; + EXPECT_EQ(finalHash(Hasher), + MD5Words(0x7FE040B0B5311CE2ULL, 0xBE6766F347F32726ULL)); +} + +using HashBuilderTestBasicSHA1 = BasicTest; +TEST_F(HashBuilderTestBasicSHA1, BasicSHA1Check) { + EXPECT_EQ(finalHash(Hasher), "\xEC\x87\x19" + "a\xC8\xCA.o%S\xA3\xEE\xC9\x9B" + "a\xB1" + "EX<\xCB"); +} + +using HashBuilderTestBasicSHA256 = BasicTest; +TEST_F(HashBuilderTestBasicSHA256, BasicSHA256) { + EXPECT_EQ(finalHash(Hasher), + "urD\x16\xC4\x90\x90\xE7k\x8CH|J\x98\x1C\xAE\xEC~" + "\x1A\x88\xFF\xF8q\xD3\xCE-\x8A\x8D\xCF" + "F\xD6\x11"); +} + +template +class TypedHashBuilderTest : public testing::Test {}; +using HasherTypes = ::testing::Types; +TYPED_TEST_SUITE(TypedHashBuilderTest, HasherTypes); + +TYPED_TEST(TypedHashBuilderTest, HashVariadic) { + using H = TypeParam; + H Hasher1; + H Hasher2; + HashBuilder HBuilder1(Hasher1); + HashBuilder HBuilder2(Hasher2); + + HBuilder1.update(100); + HBuilder1.update(2.7); + HBuilder1.update("string"); + + HBuilder2.update(100, 2.7, "string"); + + EXPECT_EQ(finalHash(Hasher1), finalHash(Hasher2)); +} + +struct SimpleStruct { + char C; + int I; +}; + +template +void updateHash(HashBuilder &HBuilder, const SimpleStruct &Value) { + HBuilder.update(Value.C); + HBuilder.update(Value.I); +} + +struct StructWithPrivateMember { +public: + explicit StructWithPrivateMember(std::string S, float F) : S(S), F(F) {} + + std::string S; + +private: + float F; + + template + friend void updateHash(HashBuilder &HBuilder, + const StructWithPrivateMember &Value) { + HBuilder.update(Value.S); + HBuilder.update(Value.F); + } +}; + +struct StructWithoutCopyOrMove { + int I; + StructWithoutCopyOrMove() = default; + StructWithoutCopyOrMove(const StructWithoutCopyOrMove &) = delete; + StructWithoutCopyOrMove &operator=(const StructWithoutCopyOrMove &) = delete; + + template + friend void updateHash(HashBuilder &HBuilder, + const StructWithoutCopyOrMove &Value) { + HBuilder.update(Value.I); + } +}; + +TYPED_TEST(TypedHashBuilderTest, HashUserDefinedStruct) { + using H = TypeParam; + EXPECT_EQ(computeHash(SimpleStruct{'c', 123}), computeHash('c', 123)); + EXPECT_EQ(computeHash(StructWithPrivateMember{"s", 2.0f}), + computeHash("s", 2.0f)); + EXPECT_EQ(computeHash(StructWithoutCopyOrMove{1}), computeHash(1)); +} + +TYPED_TEST(TypedHashBuilderTest, HashArrayRefHashableDataTypes) { + using H = TypeParam; + ArrayRef Array{1, 20, 0x12345678}; + EXPECT_NE(computeHash(Array), computeHash(1, 20, 0x12345678)); + EXPECT_EQ(computeHash(Array), + computeHashForRange(Array.begin(), Array.end())); + EXPECT_EQ(computeHash(Array), + computeHashForRange(Array.data(), Array.data() + Array.size())); +} + +TYPED_TEST(TypedHashBuilderTest, HashArrayRef) { + using H = TypeParam; + ArrayRef Array123{1, 2, 3}; + ArrayRef Array12{1, 2}; + ArrayRef Array1{1}; + ArrayRef Array23{2, 3}; + ArrayRef Array3{3}; + ArrayRef ArrayEmpty{}; + + auto Hash123andEmpty = computeHash(Array123, ArrayEmpty); + auto Hash12And3 = computeHash(Array12, Array3); + auto Hash1And23 = computeHash(Array1, Array23); + auto HashEmptyAnd123 = computeHash(ArrayEmpty, Array123); + + EXPECT_NE(Hash123andEmpty, Hash12And3); + EXPECT_NE(Hash123andEmpty, Hash1And23); + EXPECT_NE(Hash123andEmpty, HashEmptyAnd123); + EXPECT_NE(Hash12And3, Hash1And23); + EXPECT_NE(Hash12And3, HashEmptyAnd123); + EXPECT_NE(Hash1And23, HashEmptyAnd123); +} + +TYPED_TEST(TypedHashBuilderTest, HashArrayRefNonHashableDataTypes) { + using H = TypeParam; + ArrayRef Array{{'a', 100}, {'b', 200}}; + EXPECT_NE(computeHash(Array), + computeHash(SimpleStruct{'a', 100}, SimpleStruct{'b', 200})); +} + +TYPED_TEST(TypedHashBuilderTest, HashStringRef) { + using H = TypeParam; + StringRef SEmpty(""); + StringRef S1("1"); + StringRef S12("12"); + StringRef S123("123"); + StringRef S23("23"); + StringRef S3("3"); + + auto Hash123andEmpty = computeHash(S123, SEmpty); + auto Hash12And3 = computeHash(S12, S3); + auto Hash1And23 = computeHash(S1, S23); + auto HashEmptyAnd123 = computeHash(SEmpty, S123); + + EXPECT_NE(Hash123andEmpty, Hash12And3); + EXPECT_NE(Hash123andEmpty, Hash1And23); + EXPECT_NE(Hash123andEmpty, HashEmptyAnd123); + EXPECT_NE(Hash12And3, Hash1And23); + EXPECT_NE(Hash12And3, HashEmptyAnd123); + EXPECT_NE(Hash1And23, HashEmptyAnd123); +} + +TYPED_TEST(TypedHashBuilderTest, HashStdString) { + using H = TypeParam; + EXPECT_EQ(computeHash(std::string("123")), + computeHash(StringRef("123"))); +} + +TYPED_TEST(TypedHashBuilderTest, HashStdPairTuple) { + using H = TypeParam; + EXPECT_EQ(computeHash(std::make_pair(1, "string")), + computeHash(1, "string")); + EXPECT_EQ(computeHash(std::make_tuple(1, "string", 3.0f)), + computeHash(1, "string", 3.0f)); + + std::pair Pair; + Pair.first.I = 1; + Pair.second = "string"; + std::tuple Tuple; + std::get<0>(Tuple).I = 1; + std::get<1>(Tuple) = "string"; + + EXPECT_EQ(computeHash(Pair), computeHash(Tuple)); +} + +TYPED_TEST(TypedHashBuilderTest, HashRangeWithForwardIterator) { + using H = TypeParam; + std::list List; + List.push_back(1); + List.push_back(2); + List.push_back(3); + EXPECT_NE(computeHashForRange(List), computeHash(1, 2, 3)); +}